net/mlx5: discover max flow priority using DevX
drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              const struct mlx5_flow_tunnel *tunnel,
54                              struct tunnel_default_miss_ctx *ctx,
55                              struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62                                 const struct mlx5_flow_tunnel *tunnel,
63                                 uint32_t group, uint32_t *table,
64                                 struct rte_flow_error *error);
65
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68
69
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79 #endif
80         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
81         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
82 };
83
84 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
85 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
86         (const int []){ \
87                 __VA_ARGS__, 0, \
88         }
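
/*
 * Illustration (a sketch, not driver logic): the macro above builds an
 * anonymous, zero-terminated array of node indexes. For example,
 *   MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 * expands to
 *   (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 * where the trailing 0 acts as the list terminator, since index 0 is
 * interpreted as the terminator by the expansion walker.
 */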
89
90 /** Node object of input graph for mlx5_flow_expand_rss(). */
91 struct mlx5_flow_expand_node {
92         const int *const next;
93         /**<
94          * List of next node indexes. Index 0 is interpreted as a terminator.
95          */
96         const enum rte_flow_item_type type;
97         /**< Pattern item type of current node. */
98         uint64_t rss_types;
99         /**<
100          * RSS types bit-field associated with this node
101          * (see RTE_ETH_RSS_* definitions).
102          */
103         uint64_t node_flags;
104         /**<
105          *  Bit-fields that define how the node is used in the expansion.
106          * (see MLX5_EXPANSION_NODE_* definitions).
107          */
108 };
109
110 /* Optional expansion node. The expansion algorithm will not go deeper. */
111 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
112
113 /* The node is not added implicitly as an expansion of the flow pattern.
114  * If the node type does not match the flow pattern item type, the
115  * expansion algorithm goes deeper into its next nodes.
116  * In the current implementation, the list of next node indexes can
117  * have up to one node with this flag set and it has to be the last
118  * node index (before the list terminator).
119  */
120 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
121
122 /** Object returned by mlx5_flow_expand_rss(). */
123 struct mlx5_flow_expand_rss {
124         uint32_t entries;
125         /**< Number of entries @p patterns and @p priorities. */
126         struct {
127                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
128                 uint32_t priority; /**< Priority offset for each expansion. */
129         } entry[];
130 };
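
/*
 * Illustration of the buffer layout that mlx5_flow_expand_rss() assumes (a
 * sketch; see the function below for the authoritative logic): the fixed
 * part holds the entry count and an entry[] array of MLX5_RSS_EXP_ELT_N
 * (defined below) {pattern, priority} descriptors, while the expanded
 * rte_flow_item arrays are written contiguously right after the entry[]
 * array; entry[0].pattern points to the start of that region.
 */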
131
132 static void
133 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
134
135 static const struct mlx5_flow_expand_node *
136 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
137                 unsigned int item_idx,
138                 const struct mlx5_flow_expand_node graph[],
139                 const struct mlx5_flow_expand_node *node);
140
141 static bool
142 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
143 {
144         switch (item->type) {
145         case RTE_FLOW_ITEM_TYPE_ETH:
146         case RTE_FLOW_ITEM_TYPE_VLAN:
147         case RTE_FLOW_ITEM_TYPE_IPV4:
148         case RTE_FLOW_ITEM_TYPE_IPV6:
149         case RTE_FLOW_ITEM_TYPE_UDP:
150         case RTE_FLOW_ITEM_TYPE_TCP:
151         case RTE_FLOW_ITEM_TYPE_VXLAN:
152         case RTE_FLOW_ITEM_TYPE_NVGRE:
153         case RTE_FLOW_ITEM_TYPE_GRE:
154         case RTE_FLOW_ITEM_TYPE_GENEVE:
155         case RTE_FLOW_ITEM_TYPE_MPLS:
156         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
157         case RTE_FLOW_ITEM_TYPE_GRE_KEY:
158         case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
159         case RTE_FLOW_ITEM_TYPE_GTP:
160                 return true;
161         default:
162                 break;
163         }
164         return false;
165 }
166
167 static enum rte_flow_item_type
168 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
169 {
170         enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
171         uint16_t ether_type = 0;
172         uint16_t ether_type_m;
173         uint8_t ip_next_proto = 0;
174         uint8_t ip_next_proto_m;
175
176         if (item == NULL || item->spec == NULL)
177                 return ret;
178         switch (item->type) {
179         case RTE_FLOW_ITEM_TYPE_ETH:
180                 if (item->mask)
181                         ether_type_m = ((const struct rte_flow_item_eth *)
182                                                 (item->mask))->type;
183                 else
184                         ether_type_m = rte_flow_item_eth_mask.type;
185                 if (ether_type_m != RTE_BE16(0xFFFF))
186                         break;
187                 ether_type = ((const struct rte_flow_item_eth *)
188                                 (item->spec))->type;
189                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
190                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
191                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
192                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
193                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
194                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
195                 else
196                         ret = RTE_FLOW_ITEM_TYPE_END;
197                 break;
198         case RTE_FLOW_ITEM_TYPE_VLAN:
199                 if (item->mask)
200                         ether_type_m = ((const struct rte_flow_item_vlan *)
201                                                 (item->mask))->inner_type;
202                 else
203                         ether_type_m = rte_flow_item_vlan_mask.inner_type;
204                 if (ether_type_m != RTE_BE16(0xFFFF))
205                         break;
206                 ether_type = ((const struct rte_flow_item_vlan *)
207                                 (item->spec))->inner_type;
208                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
209                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
210                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
211                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
212                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
213                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
214                 else
215                         ret = RTE_FLOW_ITEM_TYPE_END;
216                 break;
217         case RTE_FLOW_ITEM_TYPE_IPV4:
218                 if (item->mask)
219                         ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
220                                         (item->mask))->hdr.next_proto_id;
221                 else
222                         ip_next_proto_m =
223                                 rte_flow_item_ipv4_mask.hdr.next_proto_id;
224                 if (ip_next_proto_m != 0xFF)
225                         break;
226                 ip_next_proto = ((const struct rte_flow_item_ipv4 *)
227                                 (item->spec))->hdr.next_proto_id;
228                 if (ip_next_proto == IPPROTO_UDP)
229                         ret = RTE_FLOW_ITEM_TYPE_UDP;
230                 else if (ip_next_proto == IPPROTO_TCP)
231                         ret = RTE_FLOW_ITEM_TYPE_TCP;
232                 else if (ip_next_proto == IPPROTO_IP)
233                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
234                 else if (ip_next_proto == IPPROTO_IPV6)
235                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
236                 else
237                         ret = RTE_FLOW_ITEM_TYPE_END;
238                 break;
239         case RTE_FLOW_ITEM_TYPE_IPV6:
240                 if (item->mask)
241                         ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
242                                                 (item->mask))->hdr.proto;
243                 else
244                         ip_next_proto_m =
245                                 rte_flow_item_ipv6_mask.hdr.proto;
246                 if (ip_next_proto_m != 0xFF)
247                         break;
248                 ip_next_proto = ((const struct rte_flow_item_ipv6 *)
249                                 (item->spec))->hdr.proto;
250                 if (ip_next_proto == IPPROTO_UDP)
251                         ret = RTE_FLOW_ITEM_TYPE_UDP;
252                 else if (ip_next_proto == IPPROTO_TCP)
253                         ret = RTE_FLOW_ITEM_TYPE_TCP;
254                 else if (ip_next_proto == IPPROTO_IP)
255                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
256                 else if (ip_next_proto == IPPROTO_IPV6)
257                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
258                 else
259                         ret = RTE_FLOW_ITEM_TYPE_END;
260                 break;
261         default:
262                 ret = RTE_FLOW_ITEM_TYPE_VOID;
263                 break;
264         }
265         return ret;
266 }
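
/*
 * Illustration (a sketch): given a pattern item such as
 *   { .type = RTE_FLOW_ITEM_TYPE_ETH,
 *     .spec = &(struct rte_flow_item_eth){ .type = RTE_BE16(0x0800) },
 *     .mask = &(struct rte_flow_item_eth){ .type = RTE_BE16(0xFFFF) } }
 * the helper above returns RTE_FLOW_ITEM_TYPE_IPV4, i.e. the item type
 * implied by the spec, which the RSS expansion may append as a "missed"
 * item before going deeper.
 */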
267
268 static const int *
269 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
270                 const int *next_node)
271 {
272         const struct mlx5_flow_expand_node *node = NULL;
273         const int *next = next_node;
274
275         while (next && *next) {
276                 /*
277                  * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
278                  * flag set, because they were not found in the flow pattern.
279                  */
280                 node = &graph[*next];
281                 if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
282                         break;
283                 next = node->next;
284         }
285         return next;
286 }
287
288 #define MLX5_RSS_EXP_ELT_N 16
289
290 /**
291  * Expand RSS flows into several possible flows according to the RSS hash
292  * fields requested and the driver capabilities.
293  *
294  * @param[out] buf
295  *   Buffer to store the result expansion.
296  * @param[in] size
297  *   Buffer size in bytes. If 0, @p buf can be NULL.
298  * @param[in] pattern
299  *   User flow pattern.
300  * @param[in] types
301  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
302  * @param[in] graph
303  *   Input graph to expand @p pattern according to @p types.
304  * @param[in] graph_root_index
305  *   Index of root node in @p graph, typically 0.
306  *
307  * @return
308  *   A positive value representing the size of @p buf in bytes regardless of
309  *   @p size on success; a negative errno value otherwise (rte_errno is set).
310  *   The following errors are defined:
311  *
312  *   -E2BIG: the depth of @p graph is too big.
313  *   -EINVAL: @p size does not have enough space for the expanded pattern.
314  */
315 static int
316 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
317                      const struct rte_flow_item *pattern, uint64_t types,
318                      const struct mlx5_flow_expand_node graph[],
319                      int graph_root_index)
320 {
321         const struct rte_flow_item *item;
322         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
323         const int *next_node;
324         const int *stack[MLX5_RSS_EXP_ELT_N];
325         int stack_pos = 0;
326         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
327         unsigned int i, item_idx, last_expand_item_idx = 0;
328         size_t lsize;
329         size_t user_pattern_size = 0;
330         void *addr = NULL;
331         const struct mlx5_flow_expand_node *next = NULL;
332         struct rte_flow_item missed_item;
333         int missed = 0;
334         int elt = 0;
335         const struct rte_flow_item *last_expand_item = NULL;
336
337         memset(&missed_item, 0, sizeof(missed_item));
338         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
339                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
340         if (lsize > size)
341                 return -EINVAL;
342         buf->entry[0].priority = 0;
343         buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
344         buf->entries = 0;
345         addr = buf->entry[0].pattern;
346         for (item = pattern, item_idx = 0;
347                         item->type != RTE_FLOW_ITEM_TYPE_END;
348                         item++, item_idx++) {
349                 if (!mlx5_flow_is_rss_expandable_item(item)) {
350                         user_pattern_size += sizeof(*item);
351                         continue;
352                 }
353                 last_expand_item = item;
354                 last_expand_item_idx = item_idx;
355                 i = 0;
356                 while (node->next && node->next[i]) {
357                         next = &graph[node->next[i]];
358                         if (next->type == item->type)
359                                 break;
360                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
361                                 node = next;
362                                 i = 0;
363                         } else {
364                                 ++i;
365                         }
366                 }
367                 if (next)
368                         node = next;
369                 user_pattern_size += sizeof(*item);
370         }
371         user_pattern_size += sizeof(*item); /* Handle END item. */
372         lsize += user_pattern_size;
373         if (lsize > size)
374                 return -EINVAL;
375         /* Copy the user pattern in the first entry of the buffer. */
376         rte_memcpy(addr, pattern, user_pattern_size);
377         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
378         buf->entries = 1;
379         /* Start expanding. */
380         memset(flow_items, 0, sizeof(flow_items));
381         user_pattern_size -= sizeof(*item);
382         /*
383          * Check if the last valid item has its spec set (a complete pattern
384          * is needed) and whether the pattern can be used for expansion.
385          */
386         missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
387         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
388                 /* Item type END indicates expansion is not required. */
389                 return lsize;
390         }
391         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
392                 next = NULL;
393                 missed = 1;
394                 i = 0;
395                 while (node->next && node->next[i]) {
396                         next = &graph[node->next[i]];
397                         if (next->type == missed_item.type) {
398                                 flow_items[0].type = missed_item.type;
399                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
400                                 break;
401                         }
402                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
403                                 node = next;
404                                 i = 0;
405                         } else {
406                                 ++i;
407                         }
408                         next = NULL;
409                 }
410         }
411         if (next && missed) {
412                 elt = 2; /* missed item + item end. */
413                 node = next;
414                 lsize += elt * sizeof(*item) + user_pattern_size;
415                 if (lsize > size)
416                         return -EINVAL;
417                 if (node->rss_types & types) {
418                         buf->entry[buf->entries].priority = 1;
419                         buf->entry[buf->entries].pattern = addr;
420                         buf->entries++;
421                         rte_memcpy(addr, buf->entry[0].pattern,
422                                    user_pattern_size);
423                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
424                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
425                         addr = (void *)(((uintptr_t)addr) +
426                                         elt * sizeof(*item));
427                 }
428         } else if (last_expand_item != NULL) {
429                 node = mlx5_flow_expand_rss_adjust_node(pattern,
430                                 last_expand_item_idx, graph, node);
431         }
432         memset(flow_items, 0, sizeof(flow_items));
433         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
434                         node->next);
435         stack[stack_pos] = next_node;
436         node = next_node ? &graph[*next_node] : NULL;
437         while (node) {
438                 flow_items[stack_pos].type = node->type;
439                 if (node->rss_types & types) {
440                         size_t n;
441                         /*
442                          * Compute the number of items to copy from the
443                          * expansion and copy them.
444                          * When stack_pos is 0, there is one element in it,
445                          * plus the additional END item.
446                          */
447                         elt = stack_pos + 2;
448                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
449                         lsize += elt * sizeof(*item) + user_pattern_size;
450                         if (lsize > size)
451                                 return -EINVAL;
452                         n = elt * sizeof(*item);
453                         buf->entry[buf->entries].priority =
454                                 stack_pos + 1 + missed;
455                         buf->entry[buf->entries].pattern = addr;
456                         buf->entries++;
457                         rte_memcpy(addr, buf->entry[0].pattern,
458                                    user_pattern_size);
459                         addr = (void *)(((uintptr_t)addr) +
460                                         user_pattern_size);
461                         rte_memcpy(addr, &missed_item,
462                                    missed * sizeof(*item));
463                         addr = (void *)(((uintptr_t)addr) +
464                                 missed * sizeof(*item));
465                         rte_memcpy(addr, flow_items, n);
466                         addr = (void *)(((uintptr_t)addr) + n);
467                 }
468                 /* Go deeper. */
469                 if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
470                                 node->next) {
471                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
472                                         node->next);
473                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
474                                 rte_errno = E2BIG;
475                                 return -rte_errno;
476                         }
477                         stack[stack_pos] = next_node;
478                 } else if (*(next_node + 1)) {
479                         /* Follow up with the next possibility. */
480                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
481                                         ++next_node);
482                 } else if (!stack_pos) {
483                         /*
484                          * Completing the traversal over the different paths.
485                          * The next_node is advanced to the terminator.
486                          */
487                         ++next_node;
488                 } else {
489                         /* Move to the next path. */
490                         while (stack_pos) {
491                                 next_node = stack[--stack_pos];
492                                 next_node++;
493                                 if (*next_node)
494                                         break;
495                         }
496                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
497                                         next_node);
498                         stack[stack_pos] = next_node;
499                 }
500                 node = next_node && *next_node ? &graph[*next_node] : NULL;
501         };
502         return lsize;
503 }
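
/*
 * Worked example (illustrative only, following the mlx5_support_expansion
 * graph below): expanding the user pattern
 *   eth / ipv4 / end
 * with types = RTE_ETH_RSS_NONFRAG_IPV4_UDP | RTE_ETH_RSS_NONFRAG_IPV4_TCP
 * yields entries such as
 *   [0] eth / ipv4 / end         (user pattern, priority offset 0)
 *   [1] eth / ipv4 / udp / end   (priority offset 1)
 *   [2] eth / ipv4 / tcp / end   (priority offset 1)
 * The per-entry priority offset records how much more specific each
 * expansion is than the original user pattern.
 */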
504
505 enum mlx5_expansion {
506         MLX5_EXPANSION_ROOT,
507         MLX5_EXPANSION_ROOT_OUTER,
508         MLX5_EXPANSION_OUTER_ETH,
509         MLX5_EXPANSION_OUTER_VLAN,
510         MLX5_EXPANSION_OUTER_IPV4,
511         MLX5_EXPANSION_OUTER_IPV4_UDP,
512         MLX5_EXPANSION_OUTER_IPV4_TCP,
513         MLX5_EXPANSION_OUTER_IPV6,
514         MLX5_EXPANSION_OUTER_IPV6_UDP,
515         MLX5_EXPANSION_OUTER_IPV6_TCP,
516         MLX5_EXPANSION_VXLAN,
517         MLX5_EXPANSION_STD_VXLAN,
518         MLX5_EXPANSION_L3_VXLAN,
519         MLX5_EXPANSION_VXLAN_GPE,
520         MLX5_EXPANSION_GRE,
521         MLX5_EXPANSION_NVGRE,
522         MLX5_EXPANSION_GRE_KEY,
523         MLX5_EXPANSION_MPLS,
524         MLX5_EXPANSION_ETH,
525         MLX5_EXPANSION_VLAN,
526         MLX5_EXPANSION_IPV4,
527         MLX5_EXPANSION_IPV4_UDP,
528         MLX5_EXPANSION_IPV4_TCP,
529         MLX5_EXPANSION_IPV6,
530         MLX5_EXPANSION_IPV6_UDP,
531         MLX5_EXPANSION_IPV6_TCP,
532         MLX5_EXPANSION_IPV6_FRAG_EXT,
533         MLX5_EXPANSION_GTP
534 };
535
536 /** Supported expansion of items. */
537 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
538         [MLX5_EXPANSION_ROOT] = {
539                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
540                                                   MLX5_EXPANSION_IPV4,
541                                                   MLX5_EXPANSION_IPV6),
542                 .type = RTE_FLOW_ITEM_TYPE_END,
543         },
544         [MLX5_EXPANSION_ROOT_OUTER] = {
545                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
546                                                   MLX5_EXPANSION_OUTER_IPV4,
547                                                   MLX5_EXPANSION_OUTER_IPV6),
548                 .type = RTE_FLOW_ITEM_TYPE_END,
549         },
550         [MLX5_EXPANSION_OUTER_ETH] = {
551                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
552                 .type = RTE_FLOW_ITEM_TYPE_ETH,
553                 .rss_types = 0,
554         },
555         [MLX5_EXPANSION_OUTER_VLAN] = {
556                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
557                                                   MLX5_EXPANSION_OUTER_IPV6),
558                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
559                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
560         },
561         [MLX5_EXPANSION_OUTER_IPV4] = {
562                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
563                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
564                          MLX5_EXPANSION_OUTER_IPV4_TCP,
565                          MLX5_EXPANSION_GRE,
566                          MLX5_EXPANSION_NVGRE,
567                          MLX5_EXPANSION_IPV4,
568                          MLX5_EXPANSION_IPV6),
569                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
570                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
571                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
572         },
573         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
574                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
575                                                   MLX5_EXPANSION_VXLAN_GPE,
576                                                   MLX5_EXPANSION_MPLS,
577                                                   MLX5_EXPANSION_GTP),
578                 .type = RTE_FLOW_ITEM_TYPE_UDP,
579                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
580         },
581         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
582                 .type = RTE_FLOW_ITEM_TYPE_TCP,
583                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
584         },
585         [MLX5_EXPANSION_OUTER_IPV6] = {
586                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
587                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
588                          MLX5_EXPANSION_OUTER_IPV6_TCP,
589                          MLX5_EXPANSION_IPV4,
590                          MLX5_EXPANSION_IPV6,
591                          MLX5_EXPANSION_GRE,
592                          MLX5_EXPANSION_NVGRE),
593                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
594                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
595                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
596         },
597         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
598                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
599                                                   MLX5_EXPANSION_VXLAN_GPE,
600                                                   MLX5_EXPANSION_MPLS,
601                                                   MLX5_EXPANSION_GTP),
602                 .type = RTE_FLOW_ITEM_TYPE_UDP,
603                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
604         },
605         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
606                 .type = RTE_FLOW_ITEM_TYPE_TCP,
607                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
608         },
609         [MLX5_EXPANSION_VXLAN] = {
610                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
611                                                   MLX5_EXPANSION_IPV4,
612                                                   MLX5_EXPANSION_IPV6),
613                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
614         },
615         [MLX5_EXPANSION_STD_VXLAN] = {
616                         .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
617                                         .type = RTE_FLOW_ITEM_TYPE_VXLAN,
618         },
619         [MLX5_EXPANSION_L3_VXLAN] = {
620                         .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
621                                         MLX5_EXPANSION_IPV6),
622                                         .type = RTE_FLOW_ITEM_TYPE_VXLAN,
623         },
624         [MLX5_EXPANSION_VXLAN_GPE] = {
625                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
626                                                   MLX5_EXPANSION_IPV4,
627                                                   MLX5_EXPANSION_IPV6),
628                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
629         },
630         [MLX5_EXPANSION_GRE] = {
631                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
632                                                   MLX5_EXPANSION_IPV6,
633                                                   MLX5_EXPANSION_GRE_KEY,
634                                                   MLX5_EXPANSION_MPLS),
635                 .type = RTE_FLOW_ITEM_TYPE_GRE,
636         },
637         [MLX5_EXPANSION_GRE_KEY] = {
638                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
639                                                   MLX5_EXPANSION_IPV6,
640                                                   MLX5_EXPANSION_MPLS),
641                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
642                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
643         },
644         [MLX5_EXPANSION_NVGRE] = {
645                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
646                 .type = RTE_FLOW_ITEM_TYPE_NVGRE,
647         },
648         [MLX5_EXPANSION_MPLS] = {
649                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
650                                                   MLX5_EXPANSION_IPV6,
651                                                   MLX5_EXPANSION_ETH),
652                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
653                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
654         },
655         [MLX5_EXPANSION_ETH] = {
656                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
657                 .type = RTE_FLOW_ITEM_TYPE_ETH,
658         },
659         [MLX5_EXPANSION_VLAN] = {
660                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
661                                                   MLX5_EXPANSION_IPV6),
662                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
663                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
664         },
665         [MLX5_EXPANSION_IPV4] = {
666                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
667                                                   MLX5_EXPANSION_IPV4_TCP),
668                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
669                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
670                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
671         },
672         [MLX5_EXPANSION_IPV4_UDP] = {
673                 .type = RTE_FLOW_ITEM_TYPE_UDP,
674                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
675         },
676         [MLX5_EXPANSION_IPV4_TCP] = {
677                 .type = RTE_FLOW_ITEM_TYPE_TCP,
678                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
679         },
680         [MLX5_EXPANSION_IPV6] = {
681                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
682                                                   MLX5_EXPANSION_IPV6_TCP,
683                                                   MLX5_EXPANSION_IPV6_FRAG_EXT),
684                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
685                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
686                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
687         },
688         [MLX5_EXPANSION_IPV6_UDP] = {
689                 .type = RTE_FLOW_ITEM_TYPE_UDP,
690                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
691         },
692         [MLX5_EXPANSION_IPV6_TCP] = {
693                 .type = RTE_FLOW_ITEM_TYPE_TCP,
694                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
695         },
696         [MLX5_EXPANSION_IPV6_FRAG_EXT] = {
697                 .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
698         },
699         [MLX5_EXPANSION_GTP] = {
700                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
701                                                   MLX5_EXPANSION_IPV6),
702                 .type = RTE_FLOW_ITEM_TYPE_GTP,
703         },
704 };
705
706 static struct rte_flow_action_handle *
707 mlx5_action_handle_create(struct rte_eth_dev *dev,
708                           const struct rte_flow_indir_action_conf *conf,
709                           const struct rte_flow_action *action,
710                           struct rte_flow_error *error);
711 static int mlx5_action_handle_destroy
712                                 (struct rte_eth_dev *dev,
713                                  struct rte_flow_action_handle *handle,
714                                  struct rte_flow_error *error);
715 static int mlx5_action_handle_update
716                                 (struct rte_eth_dev *dev,
717                                  struct rte_flow_action_handle *handle,
718                                  const void *update,
719                                  struct rte_flow_error *error);
720 static int mlx5_action_handle_query
721                                 (struct rte_eth_dev *dev,
722                                  const struct rte_flow_action_handle *handle,
723                                  void *data,
724                                  struct rte_flow_error *error);
725 static int
726 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
727                     struct rte_flow_tunnel *app_tunnel,
728                     struct rte_flow_action **actions,
729                     uint32_t *num_of_actions,
730                     struct rte_flow_error *error);
731 static int
732 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
733                        struct rte_flow_tunnel *app_tunnel,
734                        struct rte_flow_item **items,
735                        uint32_t *num_of_items,
736                        struct rte_flow_error *error);
737 static int
738 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
739                               struct rte_flow_item *pmd_items,
740                               uint32_t num_items, struct rte_flow_error *err);
741 static int
742 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
743                                 struct rte_flow_action *pmd_actions,
744                                 uint32_t num_actions,
745                                 struct rte_flow_error *err);
746 static int
747 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
748                                   struct rte_mbuf *m,
749                                   struct rte_flow_restore_info *info,
750                                   struct rte_flow_error *err);
751
752 static const struct rte_flow_ops mlx5_flow_ops = {
753         .validate = mlx5_flow_validate,
754         .create = mlx5_flow_create,
755         .destroy = mlx5_flow_destroy,
756         .flush = mlx5_flow_flush,
757         .isolate = mlx5_flow_isolate,
758         .query = mlx5_flow_query,
759         .dev_dump = mlx5_flow_dev_dump,
760         .get_aged_flows = mlx5_flow_get_aged_flows,
761         .action_handle_create = mlx5_action_handle_create,
762         .action_handle_destroy = mlx5_action_handle_destroy,
763         .action_handle_update = mlx5_action_handle_update,
764         .action_handle_query = mlx5_action_handle_query,
765         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
766         .tunnel_match = mlx5_flow_tunnel_match,
767         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
768         .tunnel_item_release = mlx5_flow_tunnel_item_release,
769         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
770 };
771
772 /* Tunnel information. */
773 struct mlx5_flow_tunnel_info {
774         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
775         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
776 };
777
778 static struct mlx5_flow_tunnel_info tunnels_info[] = {
779         {
780                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
781                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
782         },
783         {
784                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
785                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
786         },
787         {
788                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
789                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
790         },
791         {
792                 .tunnel = MLX5_FLOW_LAYER_GRE,
793                 .ptype = RTE_PTYPE_TUNNEL_GRE,
794         },
795         {
796                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
797                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
798         },
799         {
800                 .tunnel = MLX5_FLOW_LAYER_MPLS,
801                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
802         },
803         {
804                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
805                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
806         },
807         {
808                 .tunnel = MLX5_FLOW_LAYER_IPIP,
809                 .ptype = RTE_PTYPE_TUNNEL_IP,
810         },
811         {
812                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
813                 .ptype = RTE_PTYPE_TUNNEL_IP,
814         },
815         {
816                 .tunnel = MLX5_FLOW_LAYER_GTP,
817                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
818         },
819 };
820
821
822
823 /**
824  * Translate tag ID to register.
825  *
826  * @param[in] dev
827  *   Pointer to the Ethernet device structure.
828  * @param[in] feature
829  *   The feature that request the register.
830  *   The feature that requests the register.
831  * @param[in] id
832  *   The requested register ID.
833  * @param[out] error
834  *   Error description in case of failure.
835  * @return
836  *   The requested register on success, a negative errno
837  *   value otherwise and rte_errno is set.
838  */
839 int
840 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
841                      enum mlx5_feature_name feature,
842                      uint32_t id,
843                      struct rte_flow_error *error)
844 {
845         struct mlx5_priv *priv = dev->data->dev_private;
846         struct mlx5_dev_config *config = &priv->config;
847         enum modify_reg start_reg;
848         bool skip_mtr_reg = false;
849
850         switch (feature) {
851         case MLX5_HAIRPIN_RX:
852                 return REG_B;
853         case MLX5_HAIRPIN_TX:
854                 return REG_A;
855         case MLX5_METADATA_RX:
856                 switch (config->dv_xmeta_en) {
857                 case MLX5_XMETA_MODE_LEGACY:
858                         return REG_B;
859                 case MLX5_XMETA_MODE_META16:
860                         return REG_C_0;
861                 case MLX5_XMETA_MODE_META32:
862                         return REG_C_1;
863                 }
864                 break;
865         case MLX5_METADATA_TX:
866                 return REG_A;
867         case MLX5_METADATA_FDB:
868                 switch (config->dv_xmeta_en) {
869                 case MLX5_XMETA_MODE_LEGACY:
870                         return REG_NON;
871                 case MLX5_XMETA_MODE_META16:
872                         return REG_C_0;
873                 case MLX5_XMETA_MODE_META32:
874                         return REG_C_1;
875                 }
876                 break;
877         case MLX5_FLOW_MARK:
878                 switch (config->dv_xmeta_en) {
879                 case MLX5_XMETA_MODE_LEGACY:
880                         return REG_NON;
881                 case MLX5_XMETA_MODE_META16:
882                         return REG_C_1;
883                 case MLX5_XMETA_MODE_META32:
884                         return REG_C_0;
885                 }
886                 break;
887         case MLX5_MTR_ID:
888                 /*
889                  * If meter color and meter id share one register, flow match
890                  * should use the meter color register for match.
891                  */
892                 if (priv->mtr_reg_share)
893                         return priv->mtr_color_reg;
894                 else
895                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
896                                REG_C_3;
897         case MLX5_MTR_COLOR:
898         case MLX5_ASO_FLOW_HIT:
899         case MLX5_ASO_CONNTRACK:
900                 /* All features use the same REG_C. */
901                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
902                 return priv->mtr_color_reg;
903         case MLX5_COPY_MARK:
904                 /*
905                  * The metadata COPY_MARK register is used in the meter suffix
906                  * sub-flow when a meter is present; sharing the same register is safe.
907                  */
908                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
909         case MLX5_APP_TAG:
910                 /*
911                  * If the meter is enabled, it engages a register for color
912                  * match and flow match. If the meter color match does not use
913                  * REG_C_2, the REG_C_x used by the meter color match must be
914                  * skipped.
915                  * If the meter is disabled, all available registers can be used.
916                  */
917                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
918                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
919                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
920                 if (id > (uint32_t)(REG_C_7 - start_reg))
921                         return rte_flow_error_set(error, EINVAL,
922                                                   RTE_FLOW_ERROR_TYPE_ITEM,
923                                                   NULL, "invalid tag id");
924                 if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
925                         return rte_flow_error_set(error, ENOTSUP,
926                                                   RTE_FLOW_ERROR_TYPE_ITEM,
927                                                   NULL, "unsupported tag id");
928                 /*
929                  * This case means the meter is using a REG_C_x greater than 2.
930                  * Take care not to conflict with meter color REG_C_x.
931                  * If the available index REG_C_y >= REG_C_x, skip the
932                  * color register.
933                  */
934                 if (skip_mtr_reg && priv->sh->flow_mreg_c
935                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
936                         if (id >= (uint32_t)(REG_C_7 - start_reg))
937                                 return rte_flow_error_set(error, EINVAL,
938                                                        RTE_FLOW_ERROR_TYPE_ITEM,
939                                                         NULL, "invalid tag id");
940                         if (priv->sh->flow_mreg_c
941                             [id + 1 + start_reg - REG_C_0] != REG_NON)
942                                 return priv->sh->flow_mreg_c
943                                                [id + 1 + start_reg - REG_C_0];
944                         return rte_flow_error_set(error, ENOTSUP,
945                                                   RTE_FLOW_ERROR_TYPE_ITEM,
946                                                   NULL, "unsupported tag id");
947                 }
948                 return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
949         }
950         MLX5_ASSERT(false);
951         return rte_flow_error_set(error, EINVAL,
952                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
953                                   NULL, "invalid feature name");
954 }
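
/*
 * Illustration of the MLX5_APP_TAG branch above (a sketch; actual values
 * depend on probed capabilities and assume the REG_C_* enum values are
 * consecutive, as the indexing above already relies on): if the meter color
 * register is REG_C_2 and meter registers are shared, start_reg is REG_C_3,
 * so tag id 0 maps to priv->sh->flow_mreg_c[REG_C_3 - REG_C_0] (index 3),
 * and any tag id above REG_C_7 - REG_C_3 = 4 is rejected as an invalid
 * tag id.
 */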
955
956 /**
957  * Check extensive flow metadata register support.
958  *
959  * @param dev
960  *   Pointer to rte_eth_dev structure.
961  *
962  * @return
963  *   True if device supports extensive flow metadata register, otherwise false.
964  */
965 bool
966 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
967 {
968         struct mlx5_priv *priv = dev->data->dev_private;
969
970         /*
971          * Having an available reg_c can be regarded as supporting extensive
972          * flow metadata registers, which implies:
973          * - metadata register copy action by modify header,
974          * - 16 modify header actions are supported,
975          * - reg_c's are preserved across different domains (FDB and NIC) on
976          *   packet loopback by flow lookup miss.
977          */
978         return priv->sh->flow_mreg_c[2] != REG_NON;
979 }
980
981 /**
982  * Get the lowest priority.
983  *
984  * @param[in] dev
985  *   Pointer to the Ethernet device structure.
986  * @param[in] attr
987  *   Pointer to device flow rule attributes.
988  *
989  * @return
990  *   The value of lowest priority of flow.
991  */
992 uint32_t
993 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
994                           const struct rte_flow_attr *attr)
995 {
996         struct mlx5_priv *priv = dev->data->dev_private;
997
998         if (!attr->group && !attr->transfer)
999                 return priv->sh->flow_max_priority - 2;
1000         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1001 }
1002
1003 /**
1004  * Calculate matcher priority of the flow.
1005  *
1006  * @param[in] dev
1007  *   Pointer to the Ethernet device structure.
1008  * @param[in] attr
1009  *   Pointer to device flow rule attributes.
1010  * @param[in] subpriority
1011  *   The priority based on the items.
1012  * @param[in] external
1013  *   Flow is user flow.
1014  * @return
1015  *   The matcher priority of the flow.
1016  */
1017 uint16_t
1018 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1019                           const struct rte_flow_attr *attr,
1020                           uint32_t subpriority, bool external)
1021 {
1022         uint16_t priority = (uint16_t)attr->priority;
1023         struct mlx5_priv *priv = dev->data->dev_private;
1024
1025         if (!attr->group && !attr->transfer) {
1026                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1027                         priority = priv->sh->flow_max_priority - 1;
1028                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1029         } else if (!external && attr->transfer && attr->group == 0 &&
1030                    attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1031                 return (priv->sh->flow_max_priority - 1) * 3;
1032         }
1033         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1034                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1035         return priority * 3 + subpriority;
1036 }
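
/*
 * Worked example (illustrative): for a non-root flow (group != 0) with
 * attr->priority == 1 and an item sub-priority of 2, the matcher priority
 * is 1 * 3 + 2 = 5. Root flows (group 0, no transfer) instead go through
 * mlx5_os_flow_adjust_priority(), and a root transfer rule created
 * internally with the lowest-priority indicator gets
 * (flow_max_priority - 1) * 3.
 */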
1037
1038 /**
1039  * Verify the @p item specifications (spec, last, mask) are compatible with the
1040  * NIC capabilities.
1041  *
1042  * @param[in] item
1043  *   Item specification.
1044  * @param[in] mask
1045  *   @p item->mask or flow default bit-masks.
1046  * @param[in] nic_mask
1047  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1048  * @param[in] size
1049  *   Bit-masks size in bytes.
1050  * @param[in] range_accepted
1051  *   True if range of values is accepted for specific fields, false otherwise.
1052  * @param[out] error
1053  *   Pointer to error structure.
1054  *
1055  * @return
1056  *   0 on success, a negative errno value otherwise and rte_errno is set.
1057  */
1058 int
1059 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1060                           const uint8_t *mask,
1061                           const uint8_t *nic_mask,
1062                           unsigned int size,
1063                           bool range_accepted,
1064                           struct rte_flow_error *error)
1065 {
1066         unsigned int i;
1067
1068         MLX5_ASSERT(nic_mask);
1069         for (i = 0; i < size; ++i)
1070                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1071                         return rte_flow_error_set(error, ENOTSUP,
1072                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1073                                                   item,
1074                                                   "mask enables non supported"
1075                                                   " bits");
1076         if (!item->spec && (item->mask || item->last))
1077                 return rte_flow_error_set(error, EINVAL,
1078                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1079                                           "mask/last without a spec is not"
1080                                           " supported");
1081         if (item->spec && item->last && !range_accepted) {
1082                 uint8_t spec[size];
1083                 uint8_t last[size];
1084                 unsigned int i;
1085                 int ret;
1086
1087                 for (i = 0; i < size; ++i) {
1088                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1089                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1090                 }
1091                 ret = memcmp(spec, last, size);
1092                 if (ret != 0)
1093                         return rte_flow_error_set(error, EINVAL,
1094                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1095                                                   item,
1096                                                   "range is not valid");
1097         }
1098         return 0;
1099 }
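
/*
 * Illustration (a sketch): a mask that enables a bit which is zero in
 * @p nic_mask fails with ENOTSUP ("mask enables non supported bits").
 * When both spec and last are given and ranges are not accepted, the two
 * are compared byte by byte under the mask, so spec == last (after
 * masking) is still accepted while a real range is rejected as
 * "range is not valid".
 */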
1100
1101 /**
1102  * Adjust the hash fields according to the @p flow information.
1103  *
1104  * @param[in] rss_desc
1105  *   Pointer to the mlx5_flow_rss_desc.
1106  * @param[in] tunnel
1107  *   1 when the hash field is for a tunnel item.
1108  * @param[in] layer_types
1109  *   RTE_ETH_RSS_* types.
1110  * @param[in] hash_fields
1111  *   Item hash fields.
1112  *
1113  * @return
1114  *   The hash fields that should be used.
1115  */
1116 uint64_t
1117 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1118                             int tunnel __rte_unused, uint64_t layer_types,
1119                             uint64_t hash_fields)
1120 {
1121 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1122         int rss_request_inner = rss_desc->level >= 2;
1123
1124         /* Check RSS hash level for tunnel. */
1125         if (tunnel && rss_request_inner)
1126                 hash_fields |= IBV_RX_HASH_INNER;
1127         else if (tunnel || rss_request_inner)
1128                 return 0;
1129 #endif
1130         /* Check if requested layer matches RSS hash fields. */
1131         if (!(rss_desc->types & layer_types))
1132                 return 0;
1133         return hash_fields;
1134 }
1135
1136 /**
1137  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
1138  * can be used; if several tunnel rules are used on this queue, the tunnel
1139  * ptype is cleared.
1140  *
1141  * @param rxq_ctrl
1142  *   Rx queue to update.
1143  */
1144 static void
1145 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1146 {
1147         unsigned int i;
1148         uint32_t tunnel_ptype = 0;
1149
1150         /* Look up the ptype to use. */
1151         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1152                 if (!rxq_ctrl->flow_tunnels_n[i])
1153                         continue;
1154                 if (!tunnel_ptype) {
1155                         tunnel_ptype = tunnels_info[i].ptype;
1156                 } else {
1157                         tunnel_ptype = 0;
1158                         break;
1159                 }
1160         }
1161         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1162 }
1163
1164 /**
1165  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1166  * flow.
1167  *
1168  * @param[in] dev
1169  *   Pointer to the Ethernet device structure.
1170  * @param[in] dev_handle
1171  *   Pointer to device flow handle structure.
1172  */
1173 void
1174 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1175                        struct mlx5_flow_handle *dev_handle)
1176 {
1177         struct mlx5_priv *priv = dev->data->dev_private;
1178         const int mark = dev_handle->mark;
1179         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1180         struct mlx5_ind_table_obj *ind_tbl = NULL;
1181         unsigned int i;
1182
1183         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1184                 struct mlx5_hrxq *hrxq;
1185
1186                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1187                               dev_handle->rix_hrxq);
1188                 if (hrxq)
1189                         ind_tbl = hrxq->ind_table;
1190         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1191                 struct mlx5_shared_action_rss *shared_rss;
1192
1193                 shared_rss = mlx5_ipool_get
1194                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1195                          dev_handle->rix_srss);
1196                 if (shared_rss)
1197                         ind_tbl = shared_rss->ind_tbl;
1198         }
1199         if (!ind_tbl)
1200                 return;
1201         for (i = 0; i != ind_tbl->queues_n; ++i) {
1202                 int idx = ind_tbl->queues[i];
1203                 struct mlx5_rxq_ctrl *rxq_ctrl =
1204                         container_of((*priv->rxqs)[idx],
1205                                      struct mlx5_rxq_ctrl, rxq);
1206
1207                 /*
1208                  * To support metadata register copy on Tx loopback,
1209                  * this must always be enabled (metadata may arrive
1210                  * from another port, not only from local flows).
1211                  */
1212                 if (priv->config.dv_flow_en &&
1213                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1214                     mlx5_flow_ext_mreg_supported(dev)) {
1215                         rxq_ctrl->rxq.mark = 1;
1216                         rxq_ctrl->flow_mark_n = 1;
1217                 } else if (mark) {
1218                         rxq_ctrl->rxq.mark = 1;
1219                         rxq_ctrl->flow_mark_n++;
1220                 }
1221                 if (tunnel) {
1222                         unsigned int j;
1223
1224                         /* Increase the counter matching the flow. */
1225                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1226                                 if ((tunnels_info[j].tunnel &
1227                                      dev_handle->layers) ==
1228                                     tunnels_info[j].tunnel) {
1229                                         rxq_ctrl->flow_tunnels_n[j]++;
1230                                         break;
1231                                 }
1232                         }
1233                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1234                 }
1235         }
1236 }
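
/*
 * Illustration (a sketch): with two flows marking packets on the same Rx
 * queue, flow_mark_n reaches 2 and rxq.mark stays set; trimming one flow
 * (see flow_drv_rxq_flags_trim() below) decrements the counter, and
 * rxq.mark is cleared only once the last marking flow is gone. Tunnel
 * ptypes follow the same per-tunnel-type reference counting.
 */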
1237
1238 /**
1239  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1240  *
1241  * @param[in] dev
1242  *   Pointer to the Ethernet device structure.
1243  * @param[in] flow
1244  *   Pointer to flow structure.
1245  */
1246 static void
1247 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1248 {
1249         struct mlx5_priv *priv = dev->data->dev_private;
1250         uint32_t handle_idx;
1251         struct mlx5_flow_handle *dev_handle;
1252
1253         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1254                        handle_idx, dev_handle, next)
1255                 flow_drv_rxq_flags_set(dev, dev_handle);
1256 }
1257
1258 /**
1259  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1260  * device flow if no other flow uses it with the same kind of request.
1261  *
1262  * @param dev
1263  *   Pointer to Ethernet device.
1264  * @param[in] dev_handle
1265  *   Pointer to the device flow handle structure.
1266  */
1267 static void
1268 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1269                         struct mlx5_flow_handle *dev_handle)
1270 {
1271         struct mlx5_priv *priv = dev->data->dev_private;
1272         const int mark = dev_handle->mark;
1273         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1274         struct mlx5_ind_table_obj *ind_tbl = NULL;
1275         unsigned int i;
1276
1277         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1278                 struct mlx5_hrxq *hrxq;
1279
1280                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1281                               dev_handle->rix_hrxq);
1282                 if (hrxq)
1283                         ind_tbl = hrxq->ind_table;
1284         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1285                 struct mlx5_shared_action_rss *shared_rss;
1286
1287                 shared_rss = mlx5_ipool_get
1288                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1289                          dev_handle->rix_srss);
1290                 if (shared_rss)
1291                         ind_tbl = shared_rss->ind_tbl;
1292         }
1293         if (!ind_tbl)
1294                 return;
1295         MLX5_ASSERT(dev->data->dev_started);
1296         for (i = 0; i != ind_tbl->queues_n; ++i) {
1297                 int idx = ind_tbl->queues[i];
1298                 struct mlx5_rxq_ctrl *rxq_ctrl =
1299                         container_of((*priv->rxqs)[idx],
1300                                      struct mlx5_rxq_ctrl, rxq);
1301
1302                 if (priv->config.dv_flow_en &&
1303                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1304                     mlx5_flow_ext_mreg_supported(dev)) {
1305                         rxq_ctrl->rxq.mark = 1;
1306                         rxq_ctrl->flow_mark_n = 1;
1307                 } else if (mark) {
1308                         rxq_ctrl->flow_mark_n--;
1309                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1310                 }
1311                 if (tunnel) {
1312                         unsigned int j;
1313
1314                         /* Decrease the counter matching the flow. */
1315                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1316                                 if ((tunnels_info[j].tunnel &
1317                                      dev_handle->layers) ==
1318                                     tunnels_info[j].tunnel) {
1319                                         rxq_ctrl->flow_tunnels_n[j]--;
1320                                         break;
1321                                 }
1322                         }
1323                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1324                 }
1325         }
1326 }
1327
1328 /**
1329  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1330  * @p flow if no other flow uses it with the same kind of request.
1331  *
1332  * @param dev
1333  *   Pointer to Ethernet device.
1334  * @param[in] flow
1335  *   Pointer to the flow.
1336  */
1337 static void
1338 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1339 {
1340         struct mlx5_priv *priv = dev->data->dev_private;
1341         uint32_t handle_idx;
1342         struct mlx5_flow_handle *dev_handle;
1343
1344         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1345                        handle_idx, dev_handle, next)
1346                 flow_drv_rxq_flags_trim(dev, dev_handle);
1347 }
1348
1349 /**
1350  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1351  *
1352  * @param dev
1353  *   Pointer to Ethernet device.
1354  */
1355 static void
1356 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1357 {
1358         struct mlx5_priv *priv = dev->data->dev_private;
1359         unsigned int i;
1360
1361         for (i = 0; i != priv->rxqs_n; ++i) {
1362                 struct mlx5_rxq_ctrl *rxq_ctrl;
1363                 unsigned int j;
1364
1365                 if (!(*priv->rxqs)[i])
1366                         continue;
1367                 rxq_ctrl = container_of((*priv->rxqs)[i],
1368                                         struct mlx5_rxq_ctrl, rxq);
1369                 rxq_ctrl->flow_mark_n = 0;
1370                 rxq_ctrl->rxq.mark = 0;
1371                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1372                         rxq_ctrl->flow_tunnels_n[j] = 0;
1373                 rxq_ctrl->rxq.tunnel = 0;
1374         }
1375 }
1376
1377 /**
1378  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1379  *
1380  * @param[in] dev
1381  *   Pointer to the Ethernet device structure.
1382  */
1383 void
1384 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1385 {
1386         struct mlx5_priv *priv = dev->data->dev_private;
1387         struct mlx5_rxq_data *data;
1388         unsigned int i;
1389
1390         for (i = 0; i != priv->rxqs_n; ++i) {
1391                 if (!(*priv->rxqs)[i])
1392                         continue;
1393                 data = (*priv->rxqs)[i];
1394                 if (!rte_flow_dynf_metadata_avail()) {
1395                         data->dynf_meta = 0;
1396                         data->flow_meta_mask = 0;
1397                         data->flow_meta_offset = -1;
1398                         data->flow_meta_port_mask = 0;
1399                 } else {
1400                         data->dynf_meta = 1;
1401                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1402                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1403                         data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1404                 }
1405         }
1406 }
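
/*
 * Illustrative sketch (editor's note, not part of the driver): the dynamic
 * metadata field and flag tested with rte_flow_dynf_metadata_avail() only
 * exist after the application has registered them, e.g.:
 *
 *     if (rte_flow_dynf_metadata_register() < 0)
 *             rte_exit(EXIT_FAILURE, "cannot register metadata dynfield");
 *     // ...then configure/start the port so the PMD copies the registered
 *     // offset and mask into each Rx queue via the function above.
 */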
1407
1408 /*
1409  * Return a pointer to the desired action in the list of actions.
1410  *
1411  * @param[in] actions
1412  *   The list of actions to search the action in.
1413  * @param[in] action
1414  *   The action to find.
1415  *
1416  * @return
1417  *   Pointer to the action in the list, if found. NULL otherwise.
1418  */
1419 const struct rte_flow_action *
1420 mlx5_flow_find_action(const struct rte_flow_action *actions,
1421                       enum rte_flow_action_type action)
1422 {
1423         if (actions == NULL)
1424                 return NULL;
1425         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1426                 if (actions->type == action)
1427                         return actions;
1428         return NULL;
1429 }
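
/*
 * Illustrative usage sketch (editor's note): callers scan an END-terminated
 * action list for a given type, e.g. to locate an RSS action:
 *
 *     const struct rte_flow_action *rss_act =
 *             mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *     if (rss_act != NULL) {
 *             const struct rte_flow_action_rss *conf = rss_act->conf;
 *             // inspect conf->queue_num, conf->types, ...
 *     }
 */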
1430
1431 /*
1432  * Validate the flag action.
1433  *
1434  * @param[in] action_flags
1435  *   Bit-fields that hold the actions detected until now.
1436  * @param[in] attr
1437  *   Attributes of flow that includes this action.
1438  * @param[out] error
1439  *   Pointer to error structure.
1440  *
1441  * @return
1442  *   0 on success, a negative errno value otherwise and rte_errno is set.
1443  */
1444 int
1445 mlx5_flow_validate_action_flag(uint64_t action_flags,
1446                                const struct rte_flow_attr *attr,
1447                                struct rte_flow_error *error)
1448 {
1449         if (action_flags & MLX5_FLOW_ACTION_MARK)
1450                 return rte_flow_error_set(error, EINVAL,
1451                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1452                                           "can't mark and flag in same flow");
1453         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1454                 return rte_flow_error_set(error, EINVAL,
1455                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1456                                           "can't have 2 flag"
1457                                           " actions in same flow");
1458         if (attr->egress)
1459                 return rte_flow_error_set(error, ENOTSUP,
1460                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1461                                           "flag action not supported for "
1462                                           "egress");
1463         return 0;
1464 }
1465
1466 /*
1467  * Validate the mark action.
1468  *
1469  * @param[in] action
1470  *   Pointer to the mark action.
1471  * @param[in] action_flags
1472  *   Bit-fields that hold the actions detected until now.
1473  * @param[in] attr
1474  *   Attributes of flow that includes this action.
1475  * @param[out] error
1476  *   Pointer to error structure.
1477  *
1478  * @return
1479  *   0 on success, a negative errno value otherwise and rte_errno is set.
1480  */
1481 int
1482 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1483                                uint64_t action_flags,
1484                                const struct rte_flow_attr *attr,
1485                                struct rte_flow_error *error)
1486 {
1487         const struct rte_flow_action_mark *mark = action->conf;
1488
1489         if (!mark)
1490                 return rte_flow_error_set(error, EINVAL,
1491                                           RTE_FLOW_ERROR_TYPE_ACTION,
1492                                           action,
1493                                           "configuration cannot be null");
1494         if (mark->id >= MLX5_FLOW_MARK_MAX)
1495                 return rte_flow_error_set(error, EINVAL,
1496                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1497                                           &mark->id,
1498                                           "mark id must be in 0 <= id < "
1499                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1500         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1501                 return rte_flow_error_set(error, EINVAL,
1502                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1503                                           "can't flag and mark in same flow");
1504         if (action_flags & MLX5_FLOW_ACTION_MARK)
1505                 return rte_flow_error_set(error, EINVAL,
1506                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1507                                           "can't have 2 mark actions in same"
1508                                           " flow");
1509         if (attr->egress)
1510                 return rte_flow_error_set(error, ENOTSUP,
1511                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1512                                           "mark action not supported for "
1513                                           "egress");
1514         return 0;
1515 }
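
/*
 * Illustrative sketch (editor's note): a MARK action configuration that
 * passes the checks above; the id value is an arbitrary example and must
 * stay below MLX5_FLOW_MARK_MAX, and MARK cannot be combined with FLAG.
 *
 *     static const struct rte_flow_action_mark mark_conf = { .id = 0xcafe };
 *     const struct rte_flow_action mark_action = {
 *             .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &mark_conf,
 *     };
 */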
1516
1517 /*
1518  * Validate the drop action.
1519  *
1520  * @param[in] action_flags
1521  *   Bit-fields that hold the actions detected until now.
1522  * @param[in] attr
1523  *   Attributes of flow that includes this action.
1524  * @param[out] error
1525  *   Pointer to error structure.
1526  *
1527  * @return
1528  *   0 on success, a negative errno value otherwise and rte_errno is set.
1529  */
1530 int
1531 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1532                                const struct rte_flow_attr *attr,
1533                                struct rte_flow_error *error)
1534 {
1535         if (attr->egress)
1536                 return rte_flow_error_set(error, ENOTSUP,
1537                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1538                                           "drop action not supported for "
1539                                           "egress");
1540         return 0;
1541 }
1542
1543 /*
1544  * Validate the queue action.
1545  *
1546  * @param[in] action
1547  *   Pointer to the queue action.
1548  * @param[in] action_flags
1549  *   Bit-fields that hold the actions detected until now.
1550  * @param[in] dev
1551  *   Pointer to the Ethernet device structure.
1552  * @param[in] attr
1553  *   Attributes of flow that includes this action.
1554  * @param[out] error
1555  *   Pointer to error structure.
1556  *
1557  * @return
1558  *   0 on success, a negative errno value otherwise and rte_errno is set.
1559  */
1560 int
1561 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1562                                 uint64_t action_flags,
1563                                 struct rte_eth_dev *dev,
1564                                 const struct rte_flow_attr *attr,
1565                                 struct rte_flow_error *error)
1566 {
1567         struct mlx5_priv *priv = dev->data->dev_private;
1568         const struct rte_flow_action_queue *queue = action->conf;
1569
1570         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1571                 return rte_flow_error_set(error, EINVAL,
1572                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1573                                           "can't have 2 fate actions in"
1574                                           " same flow");
1575         if (!priv->rxqs_n)
1576                 return rte_flow_error_set(error, EINVAL,
1577                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1578                                           NULL, "No Rx queues configured");
1579         if (queue->index >= priv->rxqs_n)
1580                 return rte_flow_error_set(error, EINVAL,
1581                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1582                                           &queue->index,
1583                                           "queue index out of range");
1584         if (!(*priv->rxqs)[queue->index])
1585                 return rte_flow_error_set(error, EINVAL,
1586                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1587                                           &queue->index,
1588                                           "queue is not configured");
1589         if (attr->egress)
1590                 return rte_flow_error_set(error, ENOTSUP,
1591                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1592                                           "queue action not supported for "
1593                                           "egress");
1594         return 0;
1595 }
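
/*
 * Illustrative sketch (editor's note): a QUEUE fate action accepted by the
 * checks above, assuming Rx queue 0 is configured and no other fate action
 * is present in the same flow.
 *
 *     static const struct rte_flow_action_queue queue_conf = { .index = 0 };
 *     const struct rte_flow_action queue_action = {
 *             .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &queue_conf,
 *     };
 */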
1596
1597 /*
1598  * Validate the rss action.
1599  *
1600  * @param[in] dev
1601  *   Pointer to the Ethernet device structure.
1602  * @param[in] action
1603  *   Pointer to the RSS action.
1604  * @param[out] error
1605  *   Pointer to error structure.
1606  *
1607  * @return
1608  *   0 on success, a negative errno value otherwise and rte_errno is set.
1609  */
1610 int
1611 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1612                          const struct rte_flow_action *action,
1613                          struct rte_flow_error *error)
1614 {
1615         struct mlx5_priv *priv = dev->data->dev_private;
1616         const struct rte_flow_action_rss *rss = action->conf;
1617         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1618         unsigned int i;
1619
1620         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1621             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1622                 return rte_flow_error_set(error, ENOTSUP,
1623                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1624                                           &rss->func,
1625                                           "RSS hash function not supported");
1626 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1627         if (rss->level > 2)
1628 #else
1629         if (rss->level > 1)
1630 #endif
1631                 return rte_flow_error_set(error, ENOTSUP,
1632                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1633                                           &rss->level,
1634                                           "tunnel RSS is not supported");
1635         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1636         if (rss->key_len == 0 && rss->key != NULL)
1637                 return rte_flow_error_set(error, ENOTSUP,
1638                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1639                                           &rss->key_len,
1640                                           "RSS hash key length 0");
1641         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1642                 return rte_flow_error_set(error, ENOTSUP,
1643                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1644                                           &rss->key_len,
1645                                           "RSS hash key too small");
1646         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1647                 return rte_flow_error_set(error, ENOTSUP,
1648                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1649                                           &rss->key_len,
1650                                           "RSS hash key too large");
1651         if (rss->queue_num > priv->config.ind_table_max_size)
1652                 return rte_flow_error_set(error, ENOTSUP,
1653                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1654                                           &rss->queue_num,
1655                                           "number of queues too large");
1656         if (rss->types & MLX5_RSS_HF_MASK)
1657                 return rte_flow_error_set(error, ENOTSUP,
1658                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1659                                           &rss->types,
1660                                           "some RSS protocols are not"
1661                                           " supported");
1662         if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1663             !(rss->types & RTE_ETH_RSS_IP))
1664                 return rte_flow_error_set(error, EINVAL,
1665                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1666                                           "L3 partial RSS requested but L3 RSS"
1667                                           " type not specified");
1668         if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1669             !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1670                 return rte_flow_error_set(error, EINVAL,
1671                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1672                                           "L4 partial RSS requested but L4 RSS"
1673                                           " type not specified");
1674         if (!priv->rxqs_n)
1675                 return rte_flow_error_set(error, EINVAL,
1676                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1677                                           NULL, "No Rx queues configured");
1678         if (!rss->queue_num)
1679                 return rte_flow_error_set(error, EINVAL,
1680                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1681                                           NULL, "No queues configured");
1682         for (i = 0; i != rss->queue_num; ++i) {
1683                 struct mlx5_rxq_ctrl *rxq_ctrl;
1684
1685                 if (rss->queue[i] >= priv->rxqs_n)
1686                         return rte_flow_error_set
1687                                 (error, EINVAL,
1688                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1689                                  &rss->queue[i], "queue index out of range");
1690                 if (!(*priv->rxqs)[rss->queue[i]])
1691                         return rte_flow_error_set
1692                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1693                                  &rss->queue[i], "queue is not configured");
1694                 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1695                                         struct mlx5_rxq_ctrl, rxq);
1696                 if (i == 0)
1697                         rxq_type = rxq_ctrl->type;
1698                 if (rxq_type != rxq_ctrl->type)
1699                         return rte_flow_error_set
1700                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1701                                  &rss->queue[i],
1702                                  "combining hairpin and regular RSS queues is not supported");
1703         }
1704         return 0;
1705 }
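
/*
 * Illustrative sketch (editor's note): an RSS action configuration that
 * satisfies the constraints above, assuming Rx queues 0 and 1 are
 * configured and of the same type; a NULL key with key_len 0 selects the
 * default RSS key.
 *
 *     static const uint16_t rss_queues[] = { 0, 1 };
 *     static const struct rte_flow_action_rss rss_conf = {
 *             .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *             .level = 1,
 *             .types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP,
 *             .key = NULL,
 *             .key_len = 0,
 *             .queue = rss_queues,
 *             .queue_num = RTE_DIM(rss_queues),
 *     };
 */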
1706
1707 /*
1708  * Validate the rss action.
1709  *
1710  * @param[in] action
1711  *   Pointer to the RSS action.
1712  * @param[in] action_flags
1713  *   Bit-fields that hold the actions detected until now.
1714  * @param[in] dev
1715  *   Pointer to the Ethernet device structure.
1716  * @param[in] attr
1717  *   Attributes of flow that includes this action.
1718  * @param[in] item_flags
1719  *   Items that were detected.
1720  * @param[out] error
1721  *   Pointer to error structure.
1722  *
1723  * @return
1724  *   0 on success, a negative errno value otherwise and rte_errno is set.
1725  */
1726 int
1727 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1728                               uint64_t action_flags,
1729                               struct rte_eth_dev *dev,
1730                               const struct rte_flow_attr *attr,
1731                               uint64_t item_flags,
1732                               struct rte_flow_error *error)
1733 {
1734         const struct rte_flow_action_rss *rss = action->conf;
1735         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1736         int ret;
1737
1738         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1739                 return rte_flow_error_set(error, EINVAL,
1740                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1741                                           "can't have 2 fate actions"
1742                                           " in same flow");
1743         ret = mlx5_validate_action_rss(dev, action, error);
1744         if (ret)
1745                 return ret;
1746         if (attr->egress)
1747                 return rte_flow_error_set(error, ENOTSUP,
1748                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1749                                           "rss action not supported for "
1750                                           "egress");
1751         if (rss->level > 1 && !tunnel)
1752                 return rte_flow_error_set(error, EINVAL,
1753                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1754                                           "inner RSS is not supported for "
1755                                           "non-tunnel flows");
1756         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1757             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1758                 return rte_flow_error_set(error, EINVAL,
1759                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1760                                           "RSS on eCPRI is not supported now");
1761         }
1762         if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1763             !(item_flags &
1764               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1765             rss->level > 1)
1766                 return rte_flow_error_set(error, EINVAL,
1767                                           RTE_FLOW_ERROR_TYPE_ITEM, NULL,
1768                                           "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
1769         return 0;
1770 }
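
/*
 * Illustrative sketch (editor's note): inner (level 2) RSS only passes the
 * check above when the pattern carries a tunnel layer, for instance:
 *
 *     const struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *             { .type = RTE_FLOW_ITEM_TYPE_VXLAN },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 *
 * combined with an RSS action whose .level = 2; without the VXLAN item the
 * same action is rejected as inner RSS on a non-tunnel flow.
 */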
1771
1772 /*
1773  * Validate the default miss action.
1774  *
1775  * @param[in] action_flags
1776  *   Bit-fields that hold the actions detected until now.
1777  * @param[out] error
1778  *   Pointer to error structure.
1779  *
1780  * @return
1781  *   0 on success, a negative errno value otherwise and rte_errno is set.
1782  */
1783 int
1784 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1785                                 const struct rte_flow_attr *attr,
1786                                 struct rte_flow_error *error)
1787 {
1788         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1789                 return rte_flow_error_set(error, EINVAL,
1790                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1791                                           "can't have 2 fate actions in"
1792                                           " same flow");
1793         if (attr->egress)
1794                 return rte_flow_error_set(error, ENOTSUP,
1795                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1796                                           "default miss action not supported "
1797                                           "for egress");
1798         if (attr->group)
1799                 return rte_flow_error_set(error, ENOTSUP,
1800                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1801                                           "only group 0 is supported");
1802         if (attr->transfer)
1803                 return rte_flow_error_set(error, ENOTSUP,
1804                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1805                                           NULL, "transfer is not supported");
1806         return 0;
1807 }
1808
1809 /*
1810  * Validate the count action.
1811  *
1812  * @param[in] dev
1813  *   Pointer to the Ethernet device structure.
1814  * @param[in] attr
1815  *   Attributes of flow that includes this action.
1816  * @param[out] error
1817  *   Pointer to error structure.
1818  *
1819  * @return
1820  *   0 on success, a negative errno value otherwise and rte_errno is set.
1821  */
1822 int
1823 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1824                                 const struct rte_flow_attr *attr,
1825                                 struct rte_flow_error *error)
1826 {
1827         if (attr->egress)
1828                 return rte_flow_error_set(error, ENOTSUP,
1829                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1830                                           "count action not supported for "
1831                                           "egress");
1832         return 0;
1833 }
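
/*
 * Illustrative sketch (editor's note): a COUNT action on an ingress flow,
 * later read back with rte_flow_query(); counter sharing options depend on
 * the rte_flow_action_count configuration and are omitted here.
 *
 *     const struct rte_flow_action count_action = {
 *             .type = RTE_FLOW_ACTION_TYPE_COUNT,
 *     };
 *     struct rte_flow_query_count stats = { .reset = 0 };
 *     // rte_flow_query(port_id, flow, &count_action, &stats, &error);
 */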
1834
1835 /*
1836  * Validate the ASO CT action.
1837  *
1838  * @param[in] dev
1839  *   Pointer to the Ethernet device structure.
1840  * @param[in] conntrack
1841  *   Pointer to the CT action profile.
1842  * @param[out] error
1843  *   Pointer to error structure.
1844  *
1845  * @return
1846  *   0 on success, a negative errno value otherwise and rte_errno is set.
1847  */
1848 int
1849 mlx5_validate_action_ct(struct rte_eth_dev *dev,
1850                         const struct rte_flow_action_conntrack *conntrack,
1851                         struct rte_flow_error *error)
1852 {
1853         RTE_SET_USED(dev);
1854
1855         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
1856                 return rte_flow_error_set(error, EINVAL,
1857                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1858                                           "Invalid CT state");
1859         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
1860                 return rte_flow_error_set(error, EINVAL,
1861                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1862                                           "Invalid last TCP packet flag");
1863         return 0;
1864 }
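
/*
 * Illustrative sketch (editor's note, field values are examples only): a CT
 * action profile whose state and last packet flag stay within the ranges
 * accepted above.
 *
 *     struct rte_flow_action_conntrack ct_conf = {
 *             .state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *             .last_index = RTE_FLOW_CONNTRACK_FLAG_SYN,
 *     };
 */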
1865
1866 /**
1867  * Verify the @p attributes will be correctly understood by the NIC and store
1868  * them in the @p flow if everything is correct.
1869  *
1870  * @param[in] dev
1871  *   Pointer to the Ethernet device structure.
1872  * @param[in] attributes
1873  *   Pointer to flow attributes
1874  * @param[out] error
1875  *   Pointer to error structure.
1876  *
1877  * @return
1878  *   0 on success, a negative errno value otherwise and rte_errno is set.
1879  */
1880 int
1881 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1882                               const struct rte_flow_attr *attributes,
1883                               struct rte_flow_error *error)
1884 {
1885         struct mlx5_priv *priv = dev->data->dev_private;
1886         uint32_t priority_max = priv->sh->flow_max_priority - 1;
1887
1888         if (attributes->group)
1889                 return rte_flow_error_set(error, ENOTSUP,
1890                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1891                                           NULL, "groups are not supported");
1892         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1893             attributes->priority >= priority_max)
1894                 return rte_flow_error_set(error, ENOTSUP,
1895                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1896                                           NULL, "priority out of range");
1897         if (attributes->egress)
1898                 return rte_flow_error_set(error, ENOTSUP,
1899                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1900                                           "egress is not supported");
1901         if (attributes->transfer && !priv->config.dv_esw_en)
1902                 return rte_flow_error_set(error, ENOTSUP,
1903                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1904                                           NULL, "transfer is not supported");
1905         if (!attributes->ingress)
1906                 return rte_flow_error_set(error, EINVAL,
1907                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1908                                           NULL,
1909                                           "ingress attribute is mandatory");
1910         return 0;
1911 }
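
/*
 * Illustrative sketch (editor's note): attributes accepted by the check
 * above; only ingress on group 0 is allowed and the priority must stay
 * below the discovered flow_max_priority.
 *
 *     const struct rte_flow_attr attr = {
 *             .group = 0,
 *             .priority = 0,
 *             .ingress = 1,
 *     };
 */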
1912
1913 /**
1914  * Validate ICMP6 item.
1915  *
1916  * @param[in] item
1917  *   Item specification.
1918  * @param[in] item_flags
1919  *   Bit-fields that hold the items detected until now.
1920  * @param[in] target_protocol
1921  *   The next protocol in the previous item.
1922  * @param[out] error
1923  *   Pointer to error structure.
1924  *
1925  * @return
1926  *   0 on success, a negative errno value otherwise and rte_errno is set.
1927  */
1928 int
1929 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1930                                uint64_t item_flags,
1931                                uint8_t target_protocol,
1932                                struct rte_flow_error *error)
1933 {
1934         const struct rte_flow_item_icmp6 *mask = item->mask;
1935         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1936         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1937                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1938         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1939                                       MLX5_FLOW_LAYER_OUTER_L4;
1940         int ret;
1941
1942         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1943                 return rte_flow_error_set(error, EINVAL,
1944                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1945                                           "protocol filtering not compatible"
1946                                           " with ICMP6 layer");
1947         if (!(item_flags & l3m))
1948                 return rte_flow_error_set(error, EINVAL,
1949                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1950                                           "IPv6 is mandatory to filter on"
1951                                           " ICMP6");
1952         if (item_flags & l4m)
1953                 return rte_flow_error_set(error, EINVAL,
1954                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1955                                           "multiple L4 layers not supported");
1956         if (!mask)
1957                 mask = &rte_flow_item_icmp6_mask;
1958         ret = mlx5_flow_item_acceptable
1959                 (item, (const uint8_t *)mask,
1960                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1961                  sizeof(struct rte_flow_item_icmp6),
1962                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1963         if (ret < 0)
1964                 return ret;
1965         return 0;
1966 }
1967
1968 /**
1969  * Validate ICMP item.
1970  *
1971  * @param[in] item
1972  *   Item specification.
1973  * @param[in] item_flags
1974  *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1975  * @param[out] error
1976  *   Pointer to error structure.
1977  *
1978  * @return
1979  *   0 on success, a negative errno value otherwise and rte_errno is set.
1980  */
1981 int
1982 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1983                              uint64_t item_flags,
1984                              uint8_t target_protocol,
1985                              struct rte_flow_error *error)
1986 {
1987         const struct rte_flow_item_icmp *mask = item->mask;
1988         const struct rte_flow_item_icmp nic_mask = {
1989                 .hdr.icmp_type = 0xff,
1990                 .hdr.icmp_code = 0xff,
1991                 .hdr.icmp_ident = RTE_BE16(0xffff),
1992                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1993         };
1994         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1995         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1996                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1997         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1998                                       MLX5_FLOW_LAYER_OUTER_L4;
1999         int ret;
2000
2001         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2002                 return rte_flow_error_set(error, EINVAL,
2003                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2004                                           "protocol filtering not compatible"
2005                                           " with ICMP layer");
2006         if (!(item_flags & l3m))
2007                 return rte_flow_error_set(error, EINVAL,
2008                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2009                                           "IPv4 is mandatory to filter"
2010                                           " on ICMP");
2011         if (item_flags & l4m)
2012                 return rte_flow_error_set(error, EINVAL,
2013                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2014                                           "multiple L4 layers not supported");
2015         if (!mask)
2016                 mask = &nic_mask;
2017         ret = mlx5_flow_item_acceptable
2018                 (item, (const uint8_t *)mask,
2019                  (const uint8_t *)&nic_mask,
2020                  sizeof(struct rte_flow_item_icmp),
2021                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2022         if (ret < 0)
2023                 return ret;
2024         return 0;
2025 }
2026
2027 /**
2028  * Validate Ethernet item.
2029  *
2030  * @param[in] item
2031  *   Item specification.
2032  * @param[in] item_flags
2033  *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
2034  * @param[out] error
2035  *   Pointer to error structure.
2036  *
2037  * @return
2038  *   0 on success, a negative errno value otherwise and rte_errno is set.
2039  */
2040 int
2041 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2042                             uint64_t item_flags, bool ext_vlan_sup,
2043                             struct rte_flow_error *error)
2044 {
2045         const struct rte_flow_item_eth *mask = item->mask;
2046         const struct rte_flow_item_eth nic_mask = {
2047                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2048                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2049                 .type = RTE_BE16(0xffff),
2050                 .has_vlan = ext_vlan_sup ? 1 : 0,
2051         };
2052         int ret;
2053         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2054         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
2055                                        MLX5_FLOW_LAYER_OUTER_L2;
2056
2057         if (item_flags & ethm)
2058                 return rte_flow_error_set(error, ENOTSUP,
2059                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2060                                           "multiple L2 layers not supported");
2061         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2062             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2063                 return rte_flow_error_set(error, EINVAL,
2064                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2065                                           "L2 layer should not follow "
2066                                           "L3 layers");
2067         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2068             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2069                 return rte_flow_error_set(error, EINVAL,
2070                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2071                                           "L2 layer should not follow VLAN");
2072         if (item_flags & MLX5_FLOW_LAYER_GTP)
2073                 return rte_flow_error_set(error, EINVAL,
2074                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2075                                           "L2 layer should not follow GTP");
2076         if (!mask)
2077                 mask = &rte_flow_item_eth_mask;
2078         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2079                                         (const uint8_t *)&nic_mask,
2080                                         sizeof(struct rte_flow_item_eth),
2081                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2082         return ret;
2083 }
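
/*
 * Illustrative sketch (editor's note): an outer Ethernet item matching a
 * specific EtherType; MAC addresses are left unmasked in this example.
 *
 *     static const struct rte_flow_item_eth eth_spec = {
 *             .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *     };
 *     static const struct rte_flow_item_eth eth_mask = {
 *             .type = RTE_BE16(0xffff),
 *     };
 *     const struct rte_flow_item eth_item = {
 *             .type = RTE_FLOW_ITEM_TYPE_ETH,
 *             .spec = &eth_spec,
 *             .mask = &eth_mask,
 *     };
 */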
2084
2085 /**
2086  * Validate VLAN item.
2087  *
2088  * @param[in] item
2089  *   Item specification.
2090  * @param[in] item_flags
2091  *   Bit-fields that hold the items detected until now.
2092  * @param[in] dev
2093  *   Ethernet device flow is being created on.
2094  * @param[out] error
2095  *   Pointer to error structure.
2096  *
2097  * @return
2098  *   0 on success, a negative errno value otherwise and rte_errno is set.
2099  */
2100 int
2101 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2102                              uint64_t item_flags,
2103                              struct rte_eth_dev *dev,
2104                              struct rte_flow_error *error)
2105 {
2106         const struct rte_flow_item_vlan *spec = item->spec;
2107         const struct rte_flow_item_vlan *mask = item->mask;
2108         const struct rte_flow_item_vlan nic_mask = {
2109                 .tci = RTE_BE16(UINT16_MAX),
2110                 .inner_type = RTE_BE16(UINT16_MAX),
2111         };
2112         uint16_t vlan_tag = 0;
2113         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2114         int ret;
2115         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2116                                         MLX5_FLOW_LAYER_INNER_L4) :
2117                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2118                                         MLX5_FLOW_LAYER_OUTER_L4);
2119         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2120                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2121
2122         if (item_flags & vlanm)
2123                 return rte_flow_error_set(error, EINVAL,
2124                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2125                                           "multiple VLAN layers not supported");
2126         else if ((item_flags & l34m) != 0)
2127                 return rte_flow_error_set(error, EINVAL,
2128                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2129                                           "VLAN cannot follow L3/L4 layer");
2130         if (!mask)
2131                 mask = &rte_flow_item_vlan_mask;
2132         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2133                                         (const uint8_t *)&nic_mask,
2134                                         sizeof(struct rte_flow_item_vlan),
2135                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2136         if (ret)
2137                 return ret;
2138         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2139                 struct mlx5_priv *priv = dev->data->dev_private;
2140
2141                 if (priv->vmwa_context) {
2142                         /*
2143                          * A non-NULL context means we have a virtual machine
2144                          * and SR-IOV enabled. We have to create a VLAN interface
2145                          * to make the hypervisor set up the E-Switch vport
2146                          * context correctly. We avoid creating multiple
2147                          * VLAN interfaces, so we cannot support a VLAN tag mask.
2148                          */
2149                         return rte_flow_error_set(error, EINVAL,
2150                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2151                                                   item,
2152                                                   "VLAN tag mask is not"
2153                                                   " supported in virtual"
2154                                                   " environment");
2155                 }
2156         }
2157         if (spec) {
2158                 vlan_tag = spec->tci;
2159                 vlan_tag &= mask->tci;
2160         }
2161         /*
2162          * From verbs perspective an empty VLAN is equivalent
2163          * to a packet without VLAN layer.
2164          */
2165         if (!vlan_tag)
2166                 return rte_flow_error_set(error, EINVAL,
2167                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2168                                           item->spec,
2169                                           "VLAN cannot be empty");
2170         return 0;
2171 }
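
/*
 * Illustrative sketch (editor's note): a VLAN item that passes the check
 * above; the masked TCI must be non-zero because an empty VLAN match is
 * rejected, and the 0x0fff mask also avoids the VM/SR-IOV restriction.
 *
 *     static const struct rte_flow_item_vlan vlan_spec = {
 *             .tci = RTE_BE16(100),
 *     };
 *     static const struct rte_flow_item_vlan vlan_mask = {
 *             .tci = RTE_BE16(0x0fff),
 *     };
 *     const struct rte_flow_item vlan_item = {
 *             .type = RTE_FLOW_ITEM_TYPE_VLAN,
 *             .spec = &vlan_spec,
 *             .mask = &vlan_mask,
 *     };
 */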
2172
2173 /**
2174  * Validate IPV4 item.
2175  *
2176  * @param[in] item
2177  *   Item specification.
2178  * @param[in] item_flags
2179  *   Bit-fields that hold the items detected until now.
2180  * @param[in] last_item
2181  *   Previous validated item in the pattern items.
2182  * @param[in] ether_type
2183  *   Type in the ethernet layer header (including dot1q).
2184  * @param[in] acc_mask
2185  *   Acceptable mask, if NULL default internal default mask
2186  *   will be used to check whether item fields are supported.
2187  * @param[in] range_accepted
2188  *   True if range of values is accepted for specific fields, false otherwise.
2189  * @param[out] error
2190  *   Pointer to error structure.
2191  *
2192  * @return
2193  *   0 on success, a negative errno value otherwise and rte_errno is set.
2194  */
2195 int
2196 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2197                              uint64_t item_flags,
2198                              uint64_t last_item,
2199                              uint16_t ether_type,
2200                              const struct rte_flow_item_ipv4 *acc_mask,
2201                              bool range_accepted,
2202                              struct rte_flow_error *error)
2203 {
2204         const struct rte_flow_item_ipv4 *mask = item->mask;
2205         const struct rte_flow_item_ipv4 *spec = item->spec;
2206         const struct rte_flow_item_ipv4 nic_mask = {
2207                 .hdr = {
2208                         .src_addr = RTE_BE32(0xffffffff),
2209                         .dst_addr = RTE_BE32(0xffffffff),
2210                         .type_of_service = 0xff,
2211                         .next_proto_id = 0xff,
2212                 },
2213         };
2214         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2215         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2216                                       MLX5_FLOW_LAYER_OUTER_L3;
2217         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2218                                       MLX5_FLOW_LAYER_OUTER_L4;
2219         int ret;
2220         uint8_t next_proto = 0xFF;
2221         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2222                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2223                                   MLX5_FLOW_LAYER_INNER_VLAN);
2224
2225         if ((last_item & l2_vlan) && ether_type &&
2226             ether_type != RTE_ETHER_TYPE_IPV4)
2227                 return rte_flow_error_set(error, EINVAL,
2228                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2229                                           "IPv4 cannot follow L2/VLAN layer "
2230                                           "whose ether type is not IPv4");
2231         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2232                 if (mask && spec)
2233                         next_proto = mask->hdr.next_proto_id &
2234                                      spec->hdr.next_proto_id;
2235                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2236                         return rte_flow_error_set(error, EINVAL,
2237                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2238                                                   item,
2239                                                   "multiple tunnel "
2240                                                   "not supported");
2241         }
2242         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2243                 return rte_flow_error_set(error, EINVAL,
2244                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2245                                           "wrong tunnel type - IPv6 specified "
2246                                           "but IPv4 item provided");
2247         if (item_flags & l3m)
2248                 return rte_flow_error_set(error, ENOTSUP,
2249                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2250                                           "multiple L3 layers not supported");
2251         else if (item_flags & l4m)
2252                 return rte_flow_error_set(error, EINVAL,
2253                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2254                                           "L3 cannot follow an L4 layer.");
2255         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2256                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2257                 return rte_flow_error_set(error, EINVAL,
2258                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2259                                           "L3 cannot follow an NVGRE layer.");
2260         if (!mask)
2261                 mask = &rte_flow_item_ipv4_mask;
2262         else if (mask->hdr.next_proto_id != 0 &&
2263                  mask->hdr.next_proto_id != 0xff)
2264                 return rte_flow_error_set(error, EINVAL,
2265                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2266                                           "partial mask is not supported"
2267                                           " for protocol");
2268         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2269                                         acc_mask ? (const uint8_t *)acc_mask
2270                                                  : (const uint8_t *)&nic_mask,
2271                                         sizeof(struct rte_flow_item_ipv4),
2272                                         range_accepted, error);
2273         if (ret < 0)
2274                 return ret;
2275         return 0;
2276 }
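
/*
 * Illustrative sketch (editor's note): an IPv4 item with a fully masked
 * next_proto_id, as required by the partial-protocol-mask check above.
 *
 *     static const struct rte_flow_item_ipv4 ipv4_spec = {
 *             .hdr = { .next_proto_id = IPPROTO_UDP },
 *     };
 *     static const struct rte_flow_item_ipv4 ipv4_mask = {
 *             .hdr = { .next_proto_id = 0xff },
 *     };
 *     const struct rte_flow_item ipv4_item = {
 *             .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *             .spec = &ipv4_spec,
 *             .mask = &ipv4_mask,
 *     };
 */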
2277
2278 /**
2279  * Validate IPV6 item.
2280  *
2281  * @param[in] item
2282  *   Item specification.
2283  * @param[in] item_flags
2284  *   Bit-fields that hold the items detected until now.
2285  * @param[in] last_item
2286  *   Previous validated item in the pattern items.
2287  * @param[in] ether_type
2288  *   Type in the ethernet layer header (including dot1q).
2289  * @param[in] acc_mask
2290  *   Acceptable mask, if NULL default internal default mask
2291  *   will be used to check whether item fields are supported.
2292  * @param[out] error
2293  *   Pointer to error structure.
2294  *
2295  * @return
2296  *   0 on success, a negative errno value otherwise and rte_errno is set.
2297  */
2298 int
2299 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2300                              uint64_t item_flags,
2301                              uint64_t last_item,
2302                              uint16_t ether_type,
2303                              const struct rte_flow_item_ipv6 *acc_mask,
2304                              struct rte_flow_error *error)
2305 {
2306         const struct rte_flow_item_ipv6 *mask = item->mask;
2307         const struct rte_flow_item_ipv6 *spec = item->spec;
2308         const struct rte_flow_item_ipv6 nic_mask = {
2309                 .hdr = {
2310                         .src_addr =
2311                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2312                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2313                         .dst_addr =
2314                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2315                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2316                         .vtc_flow = RTE_BE32(0xffffffff),
2317                         .proto = 0xff,
2318                 },
2319         };
2320         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2321         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2322                                       MLX5_FLOW_LAYER_OUTER_L3;
2323         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2324                                       MLX5_FLOW_LAYER_OUTER_L4;
2325         int ret;
2326         uint8_t next_proto = 0xFF;
2327         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2328                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2329                                   MLX5_FLOW_LAYER_INNER_VLAN);
2330
2331         if ((last_item & l2_vlan) && ether_type &&
2332             ether_type != RTE_ETHER_TYPE_IPV6)
2333                 return rte_flow_error_set(error, EINVAL,
2334                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2335                                           "IPv6 cannot follow L2/VLAN layer "
2336                                           "whose ether type is not IPv6");
2337         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2338                 next_proto = spec->hdr.proto;
2339         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2340                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2341                         return rte_flow_error_set(error, EINVAL,
2342                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2343                                                   item,
2344                                                   "multiple tunnel "
2345                                                   "not supported");
2346         }
2347         if (next_proto == IPPROTO_HOPOPTS  ||
2348             next_proto == IPPROTO_ROUTING  ||
2349             next_proto == IPPROTO_FRAGMENT ||
2350             next_proto == IPPROTO_ESP      ||
2351             next_proto == IPPROTO_AH       ||
2352             next_proto == IPPROTO_DSTOPTS)
2353                 return rte_flow_error_set(error, EINVAL,
2354                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2355                                           "IPv6 proto (next header) should "
2356                                           "not be set as extension header");
2357         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2358                 return rte_flow_error_set(error, EINVAL,
2359                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2360                                           "wrong tunnel type - IPv4 specified "
2361                                           "but IPv6 item provided");
2362         if (item_flags & l3m)
2363                 return rte_flow_error_set(error, ENOTSUP,
2364                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2365                                           "multiple L3 layers not supported");
2366         else if (item_flags & l4m)
2367                 return rte_flow_error_set(error, EINVAL,
2368                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2369                                           "L3 cannot follow an L4 layer.");
2370         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2371                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2372                 return rte_flow_error_set(error, EINVAL,
2373                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2374                                           "L3 cannot follow an NVGRE layer.");
2375         if (!mask)
2376                 mask = &rte_flow_item_ipv6_mask;
2377         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2378                                         acc_mask ? (const uint8_t *)acc_mask
2379                                                  : (const uint8_t *)&nic_mask,
2380                                         sizeof(struct rte_flow_item_ipv6),
2381                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2382         if (ret < 0)
2383                 return ret;
2384         return 0;
2385 }
2386
2387 /**
2388  * Validate UDP item.
2389  *
2390  * @param[in] item
2391  *   Item specification.
2392  * @param[in] item_flags
2393  *   Bit-fields that hold the items detected until now.
2394  * @param[in] target_protocol
2395  *   The next protocol in the previous item.
2398  * @param[out] error
2399  *   Pointer to error structure.
2400  *
2401  * @return
2402  *   0 on success, a negative errno value otherwise and rte_errno is set.
2403  */
2404 int
2405 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2406                             uint64_t item_flags,
2407                             uint8_t target_protocol,
2408                             struct rte_flow_error *error)
2409 {
2410         const struct rte_flow_item_udp *mask = item->mask;
2411         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2412         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2413                                       MLX5_FLOW_LAYER_OUTER_L3;
2414         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2415                                       MLX5_FLOW_LAYER_OUTER_L4;
2416         int ret;
2417
2418         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2419                 return rte_flow_error_set(error, EINVAL,
2420                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2421                                           "protocol filtering not compatible"
2422                                           " with UDP layer");
2423         if (!(item_flags & l3m))
2424                 return rte_flow_error_set(error, EINVAL,
2425                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2426                                           "L3 is mandatory to filter on L4");
2427         if (item_flags & l4m)
2428                 return rte_flow_error_set(error, EINVAL,
2429                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2430                                           "multiple L4 layers not supported");
2431         if (!mask)
2432                 mask = &rte_flow_item_udp_mask;
2433         ret = mlx5_flow_item_acceptable
2434                 (item, (const uint8_t *)mask,
2435                  (const uint8_t *)&rte_flow_item_udp_mask,
2436                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2437                  error);
2438         if (ret < 0)
2439                 return ret;
2440         return 0;
2441 }
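
/*
 * Illustrative sketch (editor's note): L4 items are only accepted after an
 * L3 layer, so a minimal item ordering that passes the check above is:
 *
 *     const struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 */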
2442
2443 /**
2444  * Validate TCP item.
2445  *
2446  * @param[in] item
2447  *   Item specification.
2448  * @param[in] item_flags
2449  *   Bit-fields that hold the items detected until now.
2450  * @param[in] target_protocol
2451  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2452  * @param[out] error
2453  *   Pointer to error structure.
2454  *
2455  * @return
2456  *   0 on success, a negative errno value otherwise and rte_errno is set.
2457  */
2458 int
2459 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2460                             uint64_t item_flags,
2461                             uint8_t target_protocol,
2462                             const struct rte_flow_item_tcp *flow_mask,
2463                             struct rte_flow_error *error)
2464 {
2465         const struct rte_flow_item_tcp *mask = item->mask;
2466         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2467         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2468                                       MLX5_FLOW_LAYER_OUTER_L3;
2469         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2470                                       MLX5_FLOW_LAYER_OUTER_L4;
2471         int ret;
2472
2473         MLX5_ASSERT(flow_mask);
2474         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2475                 return rte_flow_error_set(error, EINVAL,
2476                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2477                                           "protocol filtering not compatible"
2478                                           " with TCP layer");
2479         if (!(item_flags & l3m))
2480                 return rte_flow_error_set(error, EINVAL,
2481                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2482                                           "L3 is mandatory to filter on L4");
2483         if (item_flags & l4m)
2484                 return rte_flow_error_set(error, EINVAL,
2485                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2486                                           "multiple L4 layers not supported");
2487         if (!mask)
2488                 mask = &rte_flow_item_tcp_mask;
2489         ret = mlx5_flow_item_acceptable
2490                 (item, (const uint8_t *)mask,
2491                  (const uint8_t *)flow_mask,
2492                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2493                  error);
2494         if (ret < 0)
2495                 return ret;
2496         return 0;
2497 }
2498
2499 /**
2500  * Validate VXLAN item.
2501  *
2502  * @param[in] dev
2503  *   Pointer to the Ethernet device structure.
2504  * @param[in] udp_dport
2505  *   UDP destination port.
2506  * @param[in] item
2507  *   Item specification.
2508  * @param[in] item_flags
2509  *   Bit-fields that hold the items detected until now.
2510  * @param[in] attr
2511  *   Flow rule attributes.
2512  * @param[out] error
2513  *   Pointer to error structure.
2514  *
2515  * @return
2516  *   0 on success, a negative errno value otherwise and rte_errno is set.
2517  */
2518 int
2519 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2520                               uint16_t udp_dport,
2521                               const struct rte_flow_item *item,
2522                               uint64_t item_flags,
2523                               const struct rte_flow_attr *attr,
2524                               struct rte_flow_error *error)
2525 {
2526         const struct rte_flow_item_vxlan *spec = item->spec;
2527         const struct rte_flow_item_vxlan *mask = item->mask;
2528         int ret;
2529         struct mlx5_priv *priv = dev->data->dev_private;
2530         union vni {
2531                 uint32_t vlan_id;
2532                 uint8_t vni[4];
2533         } id = { .vlan_id = 0, };
2534         const struct rte_flow_item_vxlan nic_mask = {
2535                 .vni = "\xff\xff\xff",
2536                 .rsvd1 = 0xff,
2537         };
2538         const struct rte_flow_item_vxlan *valid_mask;
2539
2540         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2541                 return rte_flow_error_set(error, ENOTSUP,
2542                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2543                                           "multiple tunnel layers not"
2544                                           " supported");
2545         valid_mask = &rte_flow_item_vxlan_mask;
2546         /*
2547          * Verify only UDPv4 is present as defined in
2548          * https://tools.ietf.org/html/rfc7348
2549          */
2550         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2551                 return rte_flow_error_set(error, EINVAL,
2552                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2553                                           "no outer UDP layer found");
2554         if (!mask)
2555                 mask = &rte_flow_item_vxlan_mask;
2556
2557         if (priv->sh->steering_format_version !=
2558             MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2559             !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2560                 /* FDB domain & NIC domain non-zero group */
2561                 if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2562                         valid_mask = &nic_mask;
2563                 /* Group zero in NIC domain */
2564                 if (!attr->group && !attr->transfer &&
2565                     priv->sh->tunnel_header_0_1)
2566                         valid_mask = &nic_mask;
2567         }
2568         ret = mlx5_flow_item_acceptable
2569                 (item, (const uint8_t *)mask,
2570                  (const uint8_t *)valid_mask,
2571                  sizeof(struct rte_flow_item_vxlan),
2572                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2573         if (ret < 0)
2574                 return ret;
2575         if (spec) {
2576                 memcpy(&id.vni[1], spec->vni, 3);
2577                 memcpy(&id.vni[1], mask->vni, 3);
2578         }
2579         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2580                 return rte_flow_error_set(error, ENOTSUP,
2581                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2582                                           "VXLAN tunnel must be fully defined");
2583         return 0;
2584 }
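
/*
 * Illustrative sketch (hypothetical helper, guarded out of the build): the
 * VXLAN item carries the 24-bit VNI as three bytes in network order, which
 * is why the validation above copies spec->vni into bytes [1..3] of a
 * 32-bit union.
 */
#ifdef MLX5_FLOW_VALIDATION_EXAMPLE
static void
example_fill_vxlan_vni(struct rte_flow_item_vxlan *spec, uint32_t vni)
{
        /* Keep only the 24 least significant bits, most significant first. */
        spec->vni[0] = (vni >> 16) & 0xff;
        spec->vni[1] = (vni >> 8) & 0xff;
        spec->vni[2] = vni & 0xff;
}
#endif /* MLX5_FLOW_VALIDATION_EXAMPLE */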
2585
2586 /**
2587  * Validate VXLAN_GPE item.
2588  *
2589  * @param[in] item
2590  *   Item specification.
2591  * @param[in] item_flags
2592  *   Bit-fields that hold the items detected until now.
2593  * @param[in] dev
2594  *   Pointer to the Ethernet device structure.
2597  * @param[out] error
2598  *   Pointer to error structure.
2599  *
2600  * @return
2601  *   0 on success, a negative errno value otherwise and rte_errno is set.
2602  */
2603 int
2604 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2605                                   uint64_t item_flags,
2606                                   struct rte_eth_dev *dev,
2607                                   struct rte_flow_error *error)
2608 {
2609         struct mlx5_priv *priv = dev->data->dev_private;
2610         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2611         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2612         int ret;
2613         union vni {
2614                 uint32_t vlan_id;
2615                 uint8_t vni[4];
2616         } id = { .vlan_id = 0, };
2617
2618         if (!priv->config.l3_vxlan_en)
2619                 return rte_flow_error_set(error, ENOTSUP,
2620                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2621                                           "L3 VXLAN is not enabled by device"
2622                                           " parameter and/or not configured in"
2623                                           " firmware");
2624         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2625                 return rte_flow_error_set(error, ENOTSUP,
2626                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2627                                           "multiple tunnel layers not"
2628                                           " supported");
2629         /*
2630          * Verify an outer UDP layer is present, as VXLAN-GPE is
2631          * carried over UDP.
2632          */
2633         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2634                 return rte_flow_error_set(error, EINVAL,
2635                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2636                                           "no outer UDP layer found");
2637         if (!mask)
2638                 mask = &rte_flow_item_vxlan_gpe_mask;
2639         ret = mlx5_flow_item_acceptable
2640                 (item, (const uint8_t *)mask,
2641                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2642                  sizeof(struct rte_flow_item_vxlan_gpe),
2643                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2644         if (ret < 0)
2645                 return ret;
2646         if (spec) {
2647                 if (spec->protocol)
2648                         return rte_flow_error_set(error, ENOTSUP,
2649                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2650                                                   item,
2651                                                   "VxLAN-GPE protocol"
2652                                                   " not supported");
2653                 memcpy(&id.vni[1], spec->vni, 3);
2654                 memcpy(&id.vni[1], mask->vni, 3);
2655         }
2656         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2657                 return rte_flow_error_set(error, ENOTSUP,
2658                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2659                                           "VXLAN-GPE tunnel must be fully"
2660                                           " defined");
2661         return 0;
2662 }
2663 /**
2664  * Validate GRE Key item.
2665  *
2666  * @param[in] item
2667  *   Item specification.
2668  * @param[in] item_flags
2669  *   Bit flags to mark detected items.
2670  * @param[in] gre_item
2671  *   Pointer to the GRE item.
2672  * @param[out] error
2673  *   Pointer to error structure.
2674  *
2675  * @return
2676  *   0 on success, a negative errno value otherwise and rte_errno is set.
2677  */
2678 int
2679 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2680                                 uint64_t item_flags,
2681                                 const struct rte_flow_item *gre_item,
2682                                 struct rte_flow_error *error)
2683 {
2684         const rte_be32_t *mask = item->mask;
2685         int ret = 0;
2686         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2687         const struct rte_flow_item_gre *gre_spec;
2688         const struct rte_flow_item_gre *gre_mask;
2689
2690         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2691                 return rte_flow_error_set(error, ENOTSUP,
2692                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2693                                           "Multiple GRE key items not supported");
2694         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2695                 return rte_flow_error_set(error, ENOTSUP,
2696                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2697                                           "No preceding GRE header");
2698         if (item_flags & MLX5_FLOW_LAYER_INNER)
2699                 return rte_flow_error_set(error, ENOTSUP,
2700                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2701                                           "GRE key following a wrong item");
2702         gre_mask = gre_item->mask;
2703         if (!gre_mask)
2704                 gre_mask = &rte_flow_item_gre_mask;
2705         gre_spec = gre_item->spec;
2706         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2707                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2708                 return rte_flow_error_set(error, EINVAL,
2709                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2710                                           "Key bit must be on");
2711
2712         if (!mask)
2713                 mask = &gre_key_default_mask;
2714         ret = mlx5_flow_item_acceptable
2715                 (item, (const uint8_t *)mask,
2716                  (const uint8_t *)&gre_key_default_mask,
2717                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2718         return ret;
2719 }
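
/*
 * Illustrative usage sketch (hypothetical, guarded out of the build): a GRE
 * item followed by a GRE_KEY item must have the K (key present) bit,
 * RTE_BE16(0x2000) in c_rsvd0_ver, set in its spec whenever its mask covers
 * that bit, which is the check enforced above.
 */
#ifdef MLX5_FLOW_VALIDATION_EXAMPLE
static int
example_validate_gre_key(struct rte_flow_error *error)
{
        const struct rte_flow_item_gre gre_spec = {
                .c_rsvd0_ver = RTE_BE16(0x2000), /* K bit set. */
        };
        const struct rte_flow_item_gre gre_mask = {
                .c_rsvd0_ver = RTE_BE16(0x2000),
        };
        const rte_be32_t key_spec = RTE_BE32(0x1234);
        const struct rte_flow_item gre_item = {
                .type = RTE_FLOW_ITEM_TYPE_GRE,
                .spec = &gre_spec,
                .mask = &gre_mask,
        };
        const struct rte_flow_item key_item = {
                .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
                .spec = &key_spec,
                /* No mask: the full 32-bit key default mask is used. */
        };
        uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                              MLX5_FLOW_LAYER_GRE;

        return mlx5_flow_validate_item_gre_key(&key_item, item_flags,
                                               &gre_item, error);
}
#endif /* MLX5_FLOW_VALIDATION_EXAMPLE */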
2720
2721 /**
2722  * Validate GRE item.
2723  *
2724  * @param[in] item
2725  *   Item specification.
2726  * @param[in] item_flags
2727  *   Bit flags to mark detected items.
2728  * @param[in] target_protocol
2729  *   The next protocol in the previous item.
2730  * @param[out] error
2731  *   Pointer to error structure.
2732  *
2733  * @return
2734  *   0 on success, a negative errno value otherwise and rte_errno is set.
2735  */
2736 int
2737 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2738                             uint64_t item_flags,
2739                             uint8_t target_protocol,
2740                             struct rte_flow_error *error)
2741 {
2742         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2743         const struct rte_flow_item_gre *mask = item->mask;
2744         int ret;
2745         const struct rte_flow_item_gre nic_mask = {
2746                 .c_rsvd0_ver = RTE_BE16(0xB000),
2747                 .protocol = RTE_BE16(UINT16_MAX),
2748         };
2749
2750         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2751                 return rte_flow_error_set(error, EINVAL,
2752                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2753                                           "protocol filtering not compatible"
2754                                           " with this GRE layer");
2755         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2756                 return rte_flow_error_set(error, ENOTSUP,
2757                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2758                                           "multiple tunnel layers not"
2759                                           " supported");
2760         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2761                 return rte_flow_error_set(error, ENOTSUP,
2762                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2763                                           "L3 Layer is missing");
2764         if (!mask)
2765                 mask = &rte_flow_item_gre_mask;
2766         ret = mlx5_flow_item_acceptable
2767                 (item, (const uint8_t *)mask,
2768                  (const uint8_t *)&nic_mask,
2769                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2770                  error);
2771         if (ret < 0)
2772                 return ret;
2773 #ifndef HAVE_MLX5DV_DR
2774 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2775         if (spec && (spec->protocol & mask->protocol))
2776                 return rte_flow_error_set(error, ENOTSUP,
2777                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2778                                           "without MPLS support the"
2779                                           " specification cannot be used for"
2780                                           " filtering");
2781 #endif
2782 #endif
2783         return 0;
2784 }
2785
2786 /**
2787  * Validate Geneve item.
2788  *
2789  * @param[in] item
2790  *   Item specification.
2791  * @param[in] item_flags
2792  *   Bit-fields that hold the items detected until now.
2793  * @param[in] dev
2794  *   Pointer to the Ethernet device structure.
2795  * @param[out] error
2796  *   Pointer to error structure.
2797  *
2798  * @return
2799  *   0 on success, a negative errno value otherwise and rte_errno is set.
2800  */
2801
2802 int
2803 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2804                                uint64_t item_flags,
2805                                struct rte_eth_dev *dev,
2806                                struct rte_flow_error *error)
2807 {
2808         struct mlx5_priv *priv = dev->data->dev_private;
2809         const struct rte_flow_item_geneve *spec = item->spec;
2810         const struct rte_flow_item_geneve *mask = item->mask;
2811         int ret;
2812         uint16_t gbhdr;
2813         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2814                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2815         const struct rte_flow_item_geneve nic_mask = {
2816                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2817                 .vni = "\xff\xff\xff",
2818                 .protocol = RTE_BE16(UINT16_MAX),
2819         };
2820
2821         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2822                 return rte_flow_error_set(error, ENOTSUP,
2823                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2824                                           "L3 Geneve is not enabled by device"
2825                                           " parameter and/or not configured in"
2826                                           " firmware");
2827         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2828                 return rte_flow_error_set(error, ENOTSUP,
2829                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2830                                           "multiple tunnel layers not"
2831                                           " supported");
2832         /*
2833          * Verify an outer UDP layer is present, as Geneve is
2834          * carried over UDP.
2835          */
2836         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2837                 return rte_flow_error_set(error, EINVAL,
2838                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2839                                           "no outer UDP layer found");
2840         if (!mask)
2841                 mask = &rte_flow_item_geneve_mask;
2842         ret = mlx5_flow_item_acceptable
2843                                   (item, (const uint8_t *)mask,
2844                                    (const uint8_t *)&nic_mask,
2845                                    sizeof(struct rte_flow_item_geneve),
2846                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2847         if (ret)
2848                 return ret;
2849         if (spec) {
2850                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2851                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2852                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2853                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2854                         return rte_flow_error_set(error, ENOTSUP,
2855                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2856                                                   item,
2857                                                   "Geneve protocol unsupported"
2858                                                   " fields are being used");
2859                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2860                         return rte_flow_error_set
2861                                         (error, ENOTSUP,
2862                                          RTE_FLOW_ERROR_TYPE_ITEM,
2863                                          item,
2864                                          "Unsupported Geneve options length");
2865         }
2866         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2867                 return rte_flow_error_set
2868                                     (error, ENOTSUP,
2869                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2870                                      "Geneve tunnel must be fully defined");
2871         return 0;
2872 }
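
/*
 * Illustrative usage sketch (hypothetical, guarded out of the build): a
 * minimal Geneve item matching VNI 42 over an outer Ethernet/IPv4/UDP
 * pattern, validated with the default library mask.
 */
#ifdef MLX5_FLOW_VALIDATION_EXAMPLE
static int
example_validate_geneve(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
        const struct rte_flow_item_geneve geneve_spec = {
                .vni = "\x00\x00\x2a", /* VNI 42. */
                .protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
        };
        const struct rte_flow_item geneve_item = {
                .type = RTE_FLOW_ITEM_TYPE_GENEVE,
                .spec = &geneve_spec,
                .mask = &rte_flow_item_geneve_mask,
        };
        uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                              MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                              MLX5_FLOW_LAYER_OUTER_L4_UDP;

        return mlx5_flow_validate_item_geneve(&geneve_item, item_flags,
                                              dev, error);
}
#endif /* MLX5_FLOW_VALIDATION_EXAMPLE */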
2873
2874 /**
2875  * Validate Geneve TLV option item.
2876  *
2877  * @param[in] item
2878  *   Item specification.
2879  * @param[in] last_item
2880  *   Previously validated item in the pattern items.
2881  * @param[in] geneve_item
2882  *   Previous GENEVE item specification.
2883  * @param[in] dev
2884  *   Pointer to the rte_eth_dev structure.
2885  * @param[out] error
2886  *   Pointer to error structure.
2887  *
2888  * @return
2889  *   0 on success, a negative errno value otherwise and rte_errno is set.
2890  */
2891 int
2892 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2893                                    uint64_t last_item,
2894                                    const struct rte_flow_item *geneve_item,
2895                                    struct rte_eth_dev *dev,
2896                                    struct rte_flow_error *error)
2897 {
2898         struct mlx5_priv *priv = dev->data->dev_private;
2899         struct mlx5_dev_ctx_shared *sh = priv->sh;
2900         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2901         struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
2902         uint8_t data_max_supported =
2903                         hca_attr->max_geneve_tlv_option_data_len * 4;
2904         struct mlx5_dev_config *config = &priv->config;
2905         const struct rte_flow_item_geneve *geneve_spec;
2906         const struct rte_flow_item_geneve *geneve_mask;
2907         const struct rte_flow_item_geneve_opt *spec = item->spec;
2908         const struct rte_flow_item_geneve_opt *mask = item->mask;
2909         unsigned int i;
2910         unsigned int data_len;
2911         uint8_t tlv_option_len;
2912         uint16_t optlen_m, optlen_v;
2913         const struct rte_flow_item_geneve_opt full_mask = {
2914                 .option_class = RTE_BE16(0xffff),
2915                 .option_type = 0xff,
2916                 .option_len = 0x1f,
2917         };
2918
2919         if (!mask)
2920                 mask = &rte_flow_item_geneve_opt_mask;
2921         if (!spec)
2922                 return rte_flow_error_set
2923                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2924                         "Geneve TLV opt class/type/length must be specified");
2925         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2926                 return rte_flow_error_set
2927                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2928                         "Geneve TLV opt length exceeds the limit (31)");
2929         /* Check if class type and length masks are full. */
2930         if (full_mask.option_class != mask->option_class ||
2931             full_mask.option_type != mask->option_type ||
2932             full_mask.option_len != (mask->option_len & full_mask.option_len))
2933                 return rte_flow_error_set
2934                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2935                         "Geneve TLV opt class/type/length masks must be full");
2936         /* Check if length is supported */
2937         if ((uint32_t)spec->option_len >
2938                         config->hca_attr.max_geneve_tlv_option_data_len)
2939                 return rte_flow_error_set
2940                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2941                         "Geneve TLV opt length not supported");
2942         if (config->hca_attr.max_geneve_tlv_options > 1)
2943                 DRV_LOG(DEBUG,
2944                         "max_geneve_tlv_options supports more than 1 option");
2945         /* Check GENEVE item preceding. */
2946         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2947                 return rte_flow_error_set
2948                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2949                         "Geneve opt item must be preceded with Geneve item");
2950         geneve_spec = geneve_item->spec;
2951         geneve_mask = geneve_item->mask ? geneve_item->mask :
2952                                           &rte_flow_item_geneve_mask;
2953         /* Check if GENEVE TLV option size doesn't exceed option length */
2954         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2955                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
2956                 tlv_option_len = spec->option_len & mask->option_len;
2957                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2958                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2959                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2960                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2961                 if ((optlen_v & optlen_m) <= tlv_option_len)
2962                         return rte_flow_error_set
2963                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2964                                  "GENEVE TLV option length exceeds optlen");
2965         }
2966         /* Check if length is 0 or data is 0. */
2967         if (spec->data == NULL || spec->option_len == 0)
2968                 return rte_flow_error_set
2969                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2970                         "Geneve TLV opt with zero data/length not supported");
2971         /* Check not all data & mask are 0. */
2972         data_len = spec->option_len * 4;
2973         if (mask->data == NULL) {
2974                 for (i = 0; i < data_len; i++)
2975                         if (spec->data[i])
2976                                 break;
2977                 if (i == data_len)
2978                         return rte_flow_error_set(error, ENOTSUP,
2979                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2980                                 "Can't match on Geneve option data 0");
2981         } else {
2982                 for (i = 0; i < data_len; i++)
2983                         if (spec->data[i] & mask->data[i])
2984                                 break;
2985                 if (i == data_len)
2986                         return rte_flow_error_set(error, ENOTSUP,
2987                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2988                                 "Can't match on Geneve option data and mask 0");
2989                 /* Check data mask supported. */
2990                 for (i = data_max_supported; i < data_len ; i++)
2991                         if (mask->data[i])
2992                                 return rte_flow_error_set(error, ENOTSUP,
2993                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
2994                                         "Data mask is of unsupported size");
2995         }
2996         /* Check GENEVE option is supported in NIC. */
2997         if (!config->hca_attr.geneve_tlv_opt)
2998                 return rte_flow_error_set
2999                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3000                         "Geneve TLV opt not supported");
3001         /* Check if we already have geneve option with different type/class. */
3002         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3003         geneve_opt_resource = sh->geneve_tlv_option_resource;
3004         if (geneve_opt_resource != NULL)
3005                 if (geneve_opt_resource->option_class != spec->option_class ||
3006                     geneve_opt_resource->option_type != spec->option_type ||
3007                     geneve_opt_resource->length != spec->option_len) {
3008                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3009                         return rte_flow_error_set(error, ENOTSUP,
3010                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3011                                 "Only one Geneve TLV option supported");
3012                 }
3013         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3014         return 0;
3015 }
3016
3017 /**
3018  * Validate MPLS item.
3019  *
3020  * @param[in] dev
3021  *   Pointer to the rte_eth_dev structure.
3022  * @param[in] item
3023  *   Item specification.
3024  * @param[in] item_flags
3025  *   Bit-fields that hold the items detected until now.
3026  * @param[in] prev_layer
3027  *   The protocol layer indicated in previous item.
3028  * @param[out] error
3029  *   Pointer to error structure.
3030  *
3031  * @return
3032  *   0 on success, a negative errno value otherwise and rte_errno is set.
3033  */
3034 int
3035 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3036                              const struct rte_flow_item *item __rte_unused,
3037                              uint64_t item_flags __rte_unused,
3038                              uint64_t prev_layer __rte_unused,
3039                              struct rte_flow_error *error)
3040 {
3041 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3042         const struct rte_flow_item_mpls *mask = item->mask;
3043         struct mlx5_priv *priv = dev->data->dev_private;
3044         int ret;
3045
3046         if (!priv->config.mpls_en)
3047                 return rte_flow_error_set(error, ENOTSUP,
3048                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3049                                           "MPLS not supported or"
3050                                           " disabled in firmware"
3051                                           " configuration.");
3052         /* MPLS over UDP, GRE is allowed */
3053         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3054                             MLX5_FLOW_LAYER_GRE |
3055                             MLX5_FLOW_LAYER_GRE_KEY)))
3056                 return rte_flow_error_set(error, EINVAL,
3057                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3058                                           "protocol filtering not compatible"
3059                                           " with MPLS layer");
3060         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3061         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3062             !(item_flags & MLX5_FLOW_LAYER_GRE))
3063                 return rte_flow_error_set(error, ENOTSUP,
3064                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3065                                           "multiple tunnel layers not"
3066                                           " supported");
3067         if (!mask)
3068                 mask = &rte_flow_item_mpls_mask;
3069         ret = mlx5_flow_item_acceptable
3070                 (item, (const uint8_t *)mask,
3071                  (const uint8_t *)&rte_flow_item_mpls_mask,
3072                  sizeof(struct rte_flow_item_mpls),
3073                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3074         if (ret < 0)
3075                 return ret;
3076         return 0;
3077 #else
3078         return rte_flow_error_set(error, ENOTSUP,
3079                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3080                                   "MPLS is not supported by Verbs, please"
3081                                   " update.");
3082 #endif
3083 }
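
/*
 * Illustrative usage sketch (hypothetical, guarded out of the build): MPLS
 * matching is accepted only on top of UDP or GRE; this sketch validates an
 * MPLS item stacked over GRE using the default library mask.
 */
#ifdef MLX5_FLOW_VALIDATION_EXAMPLE
static int
example_validate_mpls_over_gre(struct rte_eth_dev *dev,
                               struct rte_flow_error *error)
{
        const struct rte_flow_item mpls_item = {
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
                /* No spec/mask: rte_flow_item_mpls_mask is used by default. */
        };
        uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                              MLX5_FLOW_LAYER_GRE;

        return mlx5_flow_validate_item_mpls(dev, &mpls_item, item_flags,
                                            MLX5_FLOW_LAYER_GRE, error);
}
#endif /* MLX5_FLOW_VALIDATION_EXAMPLE */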
3084
3085 /**
3086  * Validate NVGRE item.
3087  *
3088  * @param[in] item
3089  *   Item specification.
3090  * @param[in] item_flags
3091  *   Bit flags to mark detected items.
3092  * @param[in] target_protocol
3093  *   The next protocol in the previous item.
3094  * @param[out] error
3095  *   Pointer to error structure.
3096  *
3097  * @return
3098  *   0 on success, a negative errno value otherwise and rte_errno is set.
3099  */
3100 int
3101 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3102                               uint64_t item_flags,
3103                               uint8_t target_protocol,
3104                               struct rte_flow_error *error)
3105 {
3106         const struct rte_flow_item_nvgre *mask = item->mask;
3107         int ret;
3108
3109         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3110                 return rte_flow_error_set(error, EINVAL,
3111                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3112                                           "protocol filtering not compatible"
3113                                           " with this GRE layer");
3114         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3115                 return rte_flow_error_set(error, ENOTSUP,
3116                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3117                                           "multiple tunnel layers not"
3118                                           " supported");
3119         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3120                 return rte_flow_error_set(error, ENOTSUP,
3121                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3122                                           "L3 Layer is missing");
3123         if (!mask)
3124                 mask = &rte_flow_item_nvgre_mask;
3125         ret = mlx5_flow_item_acceptable
3126                 (item, (const uint8_t *)mask,
3127                  (const uint8_t *)&rte_flow_item_nvgre_mask,
3128                  sizeof(struct rte_flow_item_nvgre),
3129                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3130         if (ret < 0)
3131                 return ret;
3132         return 0;
3133 }
3134
3135 /**
3136  * Validate eCPRI item.
3137  *
3138  * @param[in] item
3139  *   Item specification.
3140  * @param[in] item_flags
3141  *   Bit-fields that hold the items detected until now.
3142  * @param[in] last_item
3143  *   Previously validated item in the pattern items.
3144  * @param[in] ether_type
3145  *   Type in the Ethernet layer header (including dot1q).
3146  * @param[in] acc_mask
3147  *   Acceptable mask; if NULL, the default internal mask
3148  *   will be used to check whether item fields are supported.
3149  * @param[out] error
3150  *   Pointer to error structure.
3151  *
3152  * @return
3153  *   0 on success, a negative errno value otherwise and rte_errno is set.
3154  */
3155 int
3156 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3157                               uint64_t item_flags,
3158                               uint64_t last_item,
3159                               uint16_t ether_type,
3160                               const struct rte_flow_item_ecpri *acc_mask,
3161                               struct rte_flow_error *error)
3162 {
3163         const struct rte_flow_item_ecpri *mask = item->mask;
3164         const struct rte_flow_item_ecpri nic_mask = {
3165                 .hdr = {
3166                         .common = {
3167                                 .u32 =
3168                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3169                                         .type = 0xFF,
3170                                         }).u32),
3171                         },
3172                         .dummy[0] = 0xFFFFFFFF,
3173                 },
3174         };
3175         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3176                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3177         struct rte_flow_item_ecpri mask_lo;
3178
3179         if (!(last_item & outer_l2_vlan) &&
3180             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3181                 return rte_flow_error_set(error, EINVAL,
3182                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3183                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3184         if ((last_item & outer_l2_vlan) && ether_type &&
3185             ether_type != RTE_ETHER_TYPE_ECPRI)
3186                 return rte_flow_error_set(error, EINVAL,
3187                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3188                                           "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3189         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3190                 return rte_flow_error_set(error, EINVAL,
3191                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3192                                           "eCPRI with tunnel is not supported right now");
3193         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3194                 return rte_flow_error_set(error, ENOTSUP,
3195                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3196                                           "multiple L3 layers not supported");
3197         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3198                 return rte_flow_error_set(error, EINVAL,
3199                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3200                                           "eCPRI cannot coexist with a TCP layer");
3201         /* In specification, eCPRI could be over UDP layer. */
3202         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3203                 return rte_flow_error_set(error, EINVAL,
3204                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3205                                           "eCPRI over UDP layer is not supported yet");
3206         /* Mask for type field in common header could be zero. */
3207         if (!mask)
3208                 mask = &rte_flow_item_ecpri_mask;
3209         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3210         /* Input mask is in big-endian format. */
3211         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3212                 return rte_flow_error_set(error, EINVAL,
3213                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3214                                           "partial mask is not supported for protocol");
3215         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3216                 return rte_flow_error_set(error, EINVAL,
3217                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3218                                           "message header mask must be after a type mask");
3219         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3220                                          acc_mask ? (const uint8_t *)acc_mask
3221                                                   : (const uint8_t *)&nic_mask,
3222                                          sizeof(struct rte_flow_item_ecpri),
3223                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3224 }
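
/*
 * Illustrative usage sketch (hypothetical, guarded out of the build): an
 * eCPRI item directly over Ethernet (ether type 0xAEFE), matching message
 * type 0 (IQ data) with a full mask on the type field only. Passing a NULL
 * acc_mask selects the internal nic_mask defined above.
 */
#ifdef MLX5_FLOW_VALIDATION_EXAMPLE
static int
example_validate_ecpri(struct rte_flow_error *error)
{
        const struct rte_flow_item_ecpri ecpri_spec = {
                .hdr = {
                        .common = {
                                .u32 =
                                RTE_BE32(((const struct rte_ecpri_common_hdr) {
                                        .type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
                                        }).u32),
                        },
                },
        };
        const struct rte_flow_item_ecpri ecpri_mask = {
                .hdr = {
                        .common = {
                                .u32 =
                                RTE_BE32(((const struct rte_ecpri_common_hdr) {
                                        .type = 0xFF,
                                        }).u32),
                        },
                },
        };
        const struct rte_flow_item ecpri_item = {
                .type = RTE_FLOW_ITEM_TYPE_ECPRI,
                .spec = &ecpri_spec,
                .mask = &ecpri_mask,
        };
        uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2;
        uint64_t last_item = MLX5_FLOW_LAYER_OUTER_L2;

        return mlx5_flow_validate_item_ecpri(&ecpri_item, item_flags,
                                             last_item, RTE_ETHER_TYPE_ECPRI,
                                             NULL, error);
}
#endif /* MLX5_FLOW_VALIDATION_EXAMPLE */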
3225
3226 static int
3227 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3228                    const struct rte_flow_attr *attr __rte_unused,
3229                    const struct rte_flow_item items[] __rte_unused,
3230                    const struct rte_flow_action actions[] __rte_unused,
3231                    bool external __rte_unused,
3232                    int hairpin __rte_unused,
3233                    struct rte_flow_error *error)
3234 {
3235         return rte_flow_error_set(error, ENOTSUP,
3236                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3237 }
3238
3239 static struct mlx5_flow *
3240 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3241                   const struct rte_flow_attr *attr __rte_unused,
3242                   const struct rte_flow_item items[] __rte_unused,
3243                   const struct rte_flow_action actions[] __rte_unused,
3244                   struct rte_flow_error *error)
3245 {
3246         rte_flow_error_set(error, ENOTSUP,
3247                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3248         return NULL;
3249 }
3250
3251 static int
3252 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3253                     struct mlx5_flow *dev_flow __rte_unused,
3254                     const struct rte_flow_attr *attr __rte_unused,
3255                     const struct rte_flow_item items[] __rte_unused,
3256                     const struct rte_flow_action actions[] __rte_unused,
3257                     struct rte_flow_error *error)
3258 {
3259         return rte_flow_error_set(error, ENOTSUP,
3260                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3261 }
3262
3263 static int
3264 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3265                 struct rte_flow *flow __rte_unused,
3266                 struct rte_flow_error *error)
3267 {
3268         return rte_flow_error_set(error, ENOTSUP,
3269                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3270 }
3271
3272 static void
3273 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3274                  struct rte_flow *flow __rte_unused)
3275 {
3276 }
3277
3278 static void
3279 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3280                   struct rte_flow *flow __rte_unused)
3281 {
3282 }
3283
3284 static int
3285 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3286                 struct rte_flow *flow __rte_unused,
3287                 const struct rte_flow_action *actions __rte_unused,
3288                 void *data __rte_unused,
3289                 struct rte_flow_error *error)
3290 {
3291         return rte_flow_error_set(error, ENOTSUP,
3292                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3293 }
3294
3295 static int
3296 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3297                       uint32_t domains __rte_unused,
3298                       uint32_t flags __rte_unused)
3299 {
3300         return 0;
3301 }
3302
3303 /* Void driver to protect from null pointer reference. */
3304 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3305         .validate = flow_null_validate,
3306         .prepare = flow_null_prepare,
3307         .translate = flow_null_translate,
3308         .apply = flow_null_apply,
3309         .remove = flow_null_remove,
3310         .destroy = flow_null_destroy,
3311         .query = flow_null_query,
3312         .sync_domain = flow_null_sync_domain,
3313 };
3314
3315 /**
3316  * Select flow driver type according to flow attributes and device
3317  * configuration.
3318  *
3319  * @param[in] dev
3320  *   Pointer to the dev structure.
3321  * @param[in] attr
3322  *   Pointer to the flow attributes.
3323  *
3324  * @return
3325  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3326  */
3327 static enum mlx5_flow_drv_type
3328 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3329 {
3330         struct mlx5_priv *priv = dev->data->dev_private;
3331         /* The OS can determine first a specific flow type (DV, VERBS) */
3332         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3333
3334         if (type != MLX5_FLOW_TYPE_MAX)
3335                 return type;
3336         /* If no OS specific type - continue with DV/VERBS selection */
3337         if (attr->transfer && priv->config.dv_esw_en)
3338                 type = MLX5_FLOW_TYPE_DV;
3339         if (!attr->transfer)
3340                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3341                                                  MLX5_FLOW_TYPE_VERBS;
3342         return type;
3343 }
3344
3345 #define flow_get_drv_ops(type) flow_drv_ops[type]
3346
3347 /**
3348  * Flow driver validation API. This abstracts calling driver specific functions.
3349  * The type of flow driver is determined according to flow attributes.
3350  *
3351  * @param[in] dev
3352  *   Pointer to the dev structure.
3353  * @param[in] attr
3354  *   Pointer to the flow attributes.
3355  * @param[in] items
3356  *   Pointer to the list of items.
3357  * @param[in] actions
3358  *   Pointer to the list of actions.
3359  * @param[in] external
3360  *   This flow rule is created by a request external to the PMD.
3361  * @param[in] hairpin
3362  *   Number of hairpin TX actions, 0 means classic flow.
3363  * @param[out] error
3364  *   Pointer to the error structure.
3365  *
3366  * @return
3367  *   0 on success, a negative errno value otherwise and rte_errno is set.
3368  */
3369 static inline int
3370 flow_drv_validate(struct rte_eth_dev *dev,
3371                   const struct rte_flow_attr *attr,
3372                   const struct rte_flow_item items[],
3373                   const struct rte_flow_action actions[],
3374                   bool external, int hairpin, struct rte_flow_error *error)
3375 {
3376         const struct mlx5_flow_driver_ops *fops;
3377         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3378
3379         fops = flow_get_drv_ops(type);
3380         return fops->validate(dev, attr, items, actions, external,
3381                               hairpin, error);
3382 }
3383
3384 /**
3385  * Flow driver preparation API. This abstracts calling driver specific
3386  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3387  * calculates the size of memory required for device flow, allocates the memory,
3388  * initializes the device flow and returns the pointer.
3389  *
3390  * @note
3391  *   This function initializes the device flow structure, such as dv or verbs in
3392  *   struct mlx5_flow. However, it is the caller's responsibility to initialize the
3393  *   rest. For example, adding the returned device flow to the flow->dev_flow list
3394  *   and setting the backward reference to the flow should be done outside of this
3395  *   function. The layers field is not filled either.
3396  *
3397  * @param[in] dev
3398  *   Pointer to the dev structure.
3399  * @param[in] attr
3400  *   Pointer to the flow attributes.
3401  * @param[in] items
3402  *   Pointer to the list of items.
3403  * @param[in] actions
3404  *   Pointer to the list of actions.
3405  * @param[in] flow_idx
3406  *   The memory pool index of this flow.
3407  * @param[out] error
3408  *   Pointer to the error structure.
3409  *
3410  * @return
3411  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3412  */
3413 static inline struct mlx5_flow *
3414 flow_drv_prepare(struct rte_eth_dev *dev,
3415                  const struct rte_flow *flow,
3416                  const struct rte_flow_attr *attr,
3417                  const struct rte_flow_item items[],
3418                  const struct rte_flow_action actions[],
3419                  uint32_t flow_idx,
3420                  struct rte_flow_error *error)
3421 {
3422         const struct mlx5_flow_driver_ops *fops;
3423         enum mlx5_flow_drv_type type = flow->drv_type;
3424         struct mlx5_flow *mlx5_flow = NULL;
3425
3426         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3427         fops = flow_get_drv_ops(type);
3428         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3429         if (mlx5_flow)
3430                 mlx5_flow->flow_idx = flow_idx;
3431         return mlx5_flow;
3432 }
3433
3434 /**
3435  * Flow driver translation API. This abstracts calling driver specific
3436  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3437  * translates a generic flow into a driver flow. flow_drv_prepare() must
3438  * precede.
3439  *
3440  * @note
3441  *   dev_flow->layers could be filled as a result of parsing during translation
3442  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3443  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3444  *   flow->actions could be overwritten even though all the expanded dev_flows
3445  *   have the same actions.
3446  *
3447  * @param[in] dev
3448  *   Pointer to the rte dev structure.
3449  * @param[in, out] dev_flow
3450  *   Pointer to the mlx5 flow.
3451  * @param[in] attr
3452  *   Pointer to the flow attributes.
3453  * @param[in] items
3454  *   Pointer to the list of items.
3455  * @param[in] actions
3456  *   Pointer to the list of actions.
3457  * @param[out] error
3458  *   Pointer to the error structure.
3459  *
3460  * @return
3461  *   0 on success, a negative errno value otherwise and rte_errno is set.
3462  */
3463 static inline int
3464 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3465                    const struct rte_flow_attr *attr,
3466                    const struct rte_flow_item items[],
3467                    const struct rte_flow_action actions[],
3468                    struct rte_flow_error *error)
3469 {
3470         const struct mlx5_flow_driver_ops *fops;
3471         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3472
3473         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3474         fops = flow_get_drv_ops(type);
3475         return fops->translate(dev, dev_flow, attr, items, actions, error);
3476 }
3477
3478 /**
3479  * Flow driver apply API. This abstracts calling driver specific functions.
3480  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3481  * translated driver flows on to device. flow_drv_translate() must precede.
3482  *
3483  * @param[in] dev
3484  *   Pointer to Ethernet device structure.
3485  * @param[in, out] flow
3486  *   Pointer to flow structure.
3487  * @param[out] error
3488  *   Pointer to error structure.
3489  *
3490  * @return
3491  *   0 on success, a negative errno value otherwise and rte_errno is set.
3492  */
3493 static inline int
3494 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3495                struct rte_flow_error *error)
3496 {
3497         const struct mlx5_flow_driver_ops *fops;
3498         enum mlx5_flow_drv_type type = flow->drv_type;
3499
3500         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3501         fops = flow_get_drv_ops(type);
3502         return fops->apply(dev, flow, error);
3503 }
3504
3505 /**
3506  * Flow driver destroy API. This abstracts calling driver specific functions.
3507  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3508  * on device and releases resources of the flow.
3509  *
3510  * @param[in] dev
3511  *   Pointer to Ethernet device.
3512  * @param[in, out] flow
3513  *   Pointer to flow structure.
3514  */
3515 static inline void
3516 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3517 {
3518         const struct mlx5_flow_driver_ops *fops;
3519         enum mlx5_flow_drv_type type = flow->drv_type;
3520
3521         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3522         fops = flow_get_drv_ops(type);
3523         fops->destroy(dev, flow);
3524 }
3525
3526 /**
3527  * Flow driver find RSS policy tbl API. This abstracts calling driver
3528  * specific functions. Parent flow (rte_flow) should have driver
3529  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3530  *
3531  * @param[in] dev
3532  *   Pointer to Ethernet device.
3533  * @param[in, out] flow
3534  *   Pointer to flow structure.
3535  * @param[in] policy
3536  *   Pointer to meter policy table.
3537  * @param[in] rss_desc
3538  *   Pointer to rss_desc
3539  */
3540 static struct mlx5_flow_meter_sub_policy *
3541 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3542                 struct rte_flow *flow,
3543                 struct mlx5_flow_meter_policy *policy,
3544                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3545 {
3546         const struct mlx5_flow_driver_ops *fops;
3547         enum mlx5_flow_drv_type type = flow->drv_type;
3548
3549         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3550         fops = flow_get_drv_ops(type);
3551         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3552 }
3553
3554 /**
3555  * Flow driver color tag rule API. This abstracts calling driver
3556  * specific functions. Parent flow (rte_flow) should have driver
3557  * type (drv_type). It will create the color tag rules in hierarchy meter.
3558  *
3559  * @param[in] dev
3560  *   Pointer to Ethernet device.
3561  * @param[in, out] flow
3562  *   Pointer to flow structure.
3563  * @param[in] fm
3564  *   Pointer to flow meter structure.
3565  * @param[in] src_port
3566  *   The src port this extra rule should use.
3567  * @param[in] item
3568  *   The src port id match item.
3569  * @param[out] error
3570  *   Pointer to error structure.
3571  */
3572 static int
3573 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3574                 struct rte_flow *flow,
3575                 struct mlx5_flow_meter_info *fm,
3576                 int32_t src_port,
3577                 const struct rte_flow_item *item,
3578                 struct rte_flow_error *error)
3579 {
3580         const struct mlx5_flow_driver_ops *fops;
3581         enum mlx5_flow_drv_type type = flow->drv_type;
3582
3583         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3584         fops = flow_get_drv_ops(type);
3585         return fops->meter_hierarchy_rule_create(dev, fm,
3586                                                 src_port, item, error);
3587 }
3588
3589 /**
3590  * Get RSS action from the action list.
3591  *
3592  * @param[in] dev
3593  *   Pointer to Ethernet device.
3594  * @param[in] actions
3595  *   Pointer to the list of actions.
3596  * @param[in] flow
3597  *   Parent flow structure pointer.
3598  *
3599  * @return
3600  *   Pointer to the RSS action if it exists, NULL otherwise.
3601  */
3602 static const struct rte_flow_action_rss*
3603 flow_get_rss_action(struct rte_eth_dev *dev,
3604                     const struct rte_flow_action actions[])
3605 {
3606         struct mlx5_priv *priv = dev->data->dev_private;
3607         const struct rte_flow_action_rss *rss = NULL;
3608         struct mlx5_meter_policy_action_container *acg;
3609         struct mlx5_meter_policy_action_container *acy;
3610
3611         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3612                 switch (actions->type) {
3613                 case RTE_FLOW_ACTION_TYPE_RSS:
3614                         rss = actions->conf;
3615                         break;
3616                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3617                 {
3618                         const struct rte_flow_action_sample *sample =
3619                                                                 actions->conf;
3620                         const struct rte_flow_action *act = sample->actions;
3621                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3622                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3623                                         rss = act->conf;
3624                         break;
3625                 }
3626                 case RTE_FLOW_ACTION_TYPE_METER:
3627                 {
3628                         uint32_t mtr_idx;
3629                         struct mlx5_flow_meter_info *fm;
3630                         struct mlx5_flow_meter_policy *policy;
3631                         const struct rte_flow_action_meter *mtr = actions->conf;
3632
3633                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3634                         if (fm && !fm->def_policy) {
3635                                 policy = mlx5_flow_meter_policy_find(dev,
3636                                                 fm->policy_id, NULL);
3637                                 MLX5_ASSERT(policy);
3638                                 if (policy->is_hierarchy) {
3639                                         policy =
3640                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
3641                                                                         policy);
3642                                         if (!policy)
3643                                                 return NULL;
3644                                 }
3645                                 if (policy->is_rss) {
3646                                         acg =
3647                                         &policy->act_cnt[RTE_COLOR_GREEN];
3648                                         acy =
3649                                         &policy->act_cnt[RTE_COLOR_YELLOW];
3650                                         if (acg->fate_action ==
3651                                             MLX5_FLOW_FATE_SHARED_RSS)
3652                                                 rss = acg->rss->conf;
3653                                         else if (acy->fate_action ==
3654                                                  MLX5_FLOW_FATE_SHARED_RSS)
3655                                                 rss = acy->rss->conf;
3656                                 }
3657                         }
3658                         break;
3659                 }
3660                 default:
3661                         break;
3662                 }
3663         }
3664         return rss;
3665 }
3666
3667 /**
3668  * Get ASO age action by index.
3669  *
3670  * @param[in] dev
3671  *   Pointer to the Ethernet device structure.
3672  * @param[in] age_idx
3673  *   Index to the ASO age action.
3674  *
3675  * @return
3676  *   The specified ASO age action.
3677  */
3678 struct mlx5_aso_age_action*
3679 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3680 {
3681         uint16_t pool_idx = age_idx & UINT16_MAX;
3682         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3683         struct mlx5_priv *priv = dev->data->dev_private;
3684         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3685         struct mlx5_aso_age_pool *pool;
3686
3687         rte_rwlock_read_lock(&mng->resize_rwl);
3688         pool = mng->pools[pool_idx];
3689         rte_rwlock_read_unlock(&mng->resize_rwl);
3690         return &pool->actions[offset - 1];
3691 }
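
/*
 * Illustrative sketch, not part of the driver: judging from the decoding in
 * flow_aso_age_get_by_idx() above, an ASO age index packs the pool index in
 * the lower 16 bits and a 1-based action offset in the upper 16 bits
 * (presumably so that the value 0 can denote an unset index). A composer for
 * such an index could look roughly like the helper below; the helper name is
 * hypothetical.
 */
static __rte_unused uint32_t
example_aso_age_idx_compose(uint16_t pool_idx, uint16_t action_offset)
{
	/* Store the in-pool offset 1-based, matching the "offset - 1" above. */
	return ((uint32_t)(action_offset + 1) << 16) | pool_idx;
}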
3692
3693 /* Maps an indirect action to its translated direct action in some actions array. */
3694 struct mlx5_translated_action_handle {
3695         struct rte_flow_action_handle *action; /**< Indirect action handle. */
3696         int index; /**< Index in related array of rte_flow_action. */
3697 };
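
/*
 * Note, based on the decoding performed below: the handle pointer value itself
 * encodes the indirect action. The bits at and above
 * MLX5_INDIRECT_ACTION_TYPE_OFFSET hold the action type and the bits below it
 * hold the object index, so the handle does not need to be dereferenced to be
 * classified.
 */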
3698
3699 /**
3700  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to the related
3701  * direct actions if translation is possible.
3702  * This functionality is used to run the same execution path for both direct
3703  * and indirect actions on flow create. All necessary preparations for
3704  * indirect action handling should be performed on the *handle* actions list
3705  * returned from this call.
3706  *
3707  * @param[in] dev
3708  *   Pointer to Ethernet device.
3709  * @param[in] actions
3710  *   List of actions to translate.
3711  * @param[out] handle
3712  *   List to store translated indirect action object handles.
3713  * @param[in, out] indir_n
3714  *   Size of *handle* array. On return should be updated with number of
3715  *   indirect actions retrieved from the *actions* list.
3716  * @param[out] translated_actions
3717  *   List of actions where all indirect actions were translated to direct
3718  *   if possible. NULL if no translation took place.
3719  * @param[out] error
3720  *   Pointer to the error structure.
3721  *
3722  * @return
3723  *   0 on success, a negative errno value otherwise and rte_errno is set.
3724  */
3725 static int
3726 flow_action_handles_translate(struct rte_eth_dev *dev,
3727                               const struct rte_flow_action actions[],
3728                               struct mlx5_translated_action_handle *handle,
3729                               int *indir_n,
3730                               struct rte_flow_action **translated_actions,
3731                               struct rte_flow_error *error)
3732 {
3733         struct mlx5_priv *priv = dev->data->dev_private;
3734         struct rte_flow_action *translated = NULL;
3735         size_t actions_size;
3736         int n;
3737         int copied_n = 0;
3738         struct mlx5_translated_action_handle *handle_end = NULL;
3739
3740         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3741                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
3742                         continue;
3743                 if (copied_n == *indir_n) {
3744                         return rte_flow_error_set
3745                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3746                                  NULL, "too many shared actions");
3747                 }
3748                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
3749                            sizeof(actions[n].conf));
3750                 handle[copied_n].index = n;
3751                 copied_n++;
3752         }
3753         n++;
3754         *indir_n = copied_n;
3755         if (!copied_n)
3756                 return 0;
3757         actions_size = sizeof(struct rte_flow_action) * n;
3758         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3759         if (!translated) {
3760                 rte_errno = ENOMEM;
3761                 return -ENOMEM;
3762         }
3763         memcpy(translated, actions, actions_size);
3764         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
3765                 struct mlx5_shared_action_rss *shared_rss;
3766                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3767                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3768                 uint32_t idx = act_idx &
3769                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3770
3771                 switch (type) {
3772                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3773                         shared_rss = mlx5_ipool_get
3774                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3775                         translated[handle->index].type =
3776                                 RTE_FLOW_ACTION_TYPE_RSS;
3777                         translated[handle->index].conf =
3778                                 &shared_rss->origin;
3779                         break;
3780                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
3781                         translated[handle->index].type =
3782                                                 (enum rte_flow_action_type)
3783                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
3784                         translated[handle->index].conf = (void *)(uintptr_t)idx;
3785                         break;
3786                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
3787                         if (priv->sh->flow_hit_aso_en) {
3788                                 translated[handle->index].type =
3789                                         (enum rte_flow_action_type)
3790                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3791                                 translated[handle->index].conf =
3792                                                          (void *)(uintptr_t)idx;
3793                                 break;
3794                         }
3795                         /* Fall-through */
3796                 case MLX5_INDIRECT_ACTION_TYPE_CT:
3797                         if (priv->sh->ct_aso_en) {
3798                                 translated[handle->index].type =
3799                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
3800                                 translated[handle->index].conf =
3801                                                          (void *)(uintptr_t)idx;
3802                                 break;
3803                         }
3804                         /* Fall-through */
3805                 default:
3806                         mlx5_free(translated);
3807                         return rte_flow_error_set
3808                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3809                                  NULL, "invalid indirect action type");
3810                 }
3811         }
3812         *translated_actions = translated;
3813         return 0;
3814 }
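
/*
 * Illustrative sketch, not part of the driver: on the application side an
 * indirect action is created once and then referenced from a flow rule as
 * RTE_FLOW_ACTION_TYPE_INDIRECT with the returned handle used as "conf".
 * flow_action_handles_translate() above converts such entries back into the
 * corresponding direct actions before flow creation. The helper below only
 * shows the assumed usage pattern; its name is hypothetical.
 */
static __rte_unused struct rte_flow_action_handle *
example_create_indirect_rss(uint16_t port_id,
			    const struct rte_flow_action_rss *rss_conf,
			    struct rte_flow_error *error)
{
	const struct rte_flow_indir_action_conf indir_conf = {
		.ingress = 1,
	};
	const struct rte_flow_action rss_action = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = rss_conf,
	};

	/*
	 * The returned handle becomes the "conf" of an
	 * RTE_FLOW_ACTION_TYPE_INDIRECT action in the rule's action list.
	 */
	return rte_flow_action_handle_create(port_id, &indir_conf,
					     &rss_action, error);
}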
3815
3816 /**
3817  * Get Shared RSS action from the action list.
3818  *
3819  * @param[in] dev
3820  *   Pointer to Ethernet device.
3821  * @param[in] handle
3822  *   Pointer to the list of translated action handles.
3823  * @param[in] shared_n
3824  *   Length of the *handle* list.
3825  *
3826  * @return
3827  *   The MLX5 RSS action ID if exists, otherwise return 0.
3828  */
3829 static uint32_t
3830 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3831                            struct mlx5_translated_action_handle *handle,
3832                            int shared_n)
3833 {
3834         struct mlx5_translated_action_handle *handle_end;
3835         struct mlx5_priv *priv = dev->data->dev_private;
3836         struct mlx5_shared_action_rss *shared_rss;
3837
3838
3839         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
3840                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3841                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3842                 uint32_t idx = act_idx &
3843                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3844                 switch (type) {
3845                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3846                         shared_rss = mlx5_ipool_get
3847                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3848                                                                            idx);
3849                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3850                                            __ATOMIC_RELAXED);
3851                         return idx;
3852                 default:
3853                         break;
3854                 }
3855         }
3856         return 0;
3857 }
3858
3859 static unsigned int
3860 find_graph_root(uint32_t rss_level)
3861 {
3862         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3863                                MLX5_EXPANSION_ROOT_OUTER;
3864 }
3865
3866 /**
3867  *  Get layer flags from the prefix flow.
3868  *
3869  *  Some flows may be split into several subflows: the prefix subflow gets the
3870  *  match items and the suffix subflow gets the actions.
3871  *  Some actions need the user-defined match item flags to get the details for
3872  *  the action.
3873  *  This function helps the suffix flow to get the item layer flags from the
3874  *  prefix subflow.
3875  *
3876  * @param[in] dev_flow
3877  *   Pointer to the created prefix subflow.
3878  *
3879  * @return
3880  *   The layers obtained from the prefix subflow.
3881  */
3882 static inline uint64_t
3883 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3884 {
3885         uint64_t layers = 0;
3886
3887         /*
3888          * The layer bits could be stored in a local variable, but usually
3889          * the compiler will do that optimization on its own.
3890          * If there is no decap action, use the layers directly.
3891          */
3892         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3893                 return dev_flow->handle->layers;
3894         /* Convert L3 layers with decap action. */
3895         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3896                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3897         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3898                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3899         /* Convert L4 layers with decap action.  */
3900         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3901                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3902         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3903                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3904         return layers;
3905 }
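
/*
 * Example of the conversion above: a prefix subflow with a decap action that
 * matched inner IPv4/UDP reports
 *   MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L4_UDP
 * to its suffix subflow, since after decapsulation the former inner headers
 * become the outermost ones.
 */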
3906
3907 /**
3908  * Get metadata split action information.
3909  *
3910  * @param[in] actions
3911  *   Pointer to the list of actions.
3912  * @param[out] qrss
3913  *   Pointer used to return the QUEUE/RSS action if one is found in the
3914  *   actions list, left untouched otherwise.
3917  * @param[out] encap_idx
3918  *   Pointer to the index of the encap action if exists, otherwise the last
3919  *   action index.
3920  *
3921  * @return
3922  *   Total number of actions.
3923  */
3924 static int
3925 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3926                                        const struct rte_flow_action **qrss,
3927                                        int *encap_idx)
3928 {
3929         const struct rte_flow_action_raw_encap *raw_encap;
3930         int actions_n = 0;
3931         int raw_decap_idx = -1;
3932
3933         *encap_idx = -1;
3934         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3935                 switch (actions->type) {
3936                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3937                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3938                         *encap_idx = actions_n;
3939                         break;
3940                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3941                         raw_decap_idx = actions_n;
3942                         break;
3943                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3944                         raw_encap = actions->conf;
3945                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3946                                 *encap_idx = raw_decap_idx != -1 ?
3947                                                       raw_decap_idx : actions_n;
3948                         break;
3949                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3950                 case RTE_FLOW_ACTION_TYPE_RSS:
3951                         *qrss = actions;
3952                         break;
3953                 default:
3954                         break;
3955                 }
3956                 actions_n++;
3957         }
3958         if (*encap_idx == -1)
3959                 *encap_idx = actions_n;
3960         /* Count RTE_FLOW_ACTION_TYPE_END. */
3961         return actions_n + 1;
3962 }
3963
3964 /**
3965  * Check if the action will change the packet.
3966  *
3967  * @param dev
3968  *   Pointer to Ethernet device.
3969  * @param[in] type
3970  *   Action type.
3971  *
3972  * @return
3973  *   true if the action will change the packet, false otherwise.
3974  */
3975 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
3976                                           enum rte_flow_action_type type)
3977 {
3978         struct mlx5_priv *priv = dev->data->dev_private;
3979
3980         switch (type) {
3981         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3982         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3983         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3984         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3985         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3986         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3987         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3988         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3989         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3990         case RTE_FLOW_ACTION_TYPE_SET_TTL:
3991         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
3992         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
3993         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
3994         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
3995         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
3996         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
3997         case RTE_FLOW_ACTION_TYPE_SET_META:
3998         case RTE_FLOW_ACTION_TYPE_SET_TAG:
3999         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4000         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4001         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4002         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4003         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4004         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4005         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4006         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4007         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4008         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4009         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4010                 return true;
4011         case RTE_FLOW_ACTION_TYPE_FLAG:
4012         case RTE_FLOW_ACTION_TYPE_MARK:
4013                 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4014                         return true;
4015                 else
4016                         return false;
4017         default:
4018                 return false;
4019         }
4020 }
4021
4022 /**
4023  * Check meter action from the action list.
4024  *
4025  * @param dev
4026  *   Pointer to Ethernet device.
4027  * @param[in] actions
4028  *   Pointer to the list of actions.
4029  * @param[out] has_mtr
4030  *   Pointer to the flag indicating whether a meter action exists.
4031  * @param[out] has_modify
4032  *   Pointer to the flag set when a packet-modifying action precedes the meter.
4033  * @param[out] meter_id
4034  *   Pointer to the meter id.
4035  *
4036  * @return
4037  *   Total number of actions.
4038  */
4039 static int
4040 flow_check_meter_action(struct rte_eth_dev *dev,
4041                         const struct rte_flow_action actions[],
4042                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4043 {
4044         const struct rte_flow_action_meter *mtr = NULL;
4045         int actions_n = 0;
4046
4047         MLX5_ASSERT(has_mtr);
4048         *has_mtr = false;
4049         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4050                 switch (actions->type) {
4051                 case RTE_FLOW_ACTION_TYPE_METER:
4052                         mtr = actions->conf;
4053                         *meter_id = mtr->mtr_id;
4054                         *has_mtr = true;
4055                         break;
4056                 default:
4057                         break;
4058                 }
4059                 if (!*has_mtr)
4060                         *has_modify |= flow_check_modify_action_type(dev,
4061                                                                 actions->type);
4062                 actions_n++;
4063         }
4064         /* Count RTE_FLOW_ACTION_TYPE_END. */
4065         return actions_n + 1;
4066 }
4067
4068 /**
4069  * Check if the flow should be split due to hairpin.
4070  * The reason for the split is that the current HW can't
4071  * support encap and push-vlan on Rx, so if a flow contains
4072  * these actions we move them to Tx.
4073  *
4074  * @param dev
4075  *   Pointer to Ethernet device.
4076  * @param[in] attr
4077  *   Flow rule attributes.
4078  * @param[in] actions
4079  *   Associated actions (list terminated by the END action).
4080  *
4081  * @return
4082  *   > 0 the number of actions and the flow should be split,
4083  *   0 when no split required.
4084  */
4085 static int
4086 flow_check_hairpin_split(struct rte_eth_dev *dev,
4087                          const struct rte_flow_attr *attr,
4088                          const struct rte_flow_action actions[])
4089 {
4090         int queue_action = 0;
4091         int action_n = 0;
4092         int split = 0;
4093         const struct rte_flow_action_queue *queue;
4094         const struct rte_flow_action_rss *rss;
4095         const struct rte_flow_action_raw_encap *raw_encap;
4096         const struct rte_eth_hairpin_conf *conf;
4097
4098         if (!attr->ingress)
4099                 return 0;
4100         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4101                 switch (actions->type) {
4102                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4103                         queue = actions->conf;
4104                         if (queue == NULL)
4105                                 return 0;
4106                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4107                         if (conf == NULL || conf->tx_explicit != 0)
4108                                 return 0;
4109                         queue_action = 1;
4110                         action_n++;
4111                         break;
4112                 case RTE_FLOW_ACTION_TYPE_RSS:
4113                         rss = actions->conf;
4114                         if (rss == NULL || rss->queue_num == 0)
4115                                 return 0;
4116                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4117                         if (conf == NULL || conf->tx_explicit != 0)
4118                                 return 0;
4119                         queue_action = 1;
4120                         action_n++;
4121                         break;
4122                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4123                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4124                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4125                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4126                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4127                         split++;
4128                         action_n++;
4129                         break;
4130                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4131                         raw_encap = actions->conf;
4132                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4133                                 split++;
4134                         action_n++;
4135                         break;
4136                 default:
4137                         action_n++;
4138                         break;
4139                 }
4140         }
4141         if (split && queue_action)
4142                 return action_n;
4143         return 0;
4144 }
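
/*
 * Example of the check above: an ingress rule with actions
 * VXLAN_ENCAP / QUEUE, where the queue is a hairpin queue without explicit Tx
 * rules, returns a positive action count and is therefore split, while the
 * same rule targeting a regular Rx queue returns 0 and stays as a single flow.
 */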
4145
4146 /* Declare flow create/destroy prototype in advance. */
4147 static uint32_t
4148 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4149                  const struct rte_flow_attr *attr,
4150                  const struct rte_flow_item items[],
4151                  const struct rte_flow_action actions[],
4152                  bool external, struct rte_flow_error *error);
4153
4154 static void
4155 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4156                   uint32_t flow_idx);
4157
4158 int
4159 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4160                       struct mlx5_list_entry *entry, void *cb_ctx)
4161 {
4162         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4163         struct mlx5_flow_mreg_copy_resource *mcp_res =
4164                                container_of(entry, typeof(*mcp_res), hlist_ent);
4165
4166         return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4167 }
4168
4169 struct mlx5_list_entry *
4170 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4171 {
4172         struct rte_eth_dev *dev = tool_ctx;
4173         struct mlx5_priv *priv = dev->data->dev_private;
4174         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4175         struct mlx5_flow_mreg_copy_resource *mcp_res;
4176         struct rte_flow_error *error = ctx->error;
4177         uint32_t idx = 0;
4178         int ret;
4179         uint32_t mark_id = *(uint32_t *)(ctx->data);
4180         struct rte_flow_attr attr = {
4181                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4182                 .ingress = 1,
4183         };
4184         struct mlx5_rte_flow_item_tag tag_spec = {
4185                 .data = mark_id,
4186         };
4187         struct rte_flow_item items[] = {
4188                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4189         };
4190         struct rte_flow_action_mark ftag = {
4191                 .id = mark_id,
4192         };
4193         struct mlx5_flow_action_copy_mreg cp_mreg = {
4194                 .dst = REG_B,
4195                 .src = REG_NON,
4196         };
4197         struct rte_flow_action_jump jump = {
4198                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4199         };
4200         struct rte_flow_action actions[] = {
4201                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4202         };
4203
4204         /* Fill the register fields in the flow. */
4205         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4206         if (ret < 0)
4207                 return NULL;
4208         tag_spec.id = ret;
4209         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4210         if (ret < 0)
4211                 return NULL;
4212         cp_mreg.src = ret;
4213         /* Provide the full width of FLAG specific value. */
4214         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4215                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4216         /* Build a new flow. */
4217         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4218                 items[0] = (struct rte_flow_item){
4219                         .type = (enum rte_flow_item_type)
4220                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4221                         .spec = &tag_spec,
4222                 };
4223                 items[1] = (struct rte_flow_item){
4224                         .type = RTE_FLOW_ITEM_TYPE_END,
4225                 };
4226                 actions[0] = (struct rte_flow_action){
4227                         .type = (enum rte_flow_action_type)
4228                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4229                         .conf = &ftag,
4230                 };
4231                 actions[1] = (struct rte_flow_action){
4232                         .type = (enum rte_flow_action_type)
4233                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4234                         .conf = &cp_mreg,
4235                 };
4236                 actions[2] = (struct rte_flow_action){
4237                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4238                         .conf = &jump,
4239                 };
4240                 actions[3] = (struct rte_flow_action){
4241                         .type = RTE_FLOW_ACTION_TYPE_END,
4242                 };
4243         } else {
4244                 /* Default rule, wildcard match. */
4245                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4246                 items[0] = (struct rte_flow_item){
4247                         .type = RTE_FLOW_ITEM_TYPE_END,
4248                 };
4249                 actions[0] = (struct rte_flow_action){
4250                         .type = (enum rte_flow_action_type)
4251                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4252                         .conf = &cp_mreg,
4253                 };
4254                 actions[1] = (struct rte_flow_action){
4255                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4256                         .conf = &jump,
4257                 };
4258                 actions[2] = (struct rte_flow_action){
4259                         .type = RTE_FLOW_ACTION_TYPE_END,
4260                 };
4261         }
4262         /* Build a new entry. */
4263         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4264         if (!mcp_res) {
4265                 rte_errno = ENOMEM;
4266                 return NULL;
4267         }
4268         mcp_res->idx = idx;
4269         mcp_res->mark_id = mark_id;
4270         /*
4271          * The copy flows are not included in any list. These
4272          * ones are referenced from other flows and cannot
4273          * be applied, removed or deleted in arbitrary order
4274          * by list traversal.
4275          */
4276         mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4277                                         &attr, items, actions, false, error);
4278         if (!mcp_res->rix_flow) {
4279                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4280                 return NULL;
4281         }
4282         return &mcp_res->hlist_ent;
4283 }
4284
4285 struct mlx5_list_entry *
4286 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4287                       void *cb_ctx __rte_unused)
4288 {
4289         struct rte_eth_dev *dev = tool_ctx;
4290         struct mlx5_priv *priv = dev->data->dev_private;
4291         struct mlx5_flow_mreg_copy_resource *mcp_res;
4292         uint32_t idx = 0;
4293
4294         mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4295         if (!mcp_res) {
4296                 rte_errno = ENOMEM;
4297                 return NULL;
4298         }
4299         memcpy(mcp_res, oentry, sizeof(*mcp_res));
4300         mcp_res->idx = idx;
4301         return &mcp_res->hlist_ent;
4302 }
4303
4304 void
4305 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4306 {
4307         struct mlx5_flow_mreg_copy_resource *mcp_res =
4308                                container_of(entry, typeof(*mcp_res), hlist_ent);
4309         struct rte_eth_dev *dev = tool_ctx;
4310         struct mlx5_priv *priv = dev->data->dev_private;
4311
4312         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4313 }
4314
4315 /**
4316  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4317  *
4318  * As mark_id is unique, if there's already a registered flow for the mark_id,
4319  * return by increasing the reference counter of the resource. Otherwise, create
4320  * the resource (mcp_res) and flow.
4321  *
4322  * Flow looks like,
4323  *   - If ingress port is ANY and reg_c[1] is mark_id,
4324  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4325  *
4326  * For default flow (zero mark_id), flow is like,
4327  *   - If ingress port is ANY,
4328  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4329  *
4330  * @param dev
4331  *   Pointer to Ethernet device.
4332  * @param mark_id
4333  *   ID of MARK action, zero means default flow for META.
4334  * @param[out] error
4335  *   Perform verbose error reporting if not NULL.
4336  *
4337  * @return
4338  *   Associated resource on success, NULL otherwise and rte_errno is set.
4339  */
4340 static struct mlx5_flow_mreg_copy_resource *
4341 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4342                           struct rte_flow_error *error)
4343 {
4344         struct mlx5_priv *priv = dev->data->dev_private;
4345         struct mlx5_list_entry *entry;
4346         struct mlx5_flow_cb_ctx ctx = {
4347                 .dev = dev,
4348                 .error = error,
4349                 .data = &mark_id,
4350         };
4351
4352         /* Check if already registered. */
4353         MLX5_ASSERT(priv->mreg_cp_tbl);
4354         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4355         if (!entry)
4356                 return NULL;
4357         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4358                             hlist_ent);
4359 }
4360
4361 void
4362 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4363 {
4364         struct mlx5_flow_mreg_copy_resource *mcp_res =
4365                                container_of(entry, typeof(*mcp_res), hlist_ent);
4366         struct rte_eth_dev *dev = tool_ctx;
4367         struct mlx5_priv *priv = dev->data->dev_private;
4368
4369         MLX5_ASSERT(mcp_res->rix_flow);
4370         flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4371         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4372 }
4373
4374 /**
4375  * Release flow in RX_CP_TBL.
4376  *
4377  * @param dev
4378  *   Pointer to Ethernet device.
4379  * @param flow
4380  *   Parent flow for which copying is provided.
4381  */
4382 static void
4383 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4384                           struct rte_flow *flow)
4385 {
4386         struct mlx5_flow_mreg_copy_resource *mcp_res;
4387         struct mlx5_priv *priv = dev->data->dev_private;
4388
4389         if (!flow->rix_mreg_copy)
4390                 return;
4391         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4392                                  flow->rix_mreg_copy);
4393         if (!mcp_res || !priv->mreg_cp_tbl)
4394                 return;
4395         MLX5_ASSERT(mcp_res->rix_flow);
4396         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4397         flow->rix_mreg_copy = 0;
4398 }
4399
4400 /**
4401  * Remove the default copy action from RX_CP_TBL.
4402  *
4403  * This function is called in mlx5_dev_start(). Thread safety is not
4404  * guaranteed.
4405  *
4406  * @param dev
4407  *   Pointer to Ethernet device.
4408  */
4409 static void
4410 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4411 {
4412         struct mlx5_list_entry *entry;
4413         struct mlx5_priv *priv = dev->data->dev_private;
4414         struct mlx5_flow_cb_ctx ctx;
4415         uint32_t mark_id;
4416
4417         /* Check if default flow is registered. */
4418         if (!priv->mreg_cp_tbl)
4419                 return;
4420         mark_id = MLX5_DEFAULT_COPY_ID;
4421         ctx.data = &mark_id;
4422         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4423         if (!entry)
4424                 return;
4425         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4426 }
4427
4428 /**
4429  * Add the default copy action in RX_CP_TBL.
4430  *
4431  * This function is called in mlx5_dev_start(). Thread safety is not
4432  * guaranteed.
4433  *
4434  * @param dev
4435  *   Pointer to Ethernet device.
4436  * @param[out] error
4437  *   Perform verbose error reporting if not NULL.
4438  *
4439  * @return
4440  *   0 for success, negative value otherwise and rte_errno is set.
4441  */
4442 static int
4443 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4444                                   struct rte_flow_error *error)
4445 {
4446         struct mlx5_priv *priv = dev->data->dev_private;
4447         struct mlx5_flow_mreg_copy_resource *mcp_res;
4448         struct mlx5_flow_cb_ctx ctx;
4449         uint32_t mark_id;
4450
4451         /* Check whether extensive metadata feature is engaged. */
4452         if (!priv->config.dv_flow_en ||
4453             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4454             !mlx5_flow_ext_mreg_supported(dev) ||
4455             !priv->sh->dv_regc0_mask)
4456                 return 0;
4457         /*
4458          * Adding the default mreg copy flow may be called multiple times,
4459          * but it is removed only once in stop. Avoid registering it twice.
4460          */
4461         mark_id = MLX5_DEFAULT_COPY_ID;
4462         ctx.data = &mark_id;
4463         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4464                 return 0;
4465         mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4466         if (!mcp_res)
4467                 return -rte_errno;
4468         return 0;
4469 }
4470
4471 /**
4472  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4473  *
4474  * All the flow having Q/RSS action should be split by
4475  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4476  * performs the following,
4477  *   - CQE->flow_tag := reg_c[1] (MARK)
4478  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4479  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4480  * but there should be a flow per each MARK ID set by MARK action.
4481  *
4482  * For the aforementioned reason, if there's a MARK action in flow's action
4483  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4484  * the MARK ID to CQE's flow_tag like,
4485  *   - If reg_c[1] is mark_id,
4486  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4487  *
4488  * For SET_META action which stores value in reg_c[0], as the destination is
4489  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4490  * MARK ID means the default flow. The default flow looks like,
4491  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4492  *
4493  * @param dev
4494  *   Pointer to Ethernet device.
4495  * @param flow
4496  *   Pointer to flow structure.
4497  * @param[in] actions
4498  *   Pointer to the list of actions.
4499  * @param[out] error
4500  *   Perform verbose error reporting if not NULL.
4501  *
4502  * @return
4503  *   0 on success, negative value otherwise and rte_errno is set.
4504  */
4505 static int
4506 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4507                             struct rte_flow *flow,
4508                             const struct rte_flow_action *actions,
4509                             struct rte_flow_error *error)
4510 {
4511         struct mlx5_priv *priv = dev->data->dev_private;
4512         struct mlx5_dev_config *config = &priv->config;
4513         struct mlx5_flow_mreg_copy_resource *mcp_res;
4514         const struct rte_flow_action_mark *mark;
4515
4516         /* Check whether extensive metadata feature is engaged. */
4517         if (!config->dv_flow_en ||
4518             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4519             !mlx5_flow_ext_mreg_supported(dev) ||
4520             !priv->sh->dv_regc0_mask)
4521                 return 0;
4522         /* Find MARK action. */
4523         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4524                 switch (actions->type) {
4525                 case RTE_FLOW_ACTION_TYPE_FLAG:
4526                         mcp_res = flow_mreg_add_copy_action
4527                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4528                         if (!mcp_res)
4529                                 return -rte_errno;
4530                         flow->rix_mreg_copy = mcp_res->idx;
4531                         return 0;
4532                 case RTE_FLOW_ACTION_TYPE_MARK:
4533                         mark = (const struct rte_flow_action_mark *)
4534                                 actions->conf;
4535                         mcp_res =
4536                                 flow_mreg_add_copy_action(dev, mark->id, error);
4537                         if (!mcp_res)
4538                                 return -rte_errno;
4539                         flow->rix_mreg_copy = mcp_res->idx;
4540                         return 0;
4541                 default:
4542                         break;
4543                 }
4544         }
4545         return 0;
4546 }
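
/*
 * Example of the copy-table update above: a user rule with action
 * "mark id 0xCAFE" gets a companion RX_CP_TBL flow that matches
 * reg_c[1] == 0xCAFE and performs flow_tag := 0xCAFE, reg_b := reg_c[0] and a
 * jump to RX_ACT_TBL, so the MARK value becomes visible in the Rx CQE.
 */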
4547
4548 #define MLX5_MAX_SPLIT_ACTIONS 24
4549 #define MLX5_MAX_SPLIT_ITEMS 24
4550
4551 /**
4552  * Split the hairpin flow.
4553  * Since HW can't support encap and push-vlan on Rx, we move these
4554  * actions to Tx.
4555  * If the count action is after the encap then we also
4556  * move the count action. In this case the count will also measure
4557  * the outer bytes.
4558  *
4559  * @param dev
4560  *   Pointer to Ethernet device.
4561  * @param[in] actions
4562  *   Associated actions (list terminated by the END action).
4563  * @param[out] actions_rx
4564  *   Rx flow actions.
4565  * @param[out] actions_tx
4566  *   Tx flow actions.
4567  * @param[out] pattern_tx
4568  *   The pattern items for the Tx flow.
4569  * @param[in] flow_id
4570  *   The flow ID connected to this flow.
4571  *
4572  * @return
4573  *   0 on success.
4574  */
4575 static int
4576 flow_hairpin_split(struct rte_eth_dev *dev,
4577                    const struct rte_flow_action actions[],
4578                    struct rte_flow_action actions_rx[],
4579                    struct rte_flow_action actions_tx[],
4580                    struct rte_flow_item pattern_tx[],
4581                    uint32_t flow_id)
4582 {
4583         const struct rte_flow_action_raw_encap *raw_encap;
4584         const struct rte_flow_action_raw_decap *raw_decap;
4585         struct mlx5_rte_flow_action_set_tag *set_tag;
4586         struct rte_flow_action *tag_action;
4587         struct mlx5_rte_flow_item_tag *tag_item;
4588         struct rte_flow_item *item;
4589         char *addr;
4590         int encap = 0;
4591
4592         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4593                 switch (actions->type) {
4594                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4595                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4596                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4597                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4598                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4599                         rte_memcpy(actions_tx, actions,
4600                                sizeof(struct rte_flow_action));
4601                         actions_tx++;
4602                         break;
4603                 case RTE_FLOW_ACTION_TYPE_COUNT:
4604                         if (encap) {
4605                                 rte_memcpy(actions_tx, actions,
4606                                            sizeof(struct rte_flow_action));
4607                                 actions_tx++;
4608                         } else {
4609                                 rte_memcpy(actions_rx, actions,
4610                                            sizeof(struct rte_flow_action));
4611                                 actions_rx++;
4612                         }
4613                         break;
4614                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4615                         raw_encap = actions->conf;
4616                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4617                                 memcpy(actions_tx, actions,
4618                                        sizeof(struct rte_flow_action));
4619                                 actions_tx++;
4620                                 encap = 1;
4621                         } else {
4622                                 rte_memcpy(actions_rx, actions,
4623                                            sizeof(struct rte_flow_action));
4624                                 actions_rx++;
4625                         }
4626                         break;
4627                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4628                         raw_decap = actions->conf;
4629                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4630                                 memcpy(actions_tx, actions,
4631                                        sizeof(struct rte_flow_action));
4632                                 actions_tx++;
4633                         } else {
4634                                 rte_memcpy(actions_rx, actions,
4635                                            sizeof(struct rte_flow_action));
4636                                 actions_rx++;
4637                         }
4638                         break;
4639                 default:
4640                         rte_memcpy(actions_rx, actions,
4641                                    sizeof(struct rte_flow_action));
4642                         actions_rx++;
4643                         break;
4644                 }
4645         }
4646         /* Add set meta action and end action for the Rx flow. */
4647         tag_action = actions_rx;
4648         tag_action->type = (enum rte_flow_action_type)
4649                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4650         actions_rx++;
4651         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4652         actions_rx++;
4653         set_tag = (void *)actions_rx;
4654         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4655                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4656                 .data = flow_id,
4657         };
4658         MLX5_ASSERT(set_tag->id > REG_NON);
4659         tag_action->conf = set_tag;
4660         /* Create Tx item list. */
4661         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4662         addr = (void *)&pattern_tx[2];
4663         item = pattern_tx;
4664         item->type = (enum rte_flow_item_type)
4665                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4666         tag_item = (void *)addr;
4667         tag_item->data = flow_id;
4668         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4669                 MLX5_ASSERT(tag_item->id > REG_NON);
4670         item->spec = tag_item;
4671         addr += sizeof(struct mlx5_rte_flow_item_tag);
4672         tag_item = (void *)addr;
4673         tag_item->data = UINT32_MAX;
4674         tag_item->id = UINT16_MAX;
4675         item->mask = tag_item;
4676         item->last = NULL;
4677         item++;
4678         item->type = RTE_FLOW_ITEM_TYPE_END;
4679         return 0;
4680 }
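
/*
 * Example of the split above: for a hairpin rule with actions
 * RAW_ENCAP (size above the decision threshold) / QUEUE, the Rx part keeps
 * QUEUE and gains a TAG action carrying "flow_id", while the Tx part receives
 * the RAW_ENCAP action and a TAG pattern item matching the same "flow_id",
 * which ties the two halves together.
 */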
4681
4682 /**
4683  * The last stage of splitting chain, just creates the subflow
4684  * without any modification.
4685  *
4686  * @param[in] dev
4687  *   Pointer to Ethernet device.
4688  * @param[in] flow
4689  *   Parent flow structure pointer.
4690  * @param[in, out] sub_flow
4691  *   Pointer to return the created subflow, may be NULL.
4692  * @param[in] attr
4693  *   Flow rule attributes.
4694  * @param[in] items
4695  *   Pattern specification (list terminated by the END pattern item).
4696  * @param[in] actions
4697  *   Associated actions (list terminated by the END action).
4698  * @param[in] flow_split_info
4699  *   Pointer to flow split info structure.
4700  * @param[out] error
4701  *   Perform verbose error reporting if not NULL.
4702  * @return
4703  *   0 on success, negative value otherwise
4704  */
4705 static int
4706 flow_create_split_inner(struct rte_eth_dev *dev,
4707                         struct rte_flow *flow,
4708                         struct mlx5_flow **sub_flow,
4709                         const struct rte_flow_attr *attr,
4710                         const struct rte_flow_item items[],
4711                         const struct rte_flow_action actions[],
4712                         struct mlx5_flow_split_info *flow_split_info,
4713                         struct rte_flow_error *error)
4714 {
4715         struct mlx5_flow *dev_flow;
4716
4717         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4718                                     flow_split_info->flow_idx, error);
4719         if (!dev_flow)
4720                 return -rte_errno;
4721         dev_flow->flow = flow;
4722         dev_flow->external = flow_split_info->external;
4723         dev_flow->skip_scale = flow_split_info->skip_scale;
4724         /* Subflow object was created, we must include one in the list. */
4725         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4726                       dev_flow->handle, next);
4727         /*
4728          * If dev_flow is one of the suffix flows, some actions in the
4729          * suffix flow may need the user-defined item layer flags; pass the
4730          * metadata Rx queue mark flag to the suffix flow as well.
4731          */
4732         if (flow_split_info->prefix_layers)
4733                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4734         if (flow_split_info->prefix_mark)
4735                 dev_flow->handle->mark = 1;
4736         if (sub_flow)
4737                 *sub_flow = dev_flow;
4738 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4739         dev_flow->dv.table_id = flow_split_info->table_id;
4740 #endif
4741         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4742 }
4743
4744 /**
4745  * Get the sub policy of a meter.
4746  *
4747  * @param[in] dev
4748  *   Pointer to Ethernet device.
4749  * @param[in] flow
4750  *   Parent flow structure pointer.
4751  * @param wks
4752  *   Pointer to thread flow work space.
4753  * @param[in] attr
4754  *   Flow rule attributes.
4755  * @param[in] items
4756  *   Pattern specification (list terminated by the END pattern item).
4757  * @param[out] error
4758  *   Perform verbose error reporting if not NULL.
4759  *
4760  * @return
4761  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
4762  */
4763 static struct mlx5_flow_meter_sub_policy *
4764 get_meter_sub_policy(struct rte_eth_dev *dev,
4765                      struct rte_flow *flow,
4766                      struct mlx5_flow_workspace *wks,
4767                      const struct rte_flow_attr *attr,
4768                      const struct rte_flow_item items[],
4769                      struct rte_flow_error *error)
4770 {
4771         struct mlx5_flow_meter_policy *policy;
4772         struct mlx5_flow_meter_policy *final_policy;
4773         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
4774
4775         policy = wks->policy;
4776         final_policy = policy->is_hierarchy ? wks->final_policy : policy;
4777         if (final_policy->is_rss || final_policy->is_queue) {
4778                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
4779                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
4780                 uint32_t i;
4781
4782                 /*
4783                  * This is a tmp dev_flow,
4784                  * no need to register any matcher for it in translate.
4785                  */
4786                 wks->skip_matcher_reg = 1;
4787                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
4788                         struct mlx5_flow dev_flow = {0};
4789                         struct mlx5_flow_handle dev_handle = { {0} };
4790                         uint8_t fate = final_policy->act_cnt[i].fate_action;
4791
4792                         if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
4793                                 const struct rte_flow_action_rss *rss_act =
4794                                         final_policy->act_cnt[i].rss->conf;
4795                                 struct rte_flow_action rss_actions[2] = {
4796                                         [0] = {
4797                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4798                                         .conf = rss_act,
4799                                         },
4800                                         [1] = {
4801                                         .type = RTE_FLOW_ACTION_TYPE_END,
4802                                         .conf = NULL,
4803                                         }
4804                                 };
4805
4806                                 dev_flow.handle = &dev_handle;
4807                                 dev_flow.ingress = attr->ingress;
4808                                 dev_flow.flow = flow;
4809                                 dev_flow.external = 0;
4810 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4811                                 dev_flow.dv.transfer = attr->transfer;
4812 #endif
4813                                 /*
4814                                  * Translate RSS action to get rss hash fields.
4815                                  */
4816                                 if (flow_drv_translate(dev, &dev_flow, attr,
4817                                                 items, rss_actions, error))
4818                                         goto exit;
4819                                 rss_desc_v[i] = wks->rss_desc;
4820                                 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
4821                                 rss_desc_v[i].hash_fields =
4822                                                 dev_flow.hash_fields;
4823                                 rss_desc_v[i].queue_num =
4824                                                 rss_desc_v[i].hash_fields ?
4825                                                 rss_desc_v[i].queue_num : 1;
4826                                 rss_desc_v[i].tunnel =
4827                                                 !!(dev_flow.handle->layers &
4828                                                    MLX5_FLOW_LAYER_TUNNEL);
4829                                 /* Use the RSS queues in the containers. */
4830                                 rss_desc_v[i].queue =
4831                                         (uint16_t *)(uintptr_t)rss_act->queue;
4832                                 rss_desc[i] = &rss_desc_v[i];
4833                         } else if (fate == MLX5_FLOW_FATE_QUEUE) {
4834                                 /* This is queue action. */
4835                                 rss_desc_v[i] = wks->rss_desc;
4836                                 rss_desc_v[i].key_len = 0;
4837                                 rss_desc_v[i].hash_fields = 0;
4838                                 rss_desc_v[i].queue =
4839                                         &final_policy->act_cnt[i].queue;
4840                                 rss_desc_v[i].queue_num = 1;
4841                                 rss_desc[i] = &rss_desc_v[i];
4842                         } else {
4843                                 rss_desc[i] = NULL;
4844                         }
4845                 }
4846                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
4847                                                 flow, policy, rss_desc);
4848         } else {
4849                 enum mlx5_meter_domain mtr_domain =
4850                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4851                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4852                                                 MLX5_MTR_DOMAIN_INGRESS);
4853                 sub_policy = policy->sub_policys[mtr_domain][0];
4854         }
4855         if (!sub_policy)
4856                 rte_flow_error_set(error, EINVAL,
4857                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4858                                    "Failed to get meter sub-policy.");
4859 exit:
4860         return sub_policy;
4861 }
4862
4863 /**
4864  * Split the meter flow.
4865  *
4866  * As meter flow will split to three sub flow, other than meter
4867  * As the meter flow will be split into three subflows, the actions other
4868  * than the meter action only make sense when the meter accepts the packet.
4869  * If the packet is to be dropped, no additional actions should be taken.
4870  *
4871  * One kind of special action which decapsulates the L3 tunnel
4872  * header will be put in the prefix subflow, so as not to take the
4873  * L3 tunnel header into account.
4875  * @param[in] dev
4876  *   Pointer to Ethernet device.
4877  * @param[in] flow
4878  *   Parent flow structure pointer.
4879  * @param wks
4880  *   Pointer to thread flow work space.
4881  * @param[in] attr
4882  *   Flow rule attributes.
4883  * @param[in] items
4884  *   Pattern specification (list terminated by the END pattern item).
4885  * @param[out] sfx_items
4886  *   Suffix flow match items (list terminated by the END pattern item).
4887  * @param[in] actions
4888  *   Associated actions (list terminated by the END action).
4889  * @param[out] actions_sfx
4890  *   Suffix flow actions.
4891  * @param[out] actions_pre
4892  *   Prefix flow actions.
4893  * @param[out] mtr_flow_id
4894  *   Pointer to meter flow id.
4895  * @param[out] error
4896  *   Perform verbose error reporting if not NULL.
4897  *
4898  * @return
4899  *   0 on success, a negative errno value otherwise and rte_errno is set.
4900  */
4901 static int
4902 flow_meter_split_prep(struct rte_eth_dev *dev,
4903                       struct rte_flow *flow,
4904                       struct mlx5_flow_workspace *wks,
4905                       const struct rte_flow_attr *attr,
4906                       const struct rte_flow_item items[],
4907                       struct rte_flow_item sfx_items[],
4908                       const struct rte_flow_action actions[],
4909                       struct rte_flow_action actions_sfx[],
4910                       struct rte_flow_action actions_pre[],
4911                       uint32_t *mtr_flow_id,
4912                       struct rte_flow_error *error)
4913 {
4914         struct mlx5_priv *priv = dev->data->dev_private;
4915         struct mlx5_flow_meter_info *fm = wks->fm;
4916         struct rte_flow_action *tag_action = NULL;
4917         struct rte_flow_item *tag_item;
4918         struct mlx5_rte_flow_action_set_tag *set_tag;
4919         const struct rte_flow_action_raw_encap *raw_encap;
4920         const struct rte_flow_action_raw_decap *raw_decap;
4921         struct mlx5_rte_flow_item_tag *tag_item_spec;
4922         struct mlx5_rte_flow_item_tag *tag_item_mask;
4923         uint32_t tag_id = 0;
4924         struct rte_flow_item *vlan_item_dst = NULL;
4925         const struct rte_flow_item *vlan_item_src = NULL;
4926         struct rte_flow_action *hw_mtr_action;
4927         struct rte_flow_action *action_pre_head = NULL;
4928         int32_t flow_src_port = priv->representor_id;
4929         bool mtr_first;
4930         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
4931         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
4932                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
4933         uint32_t flow_id = 0;
4934         uint32_t flow_id_reversed = 0;
4935         uint8_t flow_id_bits = 0;
4936         int shift;
4937
4938         /* Prepare the suffix subflow items. */
4939         tag_item = sfx_items++;
4940         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4941                 struct mlx5_priv *port_priv;
4942                 const struct rte_flow_item_port_id *pid_v;
4943                 int item_type = items->type;
4944
4945                 switch (item_type) {
4946                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4947                         pid_v = items->spec;
4948                         MLX5_ASSERT(pid_v);
4949                         port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
4950                         if (!port_priv)
4951                                 return rte_flow_error_set(error,
4952                                                 rte_errno,
4953                                                 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
4954                                                 pid_v,
4955                                                 "Failed to get port info.");
4956                         flow_src_port = port_priv->representor_id;
4957                         if (!fm->def_policy && wks->policy->is_hierarchy &&
4958                             flow_src_port != priv->representor_id) {
4959                                 if (flow_drv_mtr_hierarchy_rule_create(dev,
4960                                                                 flow, fm,
4961                                                                 flow_src_port,
4962                                                                 items,
4963                                                                 error))
4964                                         return -rte_errno;
4965                         }
4966                         memcpy(sfx_items, items, sizeof(*sfx_items));
4967                         sfx_items++;
4968                         break;
4969                 case RTE_FLOW_ITEM_TYPE_VLAN:
4970                         /* Determine whether to copy the VLAN item below. */
4971                         vlan_item_src = items;
4972                         vlan_item_dst = sfx_items++;
4973                         vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
4974                         break;
4975                 default:
4976                         break;
4977                 }
4978         }
4979         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4980         sfx_items++;
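             /*
              * Suffix items now look like:
              *   [TAG (slot reserved above), copied PORT_ID items,
              *    VLAN placeholder (VOID unless a push/set-VLAN action
              *    converts it below), END]
              * and sfx_items points just past END, where the tag spec/mask
              * structures are stored later.
              */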
4981         mtr_first = priv->sh->meter_aso_en &&
4982                 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
4983         /* For ASO meter, meter must be before tag in TX direction. */
4984         if (mtr_first) {
4985                 action_pre_head = actions_pre++;
4986                 /* Leave space for tag action. */
4987                 tag_action = actions_pre++;
4988         }
4989         /* Prepare the actions for prefix and suffix flow. */
4990         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4991                 struct rte_flow_action *action_cur = NULL;
4992
4993                 switch (actions->type) {
4994                 case RTE_FLOW_ACTION_TYPE_METER:
4995                         if (mtr_first) {
4996                                 action_cur = action_pre_head;
4997                         } else {
4998                                 /* Leave space for tag action. */
4999                                 tag_action = actions_pre++;
5000                                 action_cur = actions_pre++;
5001                         }
5002                         break;
5003                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5004                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5005                         action_cur = actions_pre++;
5006                         break;
5007                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5008                         raw_encap = actions->conf;
5009                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5010                                 action_cur = actions_pre++;
5011                         break;
5012                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5013                         raw_decap = actions->conf;
5014                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5015                                 action_cur = actions_pre++;
5016                         break;
5017                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5018                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5019                         if (vlan_item_dst && vlan_item_src) {
5020                                 memcpy(vlan_item_dst, vlan_item_src,
5021                                         sizeof(*vlan_item_dst));
5022                                 /*
5023                                  * Convert to internal match item, it is used
5024                                  * for vlan push and set vid.
5025                                  */
5026                                 vlan_item_dst->type = (enum rte_flow_item_type)
5027                                                 MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5028                         }
5029                         break;
5030                 default:
5031                         break;
5032                 }
5033                 if (!action_cur)
5034                         action_cur = (fm->def_policy) ?
5035                                         actions_sfx++ : actions_pre++;
5036                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5037         }
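             /*
              * At this point every original action has been copied to either
              * the prefix or the suffix list; actions not explicitly handled
              * above go to the suffix list for default-policy meters and to
              * the prefix list otherwise.
              */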
5038         /* Add end action to the actions. */
5039         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5040         if (priv->sh->meter_aso_en) {
5041                 /*
5042                  * For ASO meter, an extra jump action needs to be added explicitly
5043                  * to jump from the meter table to the policer table.
5044                  */
5045                 struct mlx5_flow_meter_sub_policy *sub_policy;
5046                 struct mlx5_flow_tbl_data_entry *tbl_data;
5047
5048                 if (!fm->def_policy) {
5049                         sub_policy = get_meter_sub_policy(dev, flow, wks,
5050                                                           attr, items, error);
5051                         if (!sub_policy)
5052                                 return -rte_errno;
5053                 } else {
5054                         enum mlx5_meter_domain mtr_domain =
5055                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5056                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5057                                                 MLX5_MTR_DOMAIN_INGRESS);
5058
5059                         sub_policy =
5060                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5061                 }
5062                 tbl_data = container_of(sub_policy->tbl_rsc,
5063                                         struct mlx5_flow_tbl_data_entry, tbl);
5064                 hw_mtr_action = actions_pre++;
5065                 hw_mtr_action->type = (enum rte_flow_action_type)
5066                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5067                 hw_mtr_action->conf = tbl_data->jump.action;
5068         }
5069         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5070         actions_pre++;
5071         if (!tag_action)
5072                 return rte_flow_error_set(error, ENOMEM,
5073                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5074                                           NULL, "No tag action space.");
5075         if (!mtr_flow_id) {
5076                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5077                 goto exit;
5078         }
5079         /* Only default-policy Meter creates mtr flow id. */
5080         if (fm->def_policy) {
5081                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5082                 if (!tag_id)
5083                         return rte_flow_error_set(error, ENOMEM,
5084                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5085                                         "Failed to allocate meter flow id.");
5086                 flow_id = tag_id - 1;
5087                 flow_id_bits = (!flow_id) ? 1 :
5088                                 (MLX5_REG_BITS - __builtin_clz(flow_id));
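                     /*
                      * flow_id_bits is the number of significant bits in
                      * flow_id, e.g. flow_id = 5 (0b101) gives
                      * 32 - clz(5) = 32 - 29 = 3 (assuming MLX5_REG_BITS
                      * is 32).
                      */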
5089                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5090                     mtr_reg_bits) {
5091                         mlx5_ipool_free(fm->flow_ipool, tag_id);
5092                         return rte_flow_error_set(error, EINVAL,
5093                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5094                                         "Meter flow id exceeds max limit.");
5095                 }
5096                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5097                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5098         }
5099         /* Build tag actions and items for meter_id/meter flow_id. */
5100         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5101         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5102         tag_item_mask = tag_item_spec + 1;
5103         /* Both flow_id and meter_id share the same register. */
5104         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5105                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5106                                                             0, error),
5107                 .offset = mtr_id_offset,
5108                 .length = mtr_reg_bits,
5109                 .data = flow->meter,
5110         };
5111         /*
5112          * The color Reg bits used by flow_id grow from msb to lsb,
5113          * so the flow_id value must be bit-reversed in RegC.
5114          */
5115         for (shift = 0; shift < flow_id_bits; shift++)
5116                 flow_id_reversed = (flow_id_reversed << 1) |
5117                                 ((flow_id >> shift) & 0x1);
5118         set_tag->data |=
5119                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
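             /*
              * Example: flow_id = 6 (0b110, 3 significant bits) reverses to
              * 0b011, which lands in the top 3 bits of the mtr_reg_bits-wide
              * field, while the meter id stays in the low bits of the same
              * field.
              */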
5120         tag_item_spec->id = set_tag->id;
5121         tag_item_spec->data = set_tag->data << mtr_id_offset;
5122         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5123         tag_action->type = (enum rte_flow_action_type)
5124                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5125         tag_action->conf = set_tag;
5126         tag_item->type = (enum rte_flow_item_type)
5127                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5128         tag_item->spec = tag_item_spec;
5129         tag_item->last = NULL;
5130         tag_item->mask = tag_item_mask;
5131 exit:
5132         if (mtr_flow_id)
5133                 *mtr_flow_id = tag_id;
5134         return 0;
5135 }
5136
5137 /**
5138  * Split action list having QUEUE/RSS for metadata register copy.
5139  *
5140  * Once Q/RSS action is detected in user's action list, the flow action
5141  * should be split in order to copy metadata registers, which will happen in
5142  * RX_CP_TBL like,
5143  *   - CQE->flow_tag := reg_c[1] (MARK)
5144  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5145  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5146  * This is because the last action of each flow must be a terminal action
5147  * (QUEUE, RSS or DROP).
5148  *
5149  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5150  * stored and kept in the mlx5_flow structure for each sub_flow.
5151  *
5152  * The Q/RSS action is replaced with,
5153  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5154  * And the following JUMP action is added at the end,
5155  *   - JUMP, to RX_CP_TBL.
5156  *
5157  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5158  * the flow_create_split_metadata() routine. The flow will look like,
5159  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5160  *
5161  * @param dev
5162  *   Pointer to Ethernet device.
5163  * @param[out] split_actions
5164  *   Pointer to store split actions to jump to CP_TBL.
5165  * @param[in] actions
5166  *   Pointer to the list of original flow actions.
5167  * @param[in] qrss
5168  *   Pointer to the Q/RSS action.
5169  * @param[in] actions_n
5170  *   Number of original actions.
5171  * @param[out] error
5172  *   Perform verbose error reporting if not NULL.
5173  *
5174  * @return
5175  *   non-zero unique flow_id on success, otherwise 0 and
5176  *   error/rte_error are set.
5177  */
5178 static uint32_t
5179 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5180                           struct rte_flow_action *split_actions,
5181                           const struct rte_flow_action *actions,
5182                           const struct rte_flow_action *qrss,
5183                           int actions_n, struct rte_flow_error *error)
5184 {
5185         struct mlx5_priv *priv = dev->data->dev_private;
5186         struct mlx5_rte_flow_action_set_tag *set_tag;
5187         struct rte_flow_action_jump *jump;
5188         const int qrss_idx = qrss - actions;
5189         uint32_t flow_id = 0;
5190         int ret = 0;
5191
5192         /*
5193          * Given actions will be split
5194          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5195          * - Add jump to mreg CP_TBL.
5196          * As a result, there will be one more action.
5197          */
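             /*
              * For example, user actions [MARK, QUEUE, END] become
              * [MARK, TAG(set flow ID in reg_c), JUMP(to CP_TBL), END],
              * and the QUEUE action is re-created later in RX_ACT_TBL.
              */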
5198         ++actions_n;
5199         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5200         set_tag = (void *)(split_actions + actions_n);
5201         /*
5202          * If the tag action is not set to void (i.e. this is not the meter
5203          * suffix flow), add the tag action; the meter suffix flow already
5204          * has the tag added.
5205          */
5206         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
5207                 /*
5208                  * Allocate the new subflow ID. This one is unique within
5209                  * device and not shared with representors. Otherwise,
5210                  * we would have to resolve multi-thread access synch
5211                  * issue. Each flow on the shared device is appended
5212                  * with source vport identifier, so the resulting
5213                  * flows will be unique in the shared (by master and
5214                  * representors) domain even if they have coinciding
5215                  * IDs.
5216                  */
5217                 mlx5_ipool_malloc(priv->sh->ipool
5218                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5219                 if (!flow_id)
5220                         return rte_flow_error_set(error, ENOMEM,
5221                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5222                                                   NULL, "can't allocate id "
5223                                                   "for split Q/RSS subflow");
5224                 /* Internal SET_TAG action to set flow ID. */
5225                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
5226                         .data = flow_id,
5227                 };
5228                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5229                 if (ret < 0)
5230                         return ret;
5231                 set_tag->id = ret;
5232                 /* Construct new actions array. */
5233                 /* Replace QUEUE/RSS action. */
5234                 split_actions[qrss_idx] = (struct rte_flow_action){
5235                         .type = (enum rte_flow_action_type)
5236                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5237                         .conf = set_tag,
5238                 };
5239         }
5240         /* JUMP action to jump to mreg copy table (CP_TBL). */
5241         jump = (void *)(set_tag + 1);
5242         *jump = (struct rte_flow_action_jump){
5243                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5244         };
5245         split_actions[actions_n - 2] = (struct rte_flow_action){
5246                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5247                 .conf = jump,
5248         };
5249         split_actions[actions_n - 1] = (struct rte_flow_action){
5250                 .type = RTE_FLOW_ACTION_TYPE_END,
5251         };
5252         return flow_id;
5253 }
5254
5255 /**
5256  * Extend the given action list for Tx metadata copy.
5257  *
5258  * Copy the given action list to the ext_actions and add flow metadata register
5259  * copy action in order to copy reg_a set by WQE to reg_c[0].
5260  *
5261  * @param[out] ext_actions
5262  *   Pointer to the extended action list.
5263  * @param[in] actions
5264  *   Pointer to the list of actions.
5265  * @param[in] actions_n
5266  *   Number of actions in the list.
5267  * @param[out] error
5268  *   Perform verbose error reporting if not NULL.
5269  * @param[in] encap_idx
5270  *   The encap action index.
5271  *
5272  * @return
5273  *   0 on success, negative value otherwise
5274  */
5275 static int
5276 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5277                        struct rte_flow_action *ext_actions,
5278                        const struct rte_flow_action *actions,
5279                        int actions_n, struct rte_flow_error *error,
5280                        int encap_idx)
5281 {
5282         struct mlx5_flow_action_copy_mreg *cp_mreg =
5283                 (struct mlx5_flow_action_copy_mreg *)
5284                         (ext_actions + actions_n + 1);
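             /*
              * cp_mreg is carved from the spare space the caller allocated
              * right after the (actions_n + 1)-entry ext_actions array.
              */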
5285         int ret;
5286
5287         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5288         if (ret < 0)
5289                 return ret;
5290         cp_mreg->dst = ret;
5291         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5292         if (ret < 0)
5293                 return ret;
5294         cp_mreg->src = ret;
5295         if (encap_idx != 0)
5296                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5297         if (encap_idx == actions_n - 1) {
5298                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5299                         .type = (enum rte_flow_action_type)
5300                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5301                         .conf = cp_mreg,
5302                 };
5303                 ext_actions[actions_n] = (struct rte_flow_action){
5304                         .type = RTE_FLOW_ACTION_TYPE_END,
5305                 };
5306         } else {
5307                 ext_actions[encap_idx] = (struct rte_flow_action){
5308                         .type = (enum rte_flow_action_type)
5309                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5310                         .conf = cp_mreg,
5311                 };
5312                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5313                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5314         }
5315         return 0;
5316 }
5317
5318 /**
5319  * Check the match action from the action list.
5320  *
5321  * @param[in] actions
5322  *   Pointer to the list of actions.
5323  * @param[in] attr
5324  *   Flow rule attributes.
5325  * @param[in] action
5326  *   The action to check for in the list.
5327  * @param[out] match_action_pos
5328  *   Pointer to the position of the matched action if it exists, otherwise -1.
5329  * @param[out] qrss_action_pos
5330  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
5331  * @param[out] modify_after_mirror
5332  *   Pointer to the flag of modify action after FDB mirroring.
5333  *
5334  * @return
5335  *   > 0 the total number of actions.
5336  *   0 if the match action is not found in the action list.
5337  */
5338 static int
5339 flow_check_match_action(const struct rte_flow_action actions[],
5340                         const struct rte_flow_attr *attr,
5341                         enum rte_flow_action_type action,
5342                         int *match_action_pos, int *qrss_action_pos,
5343                         int *modify_after_mirror)
5344 {
5345         const struct rte_flow_action_sample *sample;
5346         const struct rte_flow_action_raw_decap *decap;
5347         int actions_n = 0;
5348         uint32_t ratio = 0;
5349         int sub_type = 0;
5350         int flag = 0;
5351         int fdb_mirror = 0;
5352
5353         *match_action_pos = -1;
5354         *qrss_action_pos = -1;
5355         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5356                 if (actions->type == action) {
5357                         flag = 1;
5358                         *match_action_pos = actions_n;
5359                 }
5360                 switch (actions->type) {
5361                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5362                 case RTE_FLOW_ACTION_TYPE_RSS:
5363                         *qrss_action_pos = actions_n;
5364                         break;
5365                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5366                         sample = actions->conf;
5367                         ratio = sample->ratio;
5368                         sub_type = ((const struct rte_flow_action *)
5369                                         (sample->actions))->type;
5370                         if (ratio == 1 && attr->transfer)
5371                                 fdb_mirror = 1;
5372                         break;
5373                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5374                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5375                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5376                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5377                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5378                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5379                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5380                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5381                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5382                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5383                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5384                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5385                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5386                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5387                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5388                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5389                 case RTE_FLOW_ACTION_TYPE_FLAG:
5390                 case RTE_FLOW_ACTION_TYPE_MARK:
5391                 case RTE_FLOW_ACTION_TYPE_SET_META:
5392                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5393                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5394                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5395                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5396                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5397                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5398                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5399                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5400                 case RTE_FLOW_ACTION_TYPE_METER:
5401                         if (fdb_mirror)
5402                                 *modify_after_mirror = 1;
5403                         break;
5404                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5405                         decap = actions->conf;
5406                         while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5407                                 ;
5408                         actions_n++;
5409                         if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5410                                 const struct rte_flow_action_raw_encap *encap =
5411                                                                 actions->conf;
5412                                 if (decap->size <=
5413                                         MLX5_ENCAPSULATION_DECISION_SIZE &&
5414                                     encap->size >
5415                                         MLX5_ENCAPSULATION_DECISION_SIZE)
5416                                         /* L3 encap. */
5417                                         break;
5418                         }
5419                         if (fdb_mirror)
5420                                 *modify_after_mirror = 1;
5421                         break;
5422                 default:
5423                         break;
5424                 }
5425                 actions_n++;
5426         }
5427         if (flag && fdb_mirror && !*modify_after_mirror) {
5428                 /* FDB mirroring is implemented with the destination array
5429                  * instead of the FLOW_SAMPLER object.
5430                  */
5431                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5432                         flag = 0;
5433         }
5434         /* Count RTE_FLOW_ACTION_TYPE_END. */
5435         return flag ? actions_n + 1 : 0;
5436 }
5437
5438 #define SAMPLE_SUFFIX_ITEM 2
5439
5440 /**
5441  * Split the sample flow.
5442  *
5443  * As the sample flow is split into two sub flows, the prefix sub flow keeps
5444  * the sample action while the other actions move to a new suffix sub flow.
5445  *
5446  * A unique tag id is also added with a tag action in the sample (prefix) flow;
5447  * the same tag id is used as a match in the suffix flow.
5448  *
5449  * @param dev
5450  *   Pointer to Ethernet device.
5451  * @param[in] add_tag
5452  *   Add extra tag action flag.
5453  * @param[out] sfx_items
5454  *   Suffix flow match items (list terminated by the END pattern item).
5455  * @param[in] actions
5456  *   Associated actions (list terminated by the END action).
5457  * @param[out] actions_sfx
5458  *   Suffix flow actions.
5459  * @param[out] actions_pre
5460  *   Prefix flow actions.
5461  * @param[in] actions_n
5462  *  The total number of actions.
5463  * @param[in] sample_action_pos
5464  *   The sample action position.
5465  * @param[in] qrss_action_pos
5466  *   The Queue/RSS action position.
5467  * @param[in] jump_table
5468  *   Add extra jump action flag.
5469  * @param[out] error
5470  *   Perform verbose error reporting if not NULL.
5471  *
5472  * @return
5473  *   0 or a unique flow_id on success, a negative errno value
5474  *   otherwise and rte_errno is set.
5475  */
5476 static int
5477 flow_sample_split_prep(struct rte_eth_dev *dev,
5478                        int add_tag,
5479                        struct rte_flow_item sfx_items[],
5480                        const struct rte_flow_action actions[],
5481                        struct rte_flow_action actions_sfx[],
5482                        struct rte_flow_action actions_pre[],
5483                        int actions_n,
5484                        int sample_action_pos,
5485                        int qrss_action_pos,
5486                        int jump_table,
5487                        struct rte_flow_error *error)
5488 {
5489         struct mlx5_priv *priv = dev->data->dev_private;
5490         struct mlx5_rte_flow_action_set_tag *set_tag;
5491         struct mlx5_rte_flow_item_tag *tag_spec;
5492         struct mlx5_rte_flow_item_tag *tag_mask;
5493         struct rte_flow_action_jump *jump_action;
5494         uint32_t tag_id = 0;
5495         int index;
5496         int append_index = 0;
5497         int ret;
5498
5499         if (sample_action_pos < 0)
5500                 return rte_flow_error_set(error, EINVAL,
5501                                           RTE_FLOW_ERROR_TYPE_ACTION,
5502                                           NULL, "invalid position of sample "
5503                                           "action in list");
5504         /* Prepare the actions for prefix and suffix flow. */
5505         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5506                 index = qrss_action_pos;
5507                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
5508                 if (index != 0)
5509                         memcpy(actions_pre, actions,
5510                                sizeof(struct rte_flow_action) * index);
5511                 /* Put the other actions preceding the sample action into the prefix flow. */
5512                 if (sample_action_pos > index + 1)
5513                         memcpy(actions_pre + index, actions + index + 1,
5514                                sizeof(struct rte_flow_action) *
5515                                (sample_action_pos - index - 1));
5516                 index = sample_action_pos - 1;
5517                 /* Put Queue/RSS action into Suffix flow. */
5518                 memcpy(actions_sfx, actions + qrss_action_pos,
5519                        sizeof(struct rte_flow_action));
5520                 actions_sfx++;
5521         } else {
5522                 index = sample_action_pos;
5523                 if (index != 0)
5524                         memcpy(actions_pre, actions,
5525                                sizeof(struct rte_flow_action) * index);
5526         }
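             /*
              * Illustrative example (with add_tag set and no jump_table):
              * user actions [COUNT, QUEUE, SAMPLE, END] are split into
              *   prefix: [COUNT, TAG(unique id), SAMPLE, END]
              *   suffix: [QUEUE, END]
              * so the suffix flow matches on the tag written by the prefix
              * flow.
              */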
5527         /* On CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5528          * On CX6DX and above, metadata registers Cx preserve their value,
5529          * so the extra tag action is added for NIC-RX and the E-Switch domain.
5530          */
5531         if (add_tag) {
5532                 /* Prepare the prefix tag action. */
5533                 append_index++;
5534                 set_tag = (void *)(actions_pre + actions_n + append_index);
5535                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5536                 if (ret < 0)
5537                         return ret;
5538                 mlx5_ipool_malloc(priv->sh->ipool
5539                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5540                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5541                         .id = ret,
5542                         .data = tag_id,
5543                 };
5544                 /* Prepare the suffix subflow items. */
5545                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5546                 tag_spec->data = tag_id;
5547                 tag_spec->id = set_tag->id;
5548                 tag_mask = tag_spec + 1;
5549                 tag_mask->data = UINT32_MAX;
5550                 sfx_items[0] = (struct rte_flow_item){
5551                         .type = (enum rte_flow_item_type)
5552                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5553                         .spec = tag_spec,
5554                         .last = NULL,
5555                         .mask = tag_mask,
5556                 };
5557                 sfx_items[1] = (struct rte_flow_item){
5558                         .type = (enum rte_flow_item_type)
5559                                 RTE_FLOW_ITEM_TYPE_END,
5560                 };
5561                 /* Prepare the tag action in prefix subflow. */
5562                 actions_pre[index++] =
5563                         (struct rte_flow_action){
5564                         .type = (enum rte_flow_action_type)
5565                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5566                         .conf = set_tag,
5567                 };
5568         }
5569         memcpy(actions_pre + index, actions + sample_action_pos,
5570                sizeof(struct rte_flow_action));
5571         index += 1;
5572         /* For a modify action after the sample action in E-Switch mirroring,
5573          * add an extra jump action in the prefix subflow to jump into the next
5574          * table, then do the modify action in the new table.
5575          */
5576         if (jump_table) {
5577                 /* Prepare the prefix jump action. */
5578                 append_index++;
5579                 jump_action = (void *)(actions_pre + actions_n + append_index);
5580                 jump_action->group = jump_table;
5581                 actions_pre[index++] =
5582                         (struct rte_flow_action){
5583                         .type = (enum rte_flow_action_type)
5584                                 RTE_FLOW_ACTION_TYPE_JUMP,
5585                         .conf = jump_action,
5586                 };
5587         }
5588         actions_pre[index] = (struct rte_flow_action){
5589                 .type = (enum rte_flow_action_type)
5590                         RTE_FLOW_ACTION_TYPE_END,
5591         };
5592         /* Put the actions after sample into Suffix flow. */
5593         memcpy(actions_sfx, actions + sample_action_pos + 1,
5594                sizeof(struct rte_flow_action) *
5595                (actions_n - sample_action_pos - 1));
5596         return tag_id;
5597 }
5598
5599 /**
5600  * The splitting for metadata feature.
5601  *
5602  * - Q/RSS action on NIC Rx should be split in order to pass by
5603  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
5604  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
5605  *
5606  * - All the actions on NIC Tx should have a mreg copy action to
5607  *   copy reg_a from WQE to reg_c[0].
5608  *
5609  * @param dev
5610  *   Pointer to Ethernet device.
5611  * @param[in] flow
5612  *   Parent flow structure pointer.
5613  * @param[in] attr
5614  *   Flow rule attributes.
5615  * @param[in] items
5616  *   Pattern specification (list terminated by the END pattern item).
5617  * @param[in] actions
5618  *   Associated actions (list terminated by the END action).
5619  * @param[in] flow_split_info
5620  *   Pointer to flow split info structure.
5621  * @param[out] error
5622  *   Perform verbose error reporting if not NULL.
5623  * @return
5624  *   0 on success, negative value otherwise
5625  */
5626 static int
5627 flow_create_split_metadata(struct rte_eth_dev *dev,
5628                            struct rte_flow *flow,
5629                            const struct rte_flow_attr *attr,
5630                            const struct rte_flow_item items[],
5631                            const struct rte_flow_action actions[],
5632                            struct mlx5_flow_split_info *flow_split_info,
5633                            struct rte_flow_error *error)
5634 {
5635         struct mlx5_priv *priv = dev->data->dev_private;
5636         struct mlx5_dev_config *config = &priv->config;
5637         const struct rte_flow_action *qrss = NULL;
5638         struct rte_flow_action *ext_actions = NULL;
5639         struct mlx5_flow *dev_flow = NULL;
5640         uint32_t qrss_id = 0;
5641         int mtr_sfx = 0;
5642         size_t act_size;
5643         int actions_n;
5644         int encap_idx;
5645         int ret;
5646
5647         /* Check whether extensive metadata feature is engaged. */
5648         if (!config->dv_flow_en ||
5649             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5650             !mlx5_flow_ext_mreg_supported(dev))
5651                 return flow_create_split_inner(dev, flow, NULL, attr, items,
5652                                                actions, flow_split_info, error);
5653         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
5654                                                            &encap_idx);
5655         if (qrss) {
5656                 /* Exclude hairpin flows from splitting. */
5657                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
5658                         const struct rte_flow_action_queue *queue;
5659
5660                         queue = qrss->conf;
5661                         if (mlx5_rxq_get_type(dev, queue->index) ==
5662                             MLX5_RXQ_TYPE_HAIRPIN)
5663                                 qrss = NULL;
5664                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5665                         const struct rte_flow_action_rss *rss;
5666
5667                         rss = qrss->conf;
5668                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5669                             MLX5_RXQ_TYPE_HAIRPIN)
5670                                 qrss = NULL;
5671                 }
5672         }
5673         if (qrss) {
5674                 /* Check if it is in meter suffix table. */
5675                 mtr_sfx = attr->group == (attr->transfer ?
5676                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5677                           MLX5_FLOW_TABLE_LEVEL_METER);
5678                 /*
5679                  * Q/RSS action on NIC Rx should be split in order to pass by
5680                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
5681                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
5682                  */
5683                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5684                            sizeof(struct rte_flow_action_set_tag) +
5685                            sizeof(struct rte_flow_action_jump);
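                     /*
                      * Single allocation layout: (actions_n + 1)
                      * rte_flow_action entries followed by space for the
                      * set_tag and jump structures that
                      * flow_mreg_split_qrss_prep() fills in.
                      */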
5686                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5687                                           SOCKET_ID_ANY);
5688                 if (!ext_actions)
5689                         return rte_flow_error_set(error, ENOMEM,
5690                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5691                                                   NULL, "no memory to split "
5692                                                   "metadata flow");
5693                 /*
5694                  * If we are the suffix flow of a meter, the tag already exists.
5695                  * Set the tag action to void.
5696                  */
5697                 if (mtr_sfx)
5698                         ext_actions[qrss - actions].type =
5699                                                 RTE_FLOW_ACTION_TYPE_VOID;
5700                 else
5701                         ext_actions[qrss - actions].type =
5702                                                 (enum rte_flow_action_type)
5703                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5704                 /*
5705                  * Create the new actions list with removed Q/RSS action
5706                  * and appended set tag and jump to register copy table
5707                  * (RX_CP_TBL). We should preallocate unique tag ID here
5708                  * in advance, because it is needed for set tag action.
5709                  */
5710                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5711                                                     qrss, actions_n, error);
5712                 if (!mtr_sfx && !qrss_id) {
5713                         ret = -rte_errno;
5714                         goto exit;
5715                 }
5716         } else if (attr->egress && !attr->transfer) {
5717                 /*
5718                  * All the actions on NIC Tx should have a metadata register
5719                  * copy action to copy reg_a from WQE to reg_c[meta]
5720                  */
5721                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5722                            sizeof(struct mlx5_flow_action_copy_mreg);
5723                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5724                                           SOCKET_ID_ANY);
5725                 if (!ext_actions)
5726                         return rte_flow_error_set(error, ENOMEM,
5727                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5728                                                   NULL, "no memory to split "
5729                                                   "metadata flow");
5730                 /* Create the action list appended with copy register. */
5731                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5732                                              actions_n, error, encap_idx);
5733                 if (ret < 0)
5734                         goto exit;
5735         }
5736         /* Add the unmodified original or prefix subflow. */
5737         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5738                                       items, ext_actions ? ext_actions :
5739                                       actions, flow_split_info, error);
5740         if (ret < 0)
5741                 goto exit;
5742         MLX5_ASSERT(dev_flow);
5743         if (qrss) {
5744                 const struct rte_flow_attr q_attr = {
5745                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5746                         .ingress = 1,
5747                 };
5748                 /* Internal PMD action to set register. */
5749                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5750                         .data = qrss_id,
5751                         .id = REG_NON,
5752                 };
5753                 struct rte_flow_item q_items[] = {
5754                         {
5755                                 .type = (enum rte_flow_item_type)
5756                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5757                                 .spec = &q_tag_spec,
5758                                 .last = NULL,
5759                                 .mask = NULL,
5760                         },
5761                         {
5762                                 .type = RTE_FLOW_ITEM_TYPE_END,
5763                         },
5764                 };
5765                 struct rte_flow_action q_actions[] = {
5766                         {
5767                                 .type = qrss->type,
5768                                 .conf = qrss->conf,
5769                         },
5770                         {
5771                                 .type = RTE_FLOW_ACTION_TYPE_END,
5772                         },
5773                 };
5774                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5775
5776                 /*
5777                  * Configure the tag item only if there is no meter subflow.
5778                  * Since tag is already marked in the meter suffix subflow
5779                  * we can just use the meter suffix items as is.
5780                  */
5781                 if (qrss_id) {
5782                         /* Not meter subflow. */
5783                         MLX5_ASSERT(!mtr_sfx);
5784                         /*
5785                          * Put the unique id in the prefix flow because it is
5786                          * destroyed after the suffix flow. The id will be freed
5787                          * once there are no actual flows with this id and
5788                          * identifier reallocation becomes possible (for example,
5789                          * for other flows in other threads).
5790                          */
5791                         dev_flow->handle->split_flow_id = qrss_id;
5792                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5793                                                    error);
5794                         if (ret < 0)
5795                                 goto exit;
5796                         q_tag_spec.id = ret;
5797                 }
5798                 dev_flow = NULL;
5799                 /* Add suffix subflow to execute Q/RSS. */
5800                 flow_split_info->prefix_layers = layers;
5801                 flow_split_info->prefix_mark = 0;
5802                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5803                                               &q_attr, mtr_sfx ? items :
5804                                               q_items, q_actions,
5805                                               flow_split_info, error);
5806                 if (ret < 0)
5807                         goto exit;
5808                 /* The qrss ID is now owned by the flow, do not free it at exit. */
5809                 qrss_id = 0;
5810                 MLX5_ASSERT(dev_flow);
5811         }
5812
5813 exit:
5814         /*
5815          * We do not destroy the partially created sub_flows in case of error.
5816          * These ones are included into parent flow list and will be destroyed
5817          * by flow_drv_destroy.
5818          */
5819         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5820                         qrss_id);
5821         mlx5_free(ext_actions);
5822         return ret;
5823 }
5824
5825 /**
5826  * Create meter internal drop flow with the original pattern.
5827  *
5828  * @param dev
5829  *   Pointer to Ethernet device.
5830  * @param[in] flow
5831  *   Parent flow structure pointer.
5832  * @param[in] attr
5833  *   Flow rule attributes.
5834  * @param[in] items
5835  *   Pattern specification (list terminated by the END pattern item).
5836  * @param[in] flow_split_info
5837  *   Pointer to flow split info structure.
5838  * @param[in] fm
5839  *   Pointer to flow meter structure.
5840  * @param[out] error
5841  *   Perform verbose error reporting if not NULL.
5842  * @return
5843  *   0 on success, negative value otherwise
5844  */
5845 static int
5846 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5847                         struct rte_flow *flow,
5848                         const struct rte_flow_attr *attr,
5849                         const struct rte_flow_item items[],
5850                         struct mlx5_flow_split_info *flow_split_info,
5851                         struct mlx5_flow_meter_info *fm,
5852                         struct rte_flow_error *error)
5853 {
5854         struct mlx5_flow *dev_flow = NULL;
5855         struct rte_flow_attr drop_attr = *attr;
5856         struct rte_flow_action drop_actions[3];
5857         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
5858
5859         MLX5_ASSERT(fm->drop_cnt);
5860         drop_actions[0].type =
5861                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
5862         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
5863         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
5864         drop_actions[1].conf = NULL;
5865         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
5866         drop_actions[2].conf = NULL;
5867         drop_split_info.external = false;
5868         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5869         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
5870         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
5871         return flow_create_split_inner(dev, flow, &dev_flow,
5872                                 &drop_attr, items, drop_actions,
5873                                 &drop_split_info, error);
5874 }
5875
5876 /**
5877  * The splitting for meter feature.
5878  *
5879  * - The meter flow will be split to two flows as prefix and
5880  *   suffix flow. The packets are handled by the suffix flow only if
5881  *   they pass the prefix meter action.
5882  *
5883  * - Reg_C_5 is used for the packet to match between the prefix and
5884  *   suffix flows.
5885  *
5886  * @param dev
5887  *   Pointer to Ethernet device.
5888  * @param[in] flow
5889  *   Parent flow structure pointer.
5890  * @param[in] attr
5891  *   Flow rule attributes.
5892  * @param[in] items
5893  *   Pattern specification (list terminated by the END pattern item).
5894  * @param[in] actions
5895  *   Associated actions (list terminated by the END action).
5896  * @param[in] flow_split_info
5897  *   Pointer to flow split info structure.
5898  * @param[out] error
5899  *   Perform verbose error reporting if not NULL.
5900  * @return
5901  *   0 on success, negative value otherwise
5902  */
5903 static int
5904 flow_create_split_meter(struct rte_eth_dev *dev,
5905                         struct rte_flow *flow,
5906                         const struct rte_flow_attr *attr,
5907                         const struct rte_flow_item items[],
5908                         const struct rte_flow_action actions[],
5909                         struct mlx5_flow_split_info *flow_split_info,
5910                         struct rte_flow_error *error)
5911 {
5912         struct mlx5_priv *priv = dev->data->dev_private;
5913         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5914         struct rte_flow_action *sfx_actions = NULL;
5915         struct rte_flow_action *pre_actions = NULL;
5916         struct rte_flow_item *sfx_items = NULL;
5917         struct mlx5_flow *dev_flow = NULL;
5918         struct rte_flow_attr sfx_attr = *attr;
5919         struct mlx5_flow_meter_info *fm = NULL;
5920         uint8_t skip_scale_restore;
5921         bool has_mtr = false;
5922         bool has_modify = false;
5923         bool set_mtr_reg = true;
5924         bool is_mtr_hierarchy = false;
5925         uint32_t meter_id = 0;
5926         uint32_t mtr_idx = 0;
5927         uint32_t mtr_flow_id = 0;
5928         size_t act_size;
5929         size_t item_size;
5930         int actions_n = 0;
5931         int ret = 0;
5932
5933         if (priv->mtr_en)
5934                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
5935                                                     &has_modify, &meter_id);
5936         if (has_mtr) {
5937                 if (flow->meter) {
5938                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
5939                         if (!fm)
5940                                 return rte_flow_error_set(error, EINVAL,
5941                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5942                                                 NULL, "Meter not found.");
5943                 } else {
5944                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
5945                         if (!fm)
5946                                 return rte_flow_error_set(error, EINVAL,
5947                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5948                                                 NULL, "Meter not found.");
5949                         ret = mlx5_flow_meter_attach(priv, fm,
5950                                                      &sfx_attr, error);
5951                         if (ret)
5952                                 return -rte_errno;
5953                         flow->meter = mtr_idx;
5954                 }
5955                 MLX5_ASSERT(wks);
5956                 wks->fm = fm;
5957                 if (!fm->def_policy) {
5958                         wks->policy = mlx5_flow_meter_policy_find(dev,
5959                                                                   fm->policy_id,
5960                                                                   NULL);
5961                         MLX5_ASSERT(wks->policy);
5962                         if (wks->policy->is_hierarchy) {
5963                                 wks->final_policy =
5964                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
5965                                                                 wks->policy);
5966                                 if (!wks->final_policy)
5967                                         return rte_flow_error_set(error,
5968                                         EINVAL,
5969                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
5970                                 "Failed to find terminal policy of hierarchy.");
5971                                 is_mtr_hierarchy = true;
5972                         }
5973                 }
5974                 /*
5975                  * If it isn't a default-policy meter, it is not a meter
5976                  * hierarchy, and either
5977                  * 1. There's no action in the flow changing the
5978                  *    packet (modify/encap/decap etc.), OR
5979                  * 2. No drop count is needed for this meter,
5980                  * then there is no need to use regC to save the meter id.
5981                  */
5982                 if (!fm->def_policy && !is_mtr_hierarchy &&
5983                     (!has_modify || !fm->drop_cnt))
5984                         set_mtr_reg = false;
5985                 /* Prefix actions: meter, decap, encap, tag, jump, end. */
5986                 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) +
5987                            sizeof(struct mlx5_rte_flow_action_set_tag);
5988                 /* Suffix items: tag, vlan, port id, end. */
5989 #define METER_SUFFIX_ITEM 4
5990                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5991                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
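                     /*
                      * Single allocation: the action area (suffix actions
                      * followed by prefix actions) and then the suffix items
                      * with the two tag spec/mask structures; sfx_items
                      * points right after the action area.
                      */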
5992                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5993                                           0, SOCKET_ID_ANY);
5994                 if (!sfx_actions)
5995                         return rte_flow_error_set(error, ENOMEM,
5996                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5997                                                   NULL, "no memory to split "
5998                                                   "meter flow");
5999                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6000                              act_size);
6001                 /* There's no suffix flow for a meter with a non-default policy. */
6002                 if (!fm->def_policy)
6003                         pre_actions = sfx_actions + 1;
6004                 else
6005                         pre_actions = sfx_actions + actions_n;
6006                 ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6007                                             items, sfx_items, actions,
6008                                             sfx_actions, pre_actions,
6009                                             (set_mtr_reg ? &mtr_flow_id : NULL),
6010                                             error);
6011                 if (ret) {
6012                         ret = -rte_errno;
6013                         goto exit;
6014                 }
6015                 /* Add the prefix subflow. */
6016                 flow_split_info->prefix_mark = 0;
6017                 skip_scale_restore = flow_split_info->skip_scale;
6018                 flow_split_info->skip_scale |=
6019                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6020                 ret = flow_create_split_inner(dev, flow, &dev_flow,
6021                                               attr, items, pre_actions,
6022                                               flow_split_info, error);
6023                 flow_split_info->skip_scale = skip_scale_restore;
6024                 if (ret) {
6025                         if (mtr_flow_id)
6026                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6027                         ret = -rte_errno;
6028                         goto exit;
6029                 }
6030                 if (mtr_flow_id) {
6031                         dev_flow->handle->split_flow_id = mtr_flow_id;
6032                         dev_flow->handle->is_meter_flow_id = 1;
6033                 }
6034                 if (!fm->def_policy) {
6035                         if (!set_mtr_reg && fm->drop_cnt)
6036                                 ret =
6037                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6038                                                         &sfx_attr, items,
6039                                                         flow_split_info,
6040                                                         fm, error);
6041                         goto exit;
6042                 }
6043                 /* Set the sfx group attr. */
6044                 sfx_attr.group = sfx_attr.transfer ?
6045                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6046                                  MLX5_FLOW_TABLE_LEVEL_METER;
6047                 flow_split_info->prefix_layers =
6048                                 flow_get_prefix_layer_flags(dev_flow);
6049                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6050                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6051         }
6052         /* Add the suffix subflow, or the original flow when not split. */
6053         ret = flow_create_split_metadata(dev, flow,
6054                                          &sfx_attr, sfx_items ?
6055                                          sfx_items : items,
6056                                          sfx_actions ? sfx_actions : actions,
6057                                          flow_split_info, error);
6058 exit:
6059         if (sfx_actions)
6060                 mlx5_free(sfx_actions);
6061         return ret;
6062 }
6063
6064 /**
6065  * The splitting for sample feature.
6066  *
6067  * Once Sample action is detected in the action list, the flow actions should
6068  * be split into prefix sub flow and suffix sub flow.
6069  *
6070  * The original items remain in the prefix sub flow, all actions preceding the
6071  * sample action and the sample action itself will be copied to the prefix
6072  * sub flow, the actions following the sample action will be copied to the
6073  * suffix sub flow, and the Queue action is always located in the suffix sub flow.
6074  *
6075  * In order to make the packet from the prefix sub flow match the suffix sub
6076  * flow, an extra tag action is added into the prefix sub flow, and the suffix
6077  * sub flow uses a tag item with the unique flow id.
6078  *
6079  * @param dev
6080  *   Pointer to Ethernet device.
6081  * @param[in] flow
6082  *   Parent flow structure pointer.
6083  * @param[in] attr
6084  *   Flow rule attributes.
6085  * @param[in] items
6086  *   Pattern specification (list terminated by the END pattern item).
6087  * @param[in] actions
6088  *   Associated actions (list terminated by the END action).
6089  * @param[in] flow_split_info
6090  *   Pointer to flow split info structure.
6091  * @param[out] error
6092  *   Perform verbose error reporting if not NULL.
6093  * @return
6094  *   0 on success, negative value otherwise
6095  */
6096 static int
6097 flow_create_split_sample(struct rte_eth_dev *dev,
6098                          struct rte_flow *flow,
6099                          const struct rte_flow_attr *attr,
6100                          const struct rte_flow_item items[],
6101                          const struct rte_flow_action actions[],
6102                          struct mlx5_flow_split_info *flow_split_info,
6103                          struct rte_flow_error *error)
6104 {
6105         struct mlx5_priv *priv = dev->data->dev_private;
6106         struct rte_flow_action *sfx_actions = NULL;
6107         struct rte_flow_action *pre_actions = NULL;
6108         struct rte_flow_item *sfx_items = NULL;
6109         struct mlx5_flow *dev_flow = NULL;
6110         struct rte_flow_attr sfx_attr = *attr;
6111 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6112         struct mlx5_flow_dv_sample_resource *sample_res;
6113         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6114         struct mlx5_flow_tbl_resource *sfx_tbl;
6115 #endif
6116         size_t act_size;
6117         size_t item_size;
6118         uint32_t fdb_tx = 0;
6119         int32_t tag_id = 0;
6120         int actions_n = 0;
6121         int sample_action_pos;
6122         int qrss_action_pos;
6123         int add_tag = 0;
6124         int modify_after_mirror = 0;
6125         uint16_t jump_table = 0;
6126         const uint32_t next_ft_step = 1;
6127         int ret = 0;
6128
6129         if (priv->sampler_en)
6130                 actions_n = flow_check_match_action(actions, attr,
6131                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
6132                                         &sample_action_pos, &qrss_action_pos,
6133                                         &modify_after_mirror);
6134         if (actions_n) {
6135                 /* The prefix actions must include sample, tag, end. */
6136                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6137                            + sizeof(struct mlx5_rte_flow_action_set_tag);
6138                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6139                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6140                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6141                                           item_size), 0, SOCKET_ID_ANY);
6142                 if (!sfx_actions)
6143                         return rte_flow_error_set(error, ENOMEM,
6144                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6145                                                   NULL, "no memory to split "
6146                                                   "sample flow");
6147                 /* The representor_id is UINT16_MAX for uplink. */
6148                 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6149                 /*
6150                  * When reg_c_preserve is set, metadata registers Cx preserve
6151                  * their value even through packet duplication.
6152                  */
6153                 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
6154                 if (add_tag)
6155                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6156                                         + act_size);
6157                 if (modify_after_mirror)
6158                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6159                                      next_ft_step;
6160                 pre_actions = sfx_actions + actions_n;
6161                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
6162                                                 actions, sfx_actions,
6163                                                 pre_actions, actions_n,
6164                                                 sample_action_pos,
6165                                                 qrss_action_pos, jump_table,
6166                                                 error);
6167                 if (tag_id < 0 || (add_tag && !tag_id)) {
6168                         ret = -rte_errno;
6169                         goto exit;
6170                 }
6171                 if (modify_after_mirror)
6172                         flow_split_info->skip_scale =
6173                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6174                 /* Add the prefix subflow. */
6175                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6176                                               items, pre_actions,
6177                                               flow_split_info, error);
6178                 if (ret) {
6179                         ret = -rte_errno;
6180                         goto exit;
6181                 }
6182                 dev_flow->handle->split_flow_id = tag_id;
6183 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6184                 if (!modify_after_mirror) {
6185                         /* Set the sfx group attr. */
6186                         sample_res = (struct mlx5_flow_dv_sample_resource *)
6187                                                 dev_flow->dv.sample_res;
6188                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
6189                                                 sample_res->normal_path_tbl;
6190                         sfx_tbl_data = container_of(sfx_tbl,
6191                                                 struct mlx5_flow_tbl_data_entry,
6192                                                 tbl);
6193                         sfx_attr.group = sfx_attr.transfer ?
6194                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6195                 } else {
6196                         MLX5_ASSERT(attr->transfer);
6197                         sfx_attr.group = jump_table;
6198                 }
6199                 flow_split_info->prefix_layers =
6200                                 flow_get_prefix_layer_flags(dev_flow);
6201                 flow_split_info->prefix_mark = dev_flow->handle->mark;
6202                 /* The suffix group level has already been scaled with the
6203                  * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6204                  * to avoid scaling again in translation.
6205                  */
6206                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6207 #endif
6208         }
6209         /* Add the suffix subflow. */
6210         ret = flow_create_split_meter(dev, flow, &sfx_attr,
6211                                       sfx_items ? sfx_items : items,
6212                                       sfx_actions ? sfx_actions : actions,
6213                                       flow_split_info, error);
6214 exit:
6215         if (sfx_actions)
6216                 mlx5_free(sfx_actions);
6217         return ret;
6218 }
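
/*
 * Illustrative sketch only (not part of the driver): assuming an application
 * rule with
 *
 *   actions: COUNT / SAMPLE / QUEUE / END
 *
 * the splitter above roughly produces
 *
 *   prefix subflow: original items, actions COUNT / SAMPLE / SET_TAG(id) / END
 *   suffix subflow: TAG(id) item, actions QUEUE / END
 *
 * where "id" is the unique flow id returned by flow_sample_split_prep().
 * The exact action layout is decided there; this block is a reading aid.
 */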
6219
6220 /**
6221  * Split the flow into a set of subflows. The splitters might be chained,
6222  * like this:
6223  * flow_create_split_outer() calls:
6224  *   flow_create_split_meter() calls:
6225  *     flow_create_split_metadata(meter_subflow_0) calls:
6226  *       flow_create_split_inner(metadata_subflow_0)
6227  *       flow_create_split_inner(metadata_subflow_1)
6228  *       flow_create_split_inner(metadata_subflow_2)
6229  *     flow_create_split_metadata(meter_subflow_1) calls:
6230  *       flow_create_split_inner(metadata_subflow_0)
6231  *       flow_create_split_inner(metadata_subflow_1)
6232  *       flow_create_split_inner(metadata_subflow_2)
6233  *
6234  * This provides a flexible way to add new levels of flow splitting.
6235  * All successfully created subflows are included in the parent flow
6236  * dev_flow list.
6237  *
6238  * @param dev
6239  *   Pointer to Ethernet device.
6240  * @param[in] flow
6241  *   Parent flow structure pointer.
6242  * @param[in] attr
6243  *   Flow rule attributes.
6244  * @param[in] items
6245  *   Pattern specification (list terminated by the END pattern item).
6246  * @param[in] actions
6247  *   Associated actions (list terminated by the END action).
6248  * @param[in] flow_split_info
6249  *   Pointer to flow split info structure.
6250  * @param[out] error
6251  *   Perform verbose error reporting if not NULL.
6252  * @return
6253  *   0 on success, negative value otherwise
6254  */
6255 static int
6256 flow_create_split_outer(struct rte_eth_dev *dev,
6257                         struct rte_flow *flow,
6258                         const struct rte_flow_attr *attr,
6259                         const struct rte_flow_item items[],
6260                         const struct rte_flow_action actions[],
6261                         struct mlx5_flow_split_info *flow_split_info,
6262                         struct rte_flow_error *error)
6263 {
6264         int ret;
6265
6266         ret = flow_create_split_sample(dev, flow, attr, items,
6267                                        actions, flow_split_info, error);
6268         MLX5_ASSERT(ret <= 0);
6269         return ret;
6270 }
6271
6272 static inline struct mlx5_flow_tunnel *
6273 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6274 {
6275         struct mlx5_flow_tunnel *tunnel;
6276
6277 #pragma GCC diagnostic push
6278 #pragma GCC diagnostic ignored "-Wcast-qual"
6279         tunnel = (typeof(tunnel))flow->tunnel;
6280 #pragma GCC diagnostic pop
6281
6282         return tunnel;
6283 }
6284
6285 /**
6286  * Adjust flow RSS workspace if needed.
6287  *
6288  * @param wks
6289  *   Pointer to thread flow work space.
6290  * @param rss_desc
6291  *   Pointer to RSS descriptor.
6292  * @param[in] nrssq_num
6293  *   New RSS queue number.
6294  *
6295  * @return
6296  *   0 on success, -1 otherwise and rte_errno is set.
6297  */
6298 static int
6299 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6300                           struct mlx5_flow_rss_desc *rss_desc,
6301                           uint32_t nrssq_num)
6302 {
6303         if (likely(nrssq_num <= wks->rssq_num))
6304                 return 0;
6305         rss_desc->queue = realloc(rss_desc->queue,
6306                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6307         if (!rss_desc->queue) {
6308                 rte_errno = ENOMEM;
6309                 return -1;
6310         }
6311         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6312         return 0;
6313 }
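
/*
 * Worked example (reading aid only): with wks->rssq_num == 4 and an RSS
 * action using 5 queues, the queue array above is reallocated to
 * RTE_ALIGN(5, 2) == 6 entries and wks->rssq_num becomes 6, so a later
 * request for up to 6 queues reuses the buffer without another realloc().
 */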
6314
6315 /**
6316  * Create a flow and store it in the per-type indexed pool.
6317  *
6318  * @param dev
6319  *   Pointer to Ethernet device.
6320  * @param type
6321  *   Flow type (MLX5_FLOW_TYPE_*). Selects the per-type indexed
6322  *   pool the created flow is stored in; the caller is
6323  *   responsible for tracking the created flow by the
6324  *   returned index.
6325  * @param[in] attr
6326  *   Flow rule attributes.
6327  * @param[in] items
6328  *   Pattern specification (list terminated by the END pattern item).
6329  * @param[in] actions
6330  *   Associated actions (list terminated by the END action).
6331  * @param[in] external
6332  *   This flow rule is created by a request external to the PMD.
6333  * @param[out] error
6334  *   Perform verbose error reporting if not NULL.
6335  *
6336  * @return
6337  *   A flow index on success, 0 otherwise and rte_errno is set.
6338  */
6339 static uint32_t
6340 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6341                  const struct rte_flow_attr *attr,
6342                  const struct rte_flow_item items[],
6343                  const struct rte_flow_action original_actions[],
6344                  bool external, struct rte_flow_error *error)
6345 {
6346         struct mlx5_priv *priv = dev->data->dev_private;
6347         struct rte_flow *flow = NULL;
6348         struct mlx5_flow *dev_flow;
6349         const struct rte_flow_action_rss *rss = NULL;
6350         struct mlx5_translated_action_handle
6351                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6352         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6353         union {
6354                 struct mlx5_flow_expand_rss buf;
6355                 uint8_t buffer[4096];
6356         } expand_buffer;
6357         union {
6358                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6359                 uint8_t buffer[2048];
6360         } actions_rx;
6361         union {
6362                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6363                 uint8_t buffer[2048];
6364         } actions_hairpin_tx;
6365         union {
6366                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6367                 uint8_t buffer[2048];
6368         } items_tx;
6369         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6370         struct mlx5_flow_rss_desc *rss_desc;
6371         const struct rte_flow_action *p_actions_rx;
6372         uint32_t i;
6373         uint32_t idx = 0;
6374         int hairpin_flow;
6375         struct rte_flow_attr attr_tx = { .priority = 0 };
6376         const struct rte_flow_action *actions;
6377         struct rte_flow_action *translated_actions = NULL;
6378         struct mlx5_flow_tunnel *tunnel;
6379         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6380         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6381         struct mlx5_flow_split_info flow_split_info = {
6382                 .external = !!external,
6383                 .skip_scale = 0,
6384                 .flow_idx = 0,
6385                 .prefix_mark = 0,
6386                 .prefix_layers = 0,
6387                 .table_id = 0
6388         };
6389         int ret;
6390
6391         MLX5_ASSERT(wks);
6392         rss_desc = &wks->rss_desc;
6393         ret = flow_action_handles_translate(dev, original_actions,
6394                                             indir_actions,
6395                                             &indir_actions_n,
6396                                             &translated_actions, error);
6397         if (ret < 0) {
6398                 MLX5_ASSERT(translated_actions == NULL);
6399                 return 0;
6400         }
6401         actions = translated_actions ? translated_actions : original_actions;
6402         p_actions_rx = actions;
6403         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6404         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6405                                 external, hairpin_flow, error);
6406         if (ret < 0)
6407                 goto error_before_hairpin_split;
6408         flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6409         if (!flow) {
6410                 rte_errno = ENOMEM;
6411                 goto error_before_hairpin_split;
6412         }
6413         if (hairpin_flow > 0) {
6414                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6415                         rte_errno = EINVAL;
6416                         goto error_before_hairpin_split;
6417                 }
6418                 flow_hairpin_split(dev, actions, actions_rx.actions,
6419                                    actions_hairpin_tx.actions, items_tx.items,
6420                                    idx);
6421                 p_actions_rx = actions_rx.actions;
6422         }
6423         flow_split_info.flow_idx = idx;
6424         flow->drv_type = flow_get_drv_type(dev, attr);
6425         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6426                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6427         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6428         /* RSS Action only works on NIC RX domain */
6429         if (attr->ingress && !attr->transfer)
6430                 rss = flow_get_rss_action(dev, p_actions_rx);
6431         if (rss) {
6432                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6433                         return 0;
6434                 /*
6435                  * The following information is required by
6436                  * mlx5_flow_hashfields_adjust() in advance.
6437                  */
6438                 rss_desc->level = rss->level;
6439                 /* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6440                 rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6441         }
6442         flow->dev_handles = 0;
6443         if (rss && rss->types) {
6444                 unsigned int graph_root;
6445
6446                 graph_root = find_graph_root(rss->level);
6447                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6448                                            items, rss->types,
6449                                            mlx5_support_expansion, graph_root);
6450                 MLX5_ASSERT(ret > 0 &&
6451                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6452                 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6453                         for (i = 0; i < buf->entries; ++i)
6454                                 mlx5_dbg__print_pattern(buf->entry[i].pattern);
6455                 }
6456         } else {
6457                 buf->entries = 1;
6458                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6459         }
6460         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6461                                                       indir_actions_n);
6462         for (i = 0; i < buf->entries; ++i) {
6463                 /* Initialize flow split data. */
6464                 flow_split_info.prefix_layers = 0;
6465                 flow_split_info.prefix_mark = 0;
6466                 flow_split_info.skip_scale = 0;
6467                 /*
6468                  * The splitter may create multiple dev_flows,
6469                  * depending on configuration. In the simplest
6470                  * case it just creates unmodified original flow.
6471                  */
6472                 ret = flow_create_split_outer(dev, flow, attr,
6473                                               buf->entry[i].pattern,
6474                                               p_actions_rx, &flow_split_info,
6475                                               error);
6476                 if (ret < 0)
6477                         goto error;
6478                 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6479                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6480                                                            p_actions_rx,
6481                                                            idx,
6482                                                            wks->flows[0].tunnel,
6483                                                            &default_miss_ctx,
6484                                                            error);
6485                         if (ret < 0) {
6486                                 mlx5_free(default_miss_ctx.queue);
6487                                 goto error;
6488                         }
6489                 }
6490         }
6491         /* Create the tx flow. */
6492         if (hairpin_flow) {
6493                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6494                 attr_tx.ingress = 0;
6495                 attr_tx.egress = 1;
6496                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6497                                          actions_hairpin_tx.actions,
6498                                          idx, error);
6499                 if (!dev_flow)
6500                         goto error;
6501                 dev_flow->flow = flow;
6502                 dev_flow->external = 0;
6503                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6504                               dev_flow->handle, next);
6505                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6506                                          items_tx.items,
6507                                          actions_hairpin_tx.actions, error);
6508                 if (ret < 0)
6509                         goto error;
6510         }
6511         /*
6512          * Update the metadata register copy table. If extensive
6513          * metadata feature is enabled and registers are supported
6514          * we might create the extra rte_flow for each unique
6515          * MARK/FLAG action ID.
6516          *
6517          * The table is updated for ingress Flows only, because
6518          * the egress Flows belong to a different device and the
6519          * copy table should be updated in the peer NIC Rx domain.
6520          */
6521         if (attr->ingress &&
6522             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6523                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6524                 if (ret)
6525                         goto error;
6526         }
6527         /*
6528          * If the flow is external (from the application), OR the device is
6529          * started, OR this is an mreg discover flow, apply it immediately.
6530          */
6531         if (external || dev->data->dev_started ||
6532             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6533              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6534                 ret = flow_drv_apply(dev, flow, error);
6535                 if (ret < 0)
6536                         goto error;
6537         }
6538         flow->type = type;
6539         flow_rxq_flags_set(dev, flow);
6540         rte_free(translated_actions);
6541         tunnel = flow_tunnel_from_rule(wks->flows);
6542         if (tunnel) {
6543                 flow->tunnel = 1;
6544                 flow->tunnel_id = tunnel->tunnel_id;
6545                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6546                 mlx5_free(default_miss_ctx.queue);
6547         }
6548         mlx5_flow_pop_thread_workspace();
6549         return idx;
6550 error:
6551         MLX5_ASSERT(flow);
6552         ret = rte_errno; /* Save rte_errno before cleanup. */
6553         flow_mreg_del_copy_action(dev, flow);
6554         flow_drv_destroy(dev, flow);
6555         if (rss_desc->shared_rss)
6556                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6557                         mlx5_ipool_get
6558                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6559                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6560         mlx5_ipool_free(priv->flows[type], idx);
6561         rte_errno = ret; /* Restore rte_errno. */
6564         mlx5_flow_pop_thread_workspace();
6565 error_before_hairpin_split:
6566         rte_free(translated_actions);
6567         return 0;
6568 }
6569
6570 /**
6571  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
6572  * incoming packets to table 1.
6573  *
6574  * Other flow rules, requested for group n, will be created in
6575  * e-switch table n+1.
6576  * A jump action to e-switch group n will be created as a jump to group n+1.
6577  *
6578  * Used when working in switchdev mode, to utilise advantages of table 1
6579  * and above.
6580  *
6581  * @param dev
6582  *   Pointer to Ethernet device.
6583  *
6584  * @return
6585  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
6586  */
6587 struct rte_flow *
6588 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6589 {
6590         const struct rte_flow_attr attr = {
6591                 .group = 0,
6592                 .priority = 0,
6593                 .ingress = 1,
6594                 .egress = 0,
6595                 .transfer = 1,
6596         };
6597         const struct rte_flow_item pattern = {
6598                 .type = RTE_FLOW_ITEM_TYPE_END,
6599         };
6600         struct rte_flow_action_jump jump = {
6601                 .group = 1,
6602         };
6603         const struct rte_flow_action actions[] = {
6604                 {
6605                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6606                         .conf = &jump,
6607                 },
6608                 {
6609                         .type = RTE_FLOW_ACTION_TYPE_END,
6610                 },
6611         };
6612         struct rte_flow_error error;
6613
6614         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
6615                                                    &attr, &pattern,
6616                                                    actions, false, &error);
6617 }
6618
6619 /**
6620  * Create a dedicated flow rule on e-switch table 1 that matches the ESW
6621  * manager and SQ number and directs all packets to the peer vport.
6622  *
6623  * @param dev
6624  *   Pointer to Ethernet device.
6625  * @param txq
6626  *   Txq index.
6627  *
6628  * @return
6629  *   Flow ID on success, 0 otherwise and rte_errno is set.
6630  */
6631 uint32_t
6632 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
6633 {
6634         struct rte_flow_attr attr = {
6635                 .group = 0,
6636                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6637                 .ingress = 1,
6638                 .egress = 0,
6639                 .transfer = 1,
6640         };
6641         struct rte_flow_item_port_id port_spec = {
6642                 .id = MLX5_PORT_ESW_MGR,
6643         };
6644         struct mlx5_rte_flow_item_tx_queue txq_spec = {
6645                 .queue = txq,
6646         };
6647         struct rte_flow_item pattern[] = {
6648                 {
6649                         .type = RTE_FLOW_ITEM_TYPE_PORT_ID,
6650                         .spec = &port_spec,
6651                 },
6652                 {
6653                         .type = (enum rte_flow_item_type)
6654                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6655                         .spec = &txq_spec,
6656                 },
6657                 {
6658                         .type = RTE_FLOW_ITEM_TYPE_END,
6659                 },
6660         };
6661         struct rte_flow_action_jump jump = {
6662                 .group = 1,
6663         };
6664         struct rte_flow_action_port_id port = {
6665                 .id = dev->data->port_id,
6666         };
6667         struct rte_flow_action actions[] = {
6668                 {
6669                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6670                         .conf = &jump,
6671                 },
6672                 {
6673                         .type = RTE_FLOW_ACTION_TYPE_END,
6674                 },
6675         };
6676         struct rte_flow_error error;
6677
6678         /*
6679          * Creates group 0, highest priority jump flow.
6680          * Matches txq to bypass kernel packets.
6681          */
6682         if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
6683                              false, &error) == 0)
6684                 return 0;
6685         /* Create group 1, lowest priority redirect flow for txq. */
6686         attr.group = 1;
6687         actions[0].conf = &port;
6688         actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
6689         return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
6690                                 actions, false, &error);
6691 }
6692
6693 /**
6694  * Validate a flow supported by the NIC.
6695  *
6696  * @see rte_flow_validate()
6697  * @see rte_flow_ops
6698  */
6699 int
6700 mlx5_flow_validate(struct rte_eth_dev *dev,
6701                    const struct rte_flow_attr *attr,
6702                    const struct rte_flow_item items[],
6703                    const struct rte_flow_action original_actions[],
6704                    struct rte_flow_error *error)
6705 {
6706         int hairpin_flow;
6707         struct mlx5_translated_action_handle
6708                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6709         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6710         const struct rte_flow_action *actions;
6711         struct rte_flow_action *translated_actions = NULL;
6712         int ret = flow_action_handles_translate(dev, original_actions,
6713                                                 indir_actions,
6714                                                 &indir_actions_n,
6715                                                 &translated_actions, error);
6716
6717         if (ret)
6718                 return ret;
6719         actions = translated_actions ? translated_actions : original_actions;
6720         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6721         ret = flow_drv_validate(dev, attr, items, actions,
6722                                 true, hairpin_flow, error);
6723         rte_free(translated_actions);
6724         return ret;
6725 }
6726
6727 /**
6728  * Create a flow.
6729  *
6730  * @see rte_flow_create()
6731  * @see rte_flow_ops
6732  */
6733 struct rte_flow *
6734 mlx5_flow_create(struct rte_eth_dev *dev,
6735                  const struct rte_flow_attr *attr,
6736                  const struct rte_flow_item items[],
6737                  const struct rte_flow_action actions[],
6738                  struct rte_flow_error *error)
6739 {
6740         /*
6741          * If the device is not started yet, it is not allowed to create a
6742          * flow from the application. PMD default flows and traffic control flows
6743          * are not affected.
6744          */
6745         if (unlikely(!dev->data->dev_started)) {
6746                 DRV_LOG(DEBUG, "port %u is not started when "
6747                         "inserting a flow", dev->data->port_id);
6748                 rte_flow_error_set(error, ENODEV,
6749                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6750                                    NULL,
6751                                    "port not started");
6752                 return NULL;
6753         }
6754
6755         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
6756                                                    attr, items, actions,
6757                                                    true, error);
6758 }
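
/*
 * Hypothetical application-side usage of the two callbacks above
 * (illustrative only, identifiers are local to this sketch):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *handle = NULL;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *           handle = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * Both calls are dispatched here through mlx5_flow_ops; creation is
 * rejected with ENODEV while the port is not started.
 */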
6759
6760 /**
6761  * Destroy a flow in a list.
6762  * Destroy a flow stored in the per-type indexed pool.
6763  * @param dev
6764  *   Pointer to Ethernet device.
6765  * @param[in] flow_idx
6766  *   Index of flow to destroy.
6767  */
6768 static void
6769 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6770                   uint32_t flow_idx)
6771 {
6772         struct mlx5_priv *priv = dev->data->dev_private;
6773         struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
6774
6775         if (!flow)
6776                 return;
6777         MLX5_ASSERT(flow->type == type);
6778         /*
6779          * Update RX queue flags only if port is started, otherwise it is
6780          * already clean.
6781          */
6782         if (dev->data->dev_started)
6783                 flow_rxq_flags_trim(dev, flow);
6784         flow_drv_destroy(dev, flow);
6785         if (flow->tunnel) {
6786                 struct mlx5_flow_tunnel *tunnel;
6787
6788                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
6789                 RTE_VERIFY(tunnel);
6790                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
6791                         mlx5_flow_tunnel_free(dev, tunnel);
6792         }
6793         flow_mreg_del_copy_action(dev, flow);
6794         mlx5_ipool_free(priv->flows[type], flow_idx);
6795 }
6796
6797 /**
6798  * Destroy all flows.
6799  *
6800  * @param dev
6801  *   Pointer to Ethernet device.
6802  * @param type
6803  *   Flow type to be flushed.
6804  * @param active
6805  *   If flushing is called actively.
6806  */
6807 void
6808 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6809                      bool active)
6810 {
6811         struct mlx5_priv *priv = dev->data->dev_private;
6812         uint32_t num_flushed = 0, fidx = 1;
6813         struct rte_flow *flow;
6814
6815         MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
6816                 flow_list_destroy(dev, type, fidx);
6817                 num_flushed++;
6818         }
6819         if (active) {
6820                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
6821                         dev->data->port_id, num_flushed);
6822         }
6823 }
6824
6825 /**
6826  * Stop all default actions for flows.
6827  *
6828  * @param dev
6829  *   Pointer to Ethernet device.
6830  */
6831 void
6832 mlx5_flow_stop_default(struct rte_eth_dev *dev)
6833 {
6834         flow_mreg_del_default_copy_action(dev);
6835         flow_rxq_flags_clear(dev);
6836 }
6837
6838 /**
6839  * Start all default actions for flows.
6840  *
6841  * @param dev
6842  *   Pointer to Ethernet device.
6843  * @return
6844  *   0 on success, a negative errno value otherwise and rte_errno is set.
6845  */
6846 int
6847 mlx5_flow_start_default(struct rte_eth_dev *dev)
6848 {
6849         struct rte_flow_error error;
6850
6851         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6852         return flow_mreg_add_default_copy_action(dev, &error);
6853 }
6854
6855 /**
6856  * Release key of thread specific flow workspace data.
6857  */
6858 void
6859 flow_release_workspace(void *data)
6860 {
6861         struct mlx5_flow_workspace *wks = data;
6862         struct mlx5_flow_workspace *next;
6863
6864         while (wks) {
6865                 next = wks->next;
6866                 free(wks->rss_desc.queue);
6867                 free(wks);
6868                 wks = next;
6869         }
6870 }
6871
6872 /**
6873  * Get thread specific current flow workspace.
6874  *
6875  * @return pointer to thread specific flow workspace data, NULL on error.
6876  */
6877 struct mlx5_flow_workspace*
6878 mlx5_flow_get_thread_workspace(void)
6879 {
6880         struct mlx5_flow_workspace *data;
6881
6882         data = mlx5_flow_os_get_specific_workspace();
6883         MLX5_ASSERT(data && data->inuse);
6884         if (!data || !data->inuse)
6885                 DRV_LOG(ERR, "flow workspace not initialized.");
6886         return data;
6887 }
6888
6889 /**
6890  * Allocate and init new flow workspace.
6891  *
6892  * @return pointer to flow workspace data, NULL on error.
6893  */
6894 static struct mlx5_flow_workspace*
6895 flow_alloc_thread_workspace(void)
6896 {
6897         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6898
6899         if (!data) {
6900                 DRV_LOG(ERR, "Failed to allocate flow workspace "
6901                         "memory.");
6902                 return NULL;
6903         }
6904         data->rss_desc.queue = calloc(1,
6905                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6906         if (!data->rss_desc.queue)
6907                 goto err;
6908         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6909         return data;
6910 err:
6911         if (data->rss_desc.queue)
6912                 free(data->rss_desc.queue);
6913         free(data);
6914         return NULL;
6915 }
6916
6917 /**
6918  * Get new thread specific flow workspace.
6919  *
6920  * If the current workspace is in use, create a new one and set it as current.
6921  *
6922  * @return pointer to thread specific flow workspace data, NULL on error.
6923  */
6924 static struct mlx5_flow_workspace*
6925 mlx5_flow_push_thread_workspace(void)
6926 {
6927         struct mlx5_flow_workspace *curr;
6928         struct mlx5_flow_workspace *data;
6929
6930         curr = mlx5_flow_os_get_specific_workspace();
6931         if (!curr) {
6932                 data = flow_alloc_thread_workspace();
6933                 if (!data)
6934                         return NULL;
6935         } else if (!curr->inuse) {
6936                 data = curr;
6937         } else if (curr->next) {
6938                 data = curr->next;
6939         } else {
6940                 data = flow_alloc_thread_workspace();
6941                 if (!data)
6942                         return NULL;
6943                 curr->next = data;
6944                 data->prev = curr;
6945         }
6946         data->inuse = 1;
6947         data->flow_idx = 0;
6948         /* Set as current workspace */
6949         if (mlx5_flow_os_set_specific_workspace(data))
6950                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6951         return data;
6952 }
6953
6954 /**
6955  * Close current thread specific flow workspace.
6956  *
6957  * If a previous workspace is available, set it as current.
6960  */
6961 static void
6962 mlx5_flow_pop_thread_workspace(void)
6963 {
6964         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6965
6966         if (!data)
6967                 return;
6968         if (!data->inuse) {
6969                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
6970                 return;
6971         }
6972         data->inuse = 0;
6973         if (!data->prev)
6974                 return;
6975         if (mlx5_flow_os_set_specific_workspace(data->prev))
6976                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6977 }
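
/*
 * Typical per-thread workspace lifecycle (sketch mirroring flow_list_create()
 * above):
 *
 *   struct mlx5_flow_workspace *wks;
 *
 *   wks = mlx5_flow_push_thread_workspace();
 *   ... fill wks->rss_desc and build subflows tracked in wks->flows ...
 *   mlx5_flow_pop_thread_workspace();
 *
 * Each push marks a workspace in the per-thread chain as in use, allocating
 * a new one when needed, and each pop releases it and restores the previous
 * workspace as current when one exists.
 */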
6978
6979 /**
6980  * Verify the flow list is empty
6981  * Verify the flow list is empty.
6982  * @param dev
6983  *  Pointer to Ethernet device.
6984  *
6985  * @return the number of flows not released.
6986  */
6987 int
6988 mlx5_flow_verify(struct rte_eth_dev *dev __rte_unused)
6989 mlx5_flow_verify(struct rte_eth_dev *dev)
6990         struct mlx5_priv *priv = dev->data->dev_private;
6991         struct rte_flow *flow;
6992         uint32_t idx = 0;
6993         int ret = 0, i;
6994
6995         for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
6996                 MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
6997                         DRV_LOG(DEBUG, "port %u flow %p still referenced",
6998                                 dev->data->port_id, (void *)flow);
6999                         ret++;
7000                 }
7001         }
7002         return ret;
7003 }
7004
7005 /**
7006  * Enable default hairpin egress flow.
7007  *
7008  * @param dev
7009  *   Pointer to Ethernet device.
7010  * @param queue
7011  *   The queue index.
7012  *
7013  * @return
7014  *   0 on success, a negative errno value otherwise and rte_errno is set.
7015  */
7016 int
7017 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7018                             uint32_t queue)
7019 {
7020         const struct rte_flow_attr attr = {
7021                 .egress = 1,
7022                 .priority = 0,
7023         };
7024         struct mlx5_rte_flow_item_tx_queue queue_spec = {
7025                 .queue = queue,
7026         };
7027         struct mlx5_rte_flow_item_tx_queue queue_mask = {
7028                 .queue = UINT32_MAX,
7029         };
7030         struct rte_flow_item items[] = {
7031                 {
7032                         .type = (enum rte_flow_item_type)
7033                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7034                         .spec = &queue_spec,
7035                         .last = NULL,
7036                         .mask = &queue_mask,
7037                 },
7038                 {
7039                         .type = RTE_FLOW_ITEM_TYPE_END,
7040                 },
7041         };
7042         struct rte_flow_action_jump jump = {
7043                 .group = MLX5_HAIRPIN_TX_TABLE,
7044         };
7045         struct rte_flow_action actions[2];
7046         uint32_t flow_idx;
7047         struct rte_flow_error error;
7048
7049         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7050         actions[0].conf = &jump;
7051         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7052         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7053                                     &attr, items, actions, false, &error);
7054         if (!flow_idx) {
7055                 DRV_LOG(DEBUG,
7056                         "Failed to create ctrl flow: rte_errno(%d),"
7057                         " type(%d), message(%s)",
7058                         rte_errno, error.type,
7059                         error.message ? error.message : " (no stated reason)");
7060                 return -rte_errno;
7061         }
7062         return 0;
7063 }
7064
7065 /**
7066  * Enable a control flow configured from the control plane.
7067  *
7068  * @param dev
7069  *   Pointer to Ethernet device.
7070  * @param eth_spec
7071  *   An Ethernet flow spec to apply.
7072  * @param eth_mask
7073  *   An Ethernet flow mask to apply.
7074  * @param vlan_spec
7075  *   A VLAN flow spec to apply.
7076  * @param vlan_mask
7077  *   A VLAN flow mask to apply.
7078  *
7079  * @return
7080  *   0 on success, a negative errno value otherwise and rte_errno is set.
7081  */
7082 int
7083 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7084                     struct rte_flow_item_eth *eth_spec,
7085                     struct rte_flow_item_eth *eth_mask,
7086                     struct rte_flow_item_vlan *vlan_spec,
7087                     struct rte_flow_item_vlan *vlan_mask)
7088 {
7089         struct mlx5_priv *priv = dev->data->dev_private;
7090         const struct rte_flow_attr attr = {
7091                 .ingress = 1,
7092                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7093         };
7094         struct rte_flow_item items[] = {
7095                 {
7096                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7097                         .spec = eth_spec,
7098                         .last = NULL,
7099                         .mask = eth_mask,
7100                 },
7101                 {
7102                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7103                                               RTE_FLOW_ITEM_TYPE_END,
7104                         .spec = vlan_spec,
7105                         .last = NULL,
7106                         .mask = vlan_mask,
7107                 },
7108                 {
7109                         .type = RTE_FLOW_ITEM_TYPE_END,
7110                 },
7111         };
7112         uint16_t queue[priv->reta_idx_n];
7113         struct rte_flow_action_rss action_rss = {
7114                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7115                 .level = 0,
7116                 .types = priv->rss_conf.rss_hf,
7117                 .key_len = priv->rss_conf.rss_key_len,
7118                 .queue_num = priv->reta_idx_n,
7119                 .key = priv->rss_conf.rss_key,
7120                 .queue = queue,
7121         };
7122         struct rte_flow_action actions[] = {
7123                 {
7124                         .type = RTE_FLOW_ACTION_TYPE_RSS,
7125                         .conf = &action_rss,
7126                 },
7127                 {
7128                         .type = RTE_FLOW_ACTION_TYPE_END,
7129                 },
7130         };
7131         uint32_t flow_idx;
7132         struct rte_flow_error error;
7133         unsigned int i;
7134
7135         if (!priv->reta_idx_n || !priv->rxqs_n) {
7136                 return 0;
7137         }
7138         if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7139                 action_rss.types = 0;
7140         for (i = 0; i != priv->reta_idx_n; ++i)
7141                 queue[i] = (*priv->reta_idx)[i];
7142         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7143                                     &attr, items, actions, false, &error);
7144         if (!flow_idx)
7145                 return -rte_errno;
7146         return 0;
7147 }
7148
7149 /**
7150  * Enable a control flow configured from the control plane.
7151  *
7152  * @param dev
7153  *   Pointer to Ethernet device.
7154  * @param eth_spec
7155  *   An Ethernet flow spec to apply.
7156  * @param eth_mask
7157  *   An Ethernet flow mask to apply.
7158  *
7159  * @return
7160  *   0 on success, a negative errno value otherwise and rte_errno is set.
7161  */
7162 int
7163 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7164                struct rte_flow_item_eth *eth_spec,
7165                struct rte_flow_item_eth *eth_mask)
7166 {
7167         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7168 }
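
/*
 * Sketch of how the control path is expected to use this helper (the values
 * are illustrative, e.g. a broadcast MAC match):
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   mlx5_ctrl_flow(dev, &bcast, &bcast);
 *
 * This installs an ingress rule at the lowest control priority and spreads
 * the matched traffic with RSS over the configured Rx queues.
 */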
7169
7170 /**
7171  * Create a default miss flow rule matching LACP traffic.
7172  *
7173  * @param dev
7174  *   Pointer to Ethernet device.
7177  *
7178  * @return
7179  *   0 on success, a negative errno value otherwise and rte_errno is set.
7180  */
7181 int
7182 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7183 {
7184         /*
7185          * The LACP matching is done by only using ether type since using
7186          * The LACP matching is done by using only the ether type, since using
7187          * a multicast dst MAC causes the kernel to give low priority to this flow.
7188         static const struct rte_flow_item_eth lacp_spec = {
7189                 .type = RTE_BE16(0x8809),
7190         };
7191         static const struct rte_flow_item_eth lacp_mask = {
7192                 .type = 0xffff,
7193         };
7194         const struct rte_flow_attr attr = {
7195                 .ingress = 1,
7196         };
7197         struct rte_flow_item items[] = {
7198                 {
7199                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7200                         .spec = &lacp_spec,
7201                         .mask = &lacp_mask,
7202                 },
7203                 {
7204                         .type = RTE_FLOW_ITEM_TYPE_END,
7205                 },
7206         };
7207         struct rte_flow_action actions[] = {
7208                 {
7209                         .type = (enum rte_flow_action_type)
7210                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7211                 },
7212                 {
7213                         .type = RTE_FLOW_ACTION_TYPE_END,
7214                 },
7215         };
7216         struct rte_flow_error error;
7217         uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7218                                         &attr, items, actions,
7219                                         false, &error);
7220
7221         if (!flow_idx)
7222                 return -rte_errno;
7223         return 0;
7224 }
7225
7226 /**
7227  * Destroy a flow.
7228  *
7229  * @see rte_flow_destroy()
7230  * @see rte_flow_ops
7231  */
7232 int
7233 mlx5_flow_destroy(struct rte_eth_dev *dev,
7234                   struct rte_flow *flow,
7235                   struct rte_flow_error *error __rte_unused)
7236 {
7237         flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7238                                 (uintptr_t)(void *)flow);
7239         return 0;
7240 }
7241
7242 /**
7243  * Destroy all flows.
7244  *
7245  * @see rte_flow_flush()
7246  * @see rte_flow_ops
7247  */
7248 int
7249 mlx5_flow_flush(struct rte_eth_dev *dev,
7250                 struct rte_flow_error *error __rte_unused)
7251 {
7252         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7253         return 0;
7254 }
7255
7256 /**
7257  * Isolated mode.
7258  *
7259  * @see rte_flow_isolate()
7260  * @see rte_flow_ops
7261  */
7262 int
7263 mlx5_flow_isolate(struct rte_eth_dev *dev,
7264                   int enable,
7265                   struct rte_flow_error *error)
7266 {
7267         struct mlx5_priv *priv = dev->data->dev_private;
7268
7269         if (dev->data->dev_started) {
7270                 rte_flow_error_set(error, EBUSY,
7271                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7272                                    NULL,
7273                                    "port must be stopped first");
7274                 return -rte_errno;
7275         }
7276         priv->isolated = !!enable;
7277         if (enable)
7278                 dev->dev_ops = &mlx5_dev_ops_isolate;
7279         else
7280                 dev->dev_ops = &mlx5_dev_ops;
7281
7282         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7283         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7284
7285         return 0;
7286 }
7287
7288 /**
7289  * Query a flow.
7290  *
7291  * @see rte_flow_query()
7292  * @see rte_flow_ops
7293  */
7294 static int
7295 flow_drv_query(struct rte_eth_dev *dev,
7296                uint32_t flow_idx,
7297                const struct rte_flow_action *actions,
7298                void *data,
7299                struct rte_flow_error *error)
7300 {
7301         struct mlx5_priv *priv = dev->data->dev_private;
7302         const struct mlx5_flow_driver_ops *fops;
7303         struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7304                                                flow_idx);
7305         enum mlx5_flow_drv_type ftype;
7306
7307         if (!flow) {
7308                 return rte_flow_error_set(error, ENOENT,
7309                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7310                           NULL,
7311                           "invalid flow handle");
7312         }
7313         ftype = flow->drv_type;
7314         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7315         fops = flow_get_drv_ops(ftype);
7316
7317         return fops->query(dev, flow, actions, data, error);
7318 }
7319
7320 /**
7321  * Query a flow.
7322  *
7323  * @see rte_flow_query()
7324  * @see rte_flow_ops
7325  */
7326 int
7327 mlx5_flow_query(struct rte_eth_dev *dev,
7328                 struct rte_flow *flow,
7329                 const struct rte_flow_action *actions,
7330                 void *data,
7331                 struct rte_flow_error *error)
7332 {
7333         int ret;
7334
7335         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7336                              error);
7337         if (ret < 0)
7338                 return ret;
7339         return 0;
7340 }
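
/*
 * Illustrative application-side query (assuming the rule was created with a
 * COUNT action; identifiers are local to this sketch):
 *
 *   struct rte_flow_query_count cnt = { .reset = 0 };
 *   struct rte_flow_action query[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 *   rte_flow_query(port_id, handle, query, &cnt, &err);
 *
 * The request reaches flow_drv_query() above and is forwarded to the
 * driver-specific fops->query() handler.
 */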
7341
7342 /**
7343  * Get rte_flow callbacks.
7344  *
7345  * @param dev
7346  *   Pointer to Ethernet device structure.
7347  * @param ops
7348  *   Pointer to operation-specific structure.
7349  *
7350  * @return 0
7351  */
7352 int
7353 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7354                   const struct rte_flow_ops **ops)
7355 {
7356         *ops = &mlx5_flow_ops;
7357         return 0;
7358 }
7359
7360 /**
7361  * Validate meter policy actions.
7362  * Dispatcher for action type specific validation.
7363  *
7364  * @param[in] dev
7365  *   Pointer to the Ethernet device structure.
7366  * @param[in] action
7367  *   The meter policy action object to validate.
7368  * @param[in] attr
7369  *   Attributes of flow to determine steering domain.
7370  * @param[out] is_rss
7371  *   Is RSS or not.
7372  * @param[out] domain_bitmap
7373  *   Domain bitmap.
7374  * @param[out] policy_mode
7375  *   The selected meter policy mode.
7376  * @param[out] error
7377  *   Perform verbose error reporting if not NULL. Initialized in case of
7378  *   error only.
7379  *
7380  * @return
7381  *   0 on success, otherwise negative errno value.
7382  */
7383 int
7384 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7385                         const struct rte_flow_action *actions[RTE_COLORS],
7386                         struct rte_flow_attr *attr,
7387                         bool *is_rss,
7388                         uint8_t *domain_bitmap,
7389                         uint8_t *policy_mode,
7390                         struct rte_mtr_error *error)
7391 {
7392         const struct mlx5_flow_driver_ops *fops;
7393
7394         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7395         return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7396                                        domain_bitmap, policy_mode, error);
7397 }
7398
7399 /**
7400  * Destroy the meter policy actions.
7401  *
7402  * @param[in] dev
7403  *   Pointer to Ethernet device.
7404  * @param[in] mtr_policy
7405  *   Meter policy struct.
7406  */
7407 void
7408 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7409                       struct mlx5_flow_meter_policy *mtr_policy)
7410 {
7411         const struct mlx5_flow_driver_ops *fops;
7412
7413         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7414         fops->destroy_mtr_acts(dev, mtr_policy);
7415 }
7416
7417 /**
7418  * Create policy action, lock free,
7419  * (mutex should be acquired by caller).
7420  * Dispatcher for action type specific call.
7421  *
7422  * @param[in] dev
7423  *   Pointer to the Ethernet device structure.
7424  * @param[in] mtr_policy
7425  *   Meter policy struct.
7426  * @param[in] action
7427  *   Action specification used to create meter actions.
7428  * @param[out] error
7429  *   Perform verbose error reporting if not NULL. Initialized in case of
7430  *   error only.
7431  *
7432  * @return
7433  *   0 on success, otherwise negative errno value.
7434  */
7435 int
7436 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7437                       struct mlx5_flow_meter_policy *mtr_policy,
7438                       const struct rte_flow_action *actions[RTE_COLORS],
7439                       struct rte_mtr_error *error)
7440 {
7441         const struct mlx5_flow_driver_ops *fops;
7442
7443         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7444         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7445 }
7446
7447 /**
7448  * Create policy rules, lock free,
7449  * (mutex should be acquired by caller).
7450  * Dispatcher for action type specific call.
7451  *
7452  * @param[in] dev
7453  *   Pointer to the Ethernet device structure.
7454  * @param[in] mtr_policy
7455  *   Meter policy struct.
7456  *
7457  * @return
7458  *   0 on success, -1 otherwise.
7459  */
7460 int
7461 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7462                              struct mlx5_flow_meter_policy *mtr_policy)
7463 {
7464         const struct mlx5_flow_driver_ops *fops;
7465
7466         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7467         return fops->create_policy_rules(dev, mtr_policy);
7468 }
7469
7470 /**
7471  * Destroy policy rules, lock free,
7472  * (mutex should be acquired by caller).
7473  * Dispatcher for action type specific call.
7474  *
7475  * @param[in] dev
7476  *   Pointer to the Ethernet device structure.
7477  * @param[in] mtr_policy
7478  *   Meter policy struct.
7479  */
7480 void
7481 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7482                              struct mlx5_flow_meter_policy *mtr_policy)
7483 {
7484         const struct mlx5_flow_driver_ops *fops;
7485
7486         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7487         fops->destroy_policy_rules(dev, mtr_policy);
7488 }
7489
7490 /**
7491  * Destroy the default policy table set.
7492  *
7493  * @param[in] dev
7494  *   Pointer to Ethernet device.
7495  */
7496 void
7497 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7498 {
7499         const struct mlx5_flow_driver_ops *fops;
7500
7501         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7502         fops->destroy_def_policy(dev);
7503 }
7504
7505 /**
7506  * Create the default policy table set.
7507  *
7508  * @param[in] dev
7509  *   Pointer to Ethernet device.
7510  *
7511  * @return
7512  *   0 on success, -1 otherwise.
7513  */
7514 int
7515 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7516 {
7517         const struct mlx5_flow_driver_ops *fops;
7518
7519         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7520         return fops->create_def_policy(dev);
7521 }
7522
7523 /**
7524  * Create the needed meter and suffix tables.
7525  *
7526  * @param[in] dev
7527  *   Pointer to Ethernet device.
7528  *
7529  * @return
7530  *   0 on success, -1 otherwise.
7531  */
7532 int
7533 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7534                         struct mlx5_flow_meter_info *fm,
7535                         uint32_t mtr_idx,
7536                         uint8_t domain_bitmap)
7537 {
7538         const struct mlx5_flow_driver_ops *fops;
7539
7540         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7541         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7542 }
7543
7544 /**
7545  * Destroy the meter table set.
7546  *
7547  * @param[in] dev
7548  *   Pointer to Ethernet device.
7549  * @param[in] fm
7550  *   Pointer to the flow meter info.
7551  */
7552 void
7553 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
7554                            struct mlx5_flow_meter_info *fm)
7555 {
7556         const struct mlx5_flow_driver_ops *fops;
7557
7558         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7559         fops->destroy_mtr_tbls(dev, fm);
7560 }
7561
7562 /**
7563  * Destroy the global meter drop table.
7564  *
7565  * @param[in] dev
7566  *   Pointer to Ethernet device.
7567  */
7568 void
7569 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
7570 {
7571         const struct mlx5_flow_driver_ops *fops;
7572
7573         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7574         fops->destroy_mtr_drop_tbls(dev);
7575 }
7576
7577 /**
7578  * Destroy the sub policy table with RX queue.
7579  *
7580  * @param[in] dev
7581  *   Pointer to Ethernet device.
7582  * @param[in] mtr_policy
7583  *   Pointer to meter policy table.
7584  */
7585 void
7586 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
7587                 struct mlx5_flow_meter_policy *mtr_policy)
7588 {
7589         const struct mlx5_flow_driver_ops *fops;
7590
7591         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7592         fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
7593 }
7594
7595 /**
7596  * Allocate the needed ASO flow meter id.
7597  *
7598  * @param[in] dev
7599  *   Pointer to Ethernet device.
7600  *
7601  * @return
7602  *   Index to the ASO flow meter on success, 0 otherwise.
7603  */
7604 uint32_t
7605 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
7606 {
7607         const struct mlx5_flow_driver_ops *fops;
7608
7609         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7610         return fops->create_meter(dev);
7611 }
7612
7613 /**
7614  * Free the ASO flow meter id.
7615  *
7616  * @param[in] dev
7617  *   Pointer to Ethernet device.
7618  * @param[in] mtr_idx
7619  *  Index to the ASO flow meter to be freed.
7623  */
7624 void
7625 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
7626 {
7627         const struct mlx5_flow_driver_ops *fops;
7628
7629         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7630         fops->free_meter(dev, mtr_idx);
7631 }
7632
7633 /**
7634  * Allocate a counter.
7635  *
7636  * @param[in] dev
7637  *   Pointer to Ethernet device structure.
7638  *
7639  * @return
7640  *   Index to the allocated counter on success, 0 otherwise.
7641  */
7642 uint32_t
7643 mlx5_counter_alloc(struct rte_eth_dev *dev)
7644 {
7645         const struct mlx5_flow_driver_ops *fops;
7646         struct rte_flow_attr attr = { .transfer = 0 };
7647
7648         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7649                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7650                 return fops->counter_alloc(dev);
7651         }
7652         DRV_LOG(ERR,
7653                 "port %u counter allocate is not supported.",
7654                  dev->data->port_id);
7655         return 0;
7656 }
7657
7658 /**
7659  * Free a counter.
7660  *
7661  * @param[in] dev
7662  *   Pointer to Ethernet device structure.
7663  * @param[in] cnt
7664  *   Index to the counter to be freed.
7665  */
7666 void
7667 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
7668 {
7669         const struct mlx5_flow_driver_ops *fops;
7670         struct rte_flow_attr attr = { .transfer = 0 };
7671
7672         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7673                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7674                 fops->counter_free(dev, cnt);
7675                 return;
7676         }
7677         DRV_LOG(ERR,
7678                 "port %u counter free is not supported.",
7679                  dev->data->port_id);
7680 }
7681
7682 /**
7683  * Query counter statistics.
7684  *
7685  * @param[in] dev
7686  *   Pointer to Ethernet device structure.
7687  * @param[in] cnt
7688  *   Index to counter to query.
7689  * @param[in] clear
7690  *   Set to clear counter statistics.
7691  * @param[out] pkts
7692  *   The counter hits packets number to save.
7693  * @param[out] bytes
7694  *   The counter hits bytes number to save.
7695  *
7696  * @return
7697  *   0 on success, a negative errno value otherwise.
7698  */
7699 int
7700 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
7701                    bool clear, uint64_t *pkts, uint64_t *bytes)
7702 {
7703         const struct mlx5_flow_driver_ops *fops;
7704         struct rte_flow_attr attr = { .transfer = 0 };
7705
7706         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7707                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7708                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
7709         }
7710         DRV_LOG(ERR,
7711                 "port %u counter query is not supported.",
7712                  dev->data->port_id);
7713         return -ENOTSUP;
7714 }
7715
7716 /**
7717  * Allocate new memory for the counter values, wrapped by all the needed
7718  * management structures.
7719  *
7720  * @param[in] sh
7721  *   Pointer to mlx5_dev_ctx_shared object.
7722  *
7723  * @return
7724  *   0 on success, a negative errno value otherwise.
7725  */
7726 static int
7727 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
7728 {
7729         struct mlx5_devx_mkey_attr mkey_attr;
7730         struct mlx5_counter_stats_mem_mng *mem_mng;
7731         volatile struct flow_counter_stats *raw_data;
7732         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
7733         int size = (sizeof(struct flow_counter_stats) *
7734                         MLX5_COUNTERS_PER_POOL +
7735                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
7736                         sizeof(struct mlx5_counter_stats_mem_mng);
7737         size_t pgsize = rte_mem_page_size();
7738         uint8_t *mem;
7739         int i;
7740
7741         if (pgsize == (size_t)-1) {
7742                 DRV_LOG(ERR, "Failed to get mem page size");
7743                 rte_errno = ENOMEM;
7744                 return -ENOMEM;
7745         }
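	/*
	 * Layout of the single allocation below, as implied by the size
	 * computation above (not dictated by the HW):
	 *   - raws_n * MLX5_COUNTERS_PER_POOL raw counter records at the
	 *     start (only this region is registered as umem for DevX queries),
	 *   - followed by raws_n struct mlx5_counter_stats_raw headers
	 *     (mem_mng->raws),
	 *   - with struct mlx5_counter_stats_mem_mng placed at the very end
	 *     of the buffer (mem_mng).
	 */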
7746         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
7747         if (!mem) {
7748                 rte_errno = ENOMEM;
7749                 return -ENOMEM;
7750         }
7751         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
7752         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
7753         mem_mng->umem = mlx5_os_umem_reg(sh->cdev->ctx, mem, size,
7754                                                  IBV_ACCESS_LOCAL_WRITE);
7755         if (!mem_mng->umem) {
7756                 rte_errno = errno;
7757                 mlx5_free(mem);
7758                 return -rte_errno;
7759         }
7760         memset(&mkey_attr, 0, sizeof(mkey_attr));
7761         mkey_attr.addr = (uintptr_t)mem;
7762         mkey_attr.size = size;
7763         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
7764         mkey_attr.pd = sh->cdev->pdn;
7765         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
7766         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
7767         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->cdev->ctx, &mkey_attr);
7768         if (!mem_mng->dm) {
7769                 mlx5_os_umem_dereg(mem_mng->umem);
7770                 rte_errno = errno;
7771                 mlx5_free(mem);
7772                 return -rte_errno;
7773         }
7774         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
7775         raw_data = (volatile struct flow_counter_stats *)mem;
7776         for (i = 0; i < raws_n; ++i) {
7777                 mem_mng->raws[i].mem_mng = mem_mng;
7778                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
7779         }
7780         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
7781                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
7782                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
7783                                  next);
7784         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
7785         sh->cmng.mem_mng = mem_mng;
7786         return 0;
7787 }
7788
7789 /**
7790  * Set the statistic memory to the new counter pool.
7791  *
7792  * @param[in] sh
7793  *   Pointer to mlx5_dev_ctx_shared object.
7794  * @param[in] pool
7795  *   Pointer to the pool to set the statistic memory.
7796  *
7797  * @return
7798  *   0 on success, a negative errno value otherwise.
7799  */
7800 static int
7801 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
7802                                struct mlx5_flow_counter_pool *pool)
7803 {
7804         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7805         /* Resize the statistic memory once it is used up. */
7806         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
7807             mlx5_flow_create_counter_stat_mem_mng(sh)) {
7808                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
7809                 return -1;
7810         }
7811         rte_spinlock_lock(&pool->sl);
7812         pool->raw = cmng->mem_mng->raws + pool->index %
7813                     MLX5_CNT_CONTAINER_RESIZE;
7814         rte_spinlock_unlock(&pool->sl);
7815         pool->raw_hw = NULL;
7816         return 0;
7817 }
7818
7819 #define MLX5_POOL_QUERY_FREQ_US 1000000
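
/*
 * With N valid pools, the alarm below fires every
 * MLX5_POOL_QUERY_FREQ_US / N microseconds and advances to the next pool,
 * so every pool is queried roughly once per second; e.g. 4 pools give one
 * query every 250000 us and a full sweep in about 1 second.
 */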
7820
7821 /**
7822  * Set the periodic procedure for triggering asynchronous batch queries for all
7823  * the counter pools.
7824  *
7825  * @param[in] sh
7826  *   Pointer to mlx5_dev_ctx_shared object.
7827  */
7828 void
7829 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
7830 {
7831         uint32_t pools_n, us;
7832
7833         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
7834         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
7835         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
7836         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
7837                 sh->cmng.query_thread_on = 0;
7838                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
7839         } else {
7840                 sh->cmng.query_thread_on = 1;
7841         }
7842 }
7843
7844 /**
7845  * The periodic procedure for triggering asynchronous batch queries for all the
7846  * counter pools. This function is expected to be called from the host thread.
7847  *
7848  * @param[in] arg
7849  *   The parameter for the alarm process.
7850  */
7851 void
7852 mlx5_flow_query_alarm(void *arg)
7853 {
7854         struct mlx5_dev_ctx_shared *sh = arg;
7855         int ret;
7856         uint16_t pool_index = sh->cmng.pool_index;
7857         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7858         struct mlx5_flow_counter_pool *pool;
7859         uint16_t n_valid;
7860
7861         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
7862                 goto set_alarm;
7863         rte_spinlock_lock(&cmng->pool_update_sl);
7864         pool = cmng->pools[pool_index];
7865         n_valid = cmng->n_valid;
7866         rte_spinlock_unlock(&cmng->pool_update_sl);
7867         /* Set the statistic memory to the newly created pool. */
7868         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
7869                 goto set_alarm;
7870         if (pool->raw_hw)
7871                 /* There is a pool query in progress. */
7872                 goto set_alarm;
7873         pool->raw_hw =
7874                 LIST_FIRST(&sh->cmng.free_stat_raws);
7875         if (!pool->raw_hw)
7876                 /* No free counter statistics raw memory. */
7877                 goto set_alarm;
7878         /*
7879          * Identify the counters released between the query trigger and the
7880          * query handling more efficiently. Counters released in this gap
7881          * period should wait for a new round of query, as the newly arrived
7882          * packets will not be taken into account.
7883          */
7884         pool->query_gen++;
7885         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
7886                                                MLX5_COUNTERS_PER_POOL,
7887                                                NULL, NULL,
7888                                                pool->raw_hw->mem_mng->dm->id,
7889                                                (void *)(uintptr_t)
7890                                                pool->raw_hw->data,
7891                                                sh->devx_comp,
7892                                                (uint64_t)(uintptr_t)pool);
7893         if (ret) {
7894                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
7895                         " %d", pool->min_dcs->id);
7896                 pool->raw_hw = NULL;
7897                 goto set_alarm;
7898         }
7899         LIST_REMOVE(pool->raw_hw, next);
7900         sh->cmng.pending_queries++;
7901         pool_index++;
7902         if (pool_index >= n_valid)
7903                 pool_index = 0;
7904 set_alarm:
7905         sh->cmng.pool_index = pool_index;
7906         mlx5_set_query_alarm(sh);
7907 }
7908
7909 /**
7910  * Check for new aged flows in the counter pool and prepare the aging event.
7911  *
7912  * @param[in] sh
7913  *   Pointer to mlx5_dev_ctx_shared object.
7914  * @param[in] pool
7915  *   Pointer to the current counter pool.
7916  */
7917 static void
7918 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
7919                    struct mlx5_flow_counter_pool *pool)
7920 {
7921         struct mlx5_priv *priv;
7922         struct mlx5_flow_counter *cnt;
7923         struct mlx5_age_info *age_info;
7924         struct mlx5_age_param *age_param;
7925         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
7926         struct mlx5_counter_stats_raw *prev = pool->raw;
7927         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
7928         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
7929         uint16_t expected = AGE_CANDIDATE;
7930         uint32_t i;
7931
7932         pool->time_of_last_age_check = curr_time;
7933         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
7934                 cnt = MLX5_POOL_GET_CNT(pool, i);
7935                 age_param = MLX5_CNT_TO_AGE(cnt);
7936                 if (__atomic_load_n(&age_param->state,
7937                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
7938                         continue;
7939                 if (cur->data[i].hits != prev->data[i].hits) {
7940                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
7941                                          __ATOMIC_RELAXED);
7942                         continue;
7943                 }
7944                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
7945                                        time_delta,
7946                                        __ATOMIC_RELAXED) <= age_param->timeout)
7947                         continue;
7948                 /*
7949                  * Hold the lock first; otherwise, if the release
7950                  * happens between setting the state to AGE_TMOUT
7951                  * and the tailq operation, the release procedure
7952                  * may delete a non-existent tailq node.
7953                  */
7954                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
7955                 age_info = GET_PORT_AGE_INFO(priv);
7956                 rte_spinlock_lock(&age_info->aged_sl);
7957                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
7958                                                 AGE_TMOUT, false,
7959                                                 __ATOMIC_RELAXED,
7960                                                 __ATOMIC_RELAXED)) {
7961                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
7962                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
7963                 }
7964                 rte_spinlock_unlock(&age_info->aged_sl);
7965         }
7966         mlx5_age_event_prepare(sh);
7967 }
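
/*
 * Worked example of the aging check above: with batch queries completing
 * roughly once per second and an AGE timeout of 10, a counter whose hit
 * count stops changing accumulates about 1 in sec_since_last_hit per check
 * and is queued on the aged list after roughly 10 seconds (assuming the
 * timeout is expressed in seconds, as the field names suggest).
 */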
7968
7969 /**
7970  * Handler for the HW response with the ready values from an asynchronous batch
7971  * query. This function is expected to be called from the host thread.
7972  *
7973  * @param[in] sh
7974  *   The pointer to the shared device context.
7975  * @param[in] async_id
7976  *   The Devx async ID.
7977  * @param[in] status
7978  *   The status of the completion.
7979  */
7980 void
7981 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
7982                                   uint64_t async_id, int status)
7983 {
7984         struct mlx5_flow_counter_pool *pool =
7985                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
7986         struct mlx5_counter_stats_raw *raw_to_free;
7987         uint8_t query_gen = pool->query_gen ^ 1;
7988         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7989         enum mlx5_counter_type cnt_type =
7990                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
7991                                 MLX5_COUNTER_TYPE_ORIGIN;
7992
7993         if (unlikely(status)) {
7994                 raw_to_free = pool->raw_hw;
7995         } else {
7996                 raw_to_free = pool->raw;
7997                 if (pool->is_aged)
7998                         mlx5_flow_aging_check(sh, pool);
7999                 rte_spinlock_lock(&pool->sl);
8000                 pool->raw = pool->raw_hw;
8001                 rte_spinlock_unlock(&pool->sl);
8002                 /* Be sure the new raw counters data is updated in memory. */
8003                 rte_io_wmb();
8004                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
8005                         rte_spinlock_lock(&cmng->csl[cnt_type]);
8006                         TAILQ_CONCAT(&cmng->counters[cnt_type],
8007                                      &pool->counters[query_gen], next);
8008                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
8009                 }
8010         }
8011         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
8012         pool->raw_hw = NULL;
8013         sh->cmng.pending_queries--;
8014 }
8015
8016 static int
8017 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
8018                     const struct flow_grp_info *grp_info,
8019                     struct rte_flow_error *error)
8020 {
8021         if (grp_info->transfer && grp_info->external &&
8022             grp_info->fdb_def_rule) {
8023                 if (group == UINT32_MAX)
8024                         return rte_flow_error_set
8025                                                 (error, EINVAL,
8026                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
8027                                                  NULL,
8028                                                  "group index not supported");
8029                 *table = group + 1;
8030         } else {
8031                 *table = group;
8032         }
8033         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
8034         return 0;
8035 }
8036
8037 /**
8038  * Translate the rte_flow group index to HW table value.
8039  *
8040  * If tunnel offload is disabled, all group ids are converted to flow table
8041  * ids using the standard method.
8042  * If tunnel offload is enabled, group id can be converted using the
8043  * standard or tunnel conversion method. Group conversion method
8044  * selection depends on flags in `grp_info` parameter:
8045  * - Internal (grp_info.external == 0) groups conversion uses the
8046  *   standard method.
8047  * - Group ids in the JUMP action are converted with the tunnel conversion.
8048  * - Group id in rule attribute conversion depends on a rule type and
8049  *   group id value:
8050  *   ** non-zero group attributes are converted with the tunnel method
8051  *   ** zero group attribute in non-tunnel rule is converted using the
8052  *      standard method - there's only one root table
8053  *   ** zero group attribute in steer tunnel rule is converted with the
8054  *      standard method - single root table
8055  *   ** zero group attribute in match tunnel rule is a special OvS
8056  *      case: that value is used for portability reasons. That group
8057  *      id is converted with the tunnel conversion method.
8058  *
8059  * @param[in] dev
8060  *   Port device
8061  * @param[in] tunnel
8062  *   PMD tunnel offload object
8063  * @param[in] group
8064  *   rte_flow group index value.
8065  * @param[out] table
8066  *   HW table value.
8067  * @param[in] grp_info
8068  *   flags used for conversion
8069  * @param[out] error
8070  *   Pointer to error structure.
8071  *
8072  * @return
8073  *   0 on success, a negative errno value otherwise and rte_errno is set.
8074  */
8075 int
8076 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
8077                          const struct mlx5_flow_tunnel *tunnel,
8078                          uint32_t group, uint32_t *table,
8079                          const struct flow_grp_info *grp_info,
8080                          struct rte_flow_error *error)
8081 {
8082         int ret;
8083         bool standard_translation;
8084
8085         if (!grp_info->skip_scale && grp_info->external &&
8086             group < MLX5_MAX_TABLES_EXTERNAL)
8087                 group *= MLX5_FLOW_TABLE_FACTOR;
8088         if (is_tunnel_offload_active(dev)) {
8089                 standard_translation = !grp_info->external ||
8090                                         grp_info->std_tbl_fix;
8091         } else {
8092                 standard_translation = true;
8093         }
8094         DRV_LOG(DEBUG,
8095                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
8096                 dev->data->port_id, group, grp_info->transfer,
8097                 grp_info->external, grp_info->fdb_def_rule,
8098                 standard_translation ? "STANDARD" : "TUNNEL");
8099         if (standard_translation)
8100                 ret = flow_group_to_table(dev->data->port_id, group, table,
8101                                           grp_info, error);
8102         else
8103                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
8104                                                       table, error);
8105
8106         return ret;
8107 }
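
/*
 * Example of the translation above: an external group G with skip_scale
 * unset and G < MLX5_MAX_TABLES_EXTERNAL is first scaled to
 * G * MLX5_FLOW_TABLE_FACTOR; with the standard translation on an external
 * transfer rule under the FDB default rule the table becomes that scaled
 * value + 1, while the tunnel translation maps the group through the tunnel
 * group-to-table database instead.
 */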
8108
8109 /**
8110  * Discover availability of metadata reg_c's.
8111  *
8112  * Iteratively use test flows to check availability.
8113  *
8114  * @param[in] dev
8115  *   Pointer to the Ethernet device structure.
8116  *
8117  * @return
8118  *   0 on success, a negative errno value otherwise and rte_errno is set.
8119  */
8120 int
8121 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
8122 {
8123         struct mlx5_priv *priv = dev->data->dev_private;
8124         enum modify_reg idx;
8125         int n = 0;
8126
8127         /* reg_c[0] and reg_c[1] are reserved. */
8128         priv->sh->flow_mreg_c[n++] = REG_C_0;
8129         priv->sh->flow_mreg_c[n++] = REG_C_1;
8130         /* Discover availability of other reg_c's. */
8131         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
8132                 struct rte_flow_attr attr = {
8133                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
8134                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8135                         .ingress = 1,
8136                 };
8137                 struct rte_flow_item items[] = {
8138                         [0] = {
8139                                 .type = RTE_FLOW_ITEM_TYPE_END,
8140                         },
8141                 };
8142                 struct rte_flow_action actions[] = {
8143                         [0] = {
8144                                 .type = (enum rte_flow_action_type)
8145                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
8146                                 .conf = &(struct mlx5_flow_action_copy_mreg){
8147                                         .src = REG_C_1,
8148                                         .dst = idx,
8149                                 },
8150                         },
8151                         [1] = {
8152                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
8153                                 .conf = &(struct rte_flow_action_jump){
8154                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
8155                                 },
8156                         },
8157                         [2] = {
8158                                 .type = RTE_FLOW_ACTION_TYPE_END,
8159                         },
8160                 };
8161                 uint32_t flow_idx;
8162                 struct rte_flow *flow;
8163                 struct rte_flow_error error;
8164
8165                 if (!priv->config.dv_flow_en)
8166                         break;
8167                 /* Create internal flow, validation skips copy action. */
8168                 flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
8169                                         items, actions, false, &error);
8170                 flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8171                                       flow_idx);
8172                 if (!flow)
8173                         continue;
8174                 priv->sh->flow_mreg_c[n++] = idx;
8175                 flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
8176         }
8177         for (; n < MLX5_MREG_C_NUM; ++n)
8178                 priv->sh->flow_mreg_c[n] = REG_NON;
8179         priv->sh->metadata_regc_check_flag = 1;
8180         return 0;
8181 }
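
/*
 * After the probe above, priv->sh->flow_mreg_c[] lists the usable registers
 * in order and is padded with REG_NON. A minimal sketch of how a caller may
 * test the result (a helper such as mlx5_flow_ext_mreg_supported() is
 * assumed to perform a similar check):
 *
 *	if (priv->sh->flow_mreg_c[2] != REG_NON)
 *		(extended metadata register copy is available)
 */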
8182
8183 int
8184 save_dump_file(const uint8_t *data, uint32_t size,
8185         uint32_t type, uint64_t id, void *arg, FILE *file)
8186 {
8187         char line[BUF_SIZE];
8188         uint32_t out = 0;
8189         uint32_t k;
8190         uint32_t actions_num;
8191         struct rte_flow_query_count *count;
8192
8193         memset(line, 0, BUF_SIZE);
8194         switch (type) {
8195         case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
8196                 actions_num = *(uint32_t *)(arg);
8197                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
8198                                 type, id, actions_num);
8199                 break;
8200         case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
8201                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
8202                                 type, id);
8203                 break;
8204         case DR_DUMP_REC_TYPE_PMD_COUNTER:
8205                 count = (struct rte_flow_query_count *)arg;
8206                 fprintf(file,
8207                         "%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
8208                         type, id, count->hits, count->bytes);
8209                 return 0;
8210         default:
8211                 return -1;
8212         }
8213
8214         for (k = 0; k < size; k++) {
8215                 /* Make sure we do not overrun the line buffer length. */
8216                 if (out >= BUF_SIZE - 4) {
8217                         line[out] = '\0';
8218                         break;
8219                 }
8220                 out += snprintf(line + out, BUF_SIZE - out, "%02x",
8221                                 (data[k]) & 0xff);
8222         }
8223         fprintf(file, "%s\n", line);
8224         return 0;
8225 }
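
/*
 * Record formats produced above, one CSV line per record (the numeric type
 * values come from the DR_DUMP_REC_TYPE_* definitions elsewhere in the PMD):
 *   modify_hdr:   <type>,0x<id>,<actions_num>,<hex action words>
 *   pkt_reformat: <type>,0x<id>,<hex buffer bytes>
 *   counter:      <type>,0x<id>,<hits>,<bytes>
 */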
8226
8227 int
8228 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
8229         struct rte_flow_query_count *count, struct rte_flow_error *error)
8230 {
8231         struct rte_flow_action action[2];
8232         enum mlx5_flow_drv_type ftype;
8233         const struct mlx5_flow_driver_ops *fops;
8234
8235         if (!flow) {
8236                 return rte_flow_error_set(error, ENOENT,
8237                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8238                                 NULL,
8239                                 "invalid flow handle");
8240         }
8241         action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
8242         action[1].type = RTE_FLOW_ACTION_TYPE_END;
8243         if (flow->counter) {
8244                 memset(count, 0, sizeof(struct rte_flow_query_count));
8245                 ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
8246                 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
8247                                                 ftype < MLX5_FLOW_TYPE_MAX);
8248                 fops = flow_get_drv_ops(ftype);
8249                 return fops->query(dev, flow, action, count, error);
8250         }
8251         return -1;
8252 }
8253
8254 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8255 /**
8256  * Dump flow ipool data to file
8257  *
8258  * @param[in] dev
8259  *   The pointer to Ethernet device.
 * @param[in] flow
 *   The pointer to the flow to dump.
8260  * @param[in] file
8261  *   A pointer to a file for output.
8262  * @param[out] error
8263  *   Perform verbose error reporting if not NULL. PMDs initialize this
8264  *   structure in case of error only.
8265  * @return
8266  *   0 on success, a negative value otherwise.
8267  */
8268 int
8269 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
8270         struct rte_flow *flow, FILE *file,
8271         struct rte_flow_error *error)
8272 {
8273         struct mlx5_priv *priv = dev->data->dev_private;
8274         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8275         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8276         uint32_t handle_idx;
8277         struct mlx5_flow_handle *dh;
8278         struct rte_flow_query_count count;
8279         uint32_t actions_num;
8280         const uint8_t *data;
8281         size_t size;
8282         uint64_t id;
8283         uint32_t type;
8284         void *action = NULL;
8285
8286         if (!flow) {
8287                 return rte_flow_error_set(error, ENOENT,
8288                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8289                                 NULL,
8290                                 "invalid flow handle");
8291         }
8292         handle_idx = flow->dev_handles;
8293         while (handle_idx) {
8294                 dh = mlx5_ipool_get(priv->sh->ipool
8295                                 [MLX5_IPOOL_MLX5_FLOW], handle_idx);
8296                 if (!dh)
8297                         continue;
8298                 handle_idx = dh->next.next;
8299
8300                 /* query counter */
8301                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8302                 flow_dv_query_count_ptr(dev, flow->counter,
8303                                                 &action, error);
8304                 if (action) {
8305                         id = (uint64_t)(uintptr_t)action;
8306                         if (!mlx5_flow_query_counter(dev, flow, &count, error))
8307                                 save_dump_file(NULL, 0, type,
8308                                                 id, (void *)&count, file);
8309                 }
8310                 /* Get modify_hdr and encap_decap buf from ipools. */
8311                 encap_decap = NULL;
8312                 modify_hdr = dh->dvh.modify_hdr;
8313
8314                 if (dh->dvh.rix_encap_decap) {
8315                         encap_decap = mlx5_ipool_get(priv->sh->ipool
8316                                                 [MLX5_IPOOL_DECAP_ENCAP],
8317                                                 dh->dvh.rix_encap_decap);
8318                 }
8319                 if (modify_hdr) {
8320                         data = (const uint8_t *)modify_hdr->actions;
8321                         size = (size_t)(modify_hdr->actions_num) * 8;
8322                         id = (uint64_t)(uintptr_t)modify_hdr->action;
8323                         actions_num = modify_hdr->actions_num;
8324                         type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8325                         save_dump_file(data, size, type, id,
8326                                                 (void *)(&actions_num), file);
8327                 }
8328                 if (encap_decap) {
8329                         data = encap_decap->buf;
8330                         size = encap_decap->size;
8331                         id = (uint64_t)(uintptr_t)encap_decap->action;
8332                         type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8333                         save_dump_file(data, size, type,
8334                                                 id, NULL, file);
8335                 }
8336         }
8337         return 0;
8338 }
8339
8340 /**
8341  * Dump all flows' encap_decap/modify_hdr/counter data to file
8342  *
8343  * @param[in] dev
8344  *   The pointer to Ethernet device.
8345  * @param[in] file
8346  *   A pointer to a file for output.
8347  * @param[out] error
8348  *   Perform verbose error reporting if not NULL. PMDs initialize this
8349  *   structure in case of error only.
8350  * @return
8351  *   0 on success, a negative value otherwise.
8352  */
8353 static int
8354 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
8355         FILE *file, struct rte_flow_error *error)
8356 {
8357         struct mlx5_priv *priv = dev->data->dev_private;
8358         struct mlx5_dev_ctx_shared *sh = priv->sh;
8359         struct mlx5_hlist *h;
8360         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8361         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8362         struct rte_flow_query_count count;
8363         uint32_t actions_num;
8364         const uint8_t *data;
8365         size_t size;
8366         uint64_t id;
8367         uint32_t type;
8368         uint32_t i;
8369         uint32_t j;
8370         struct mlx5_list_inconst *l_inconst;
8371         struct mlx5_list_entry *e;
8372         int lcore_index;
8373         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
8374         uint32_t max;
8375         void *action;
8376
8377         /* encap_decap hlist is lcore_share, get global core cache. */
8378         i = MLX5_LIST_GLOBAL;
8379         h = sh->encaps_decaps;
8380         if (h) {
8381                 for (j = 0; j <= h->mask; j++) {
8382                         l_inconst = &h->buckets[j].l;
8383                         if (!l_inconst || !l_inconst->cache[i])
8384                                 continue;
8385
8386                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8387                         while (e) {
8388                                 encap_decap =
8389                                 (struct mlx5_flow_dv_encap_decap_resource *)e;
8390                                 data = encap_decap->buf;
8391                                 size = encap_decap->size;
8392                                 id = (uint64_t)(uintptr_t)encap_decap->action;
8393                                 type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8394                                 save_dump_file(data, size, type,
8395                                         id, NULL, file);
8396                                 e = LIST_NEXT(e, next);
8397                         }
8398                 }
8399         }
8400
8401         /* get modify_hdr */
8402         h = sh->modify_cmds;
8403         if (h) {
8404                 lcore_index = rte_lcore_index(rte_lcore_id());
8405                 if (unlikely(lcore_index == -1)) {
8406                         lcore_index = MLX5_LIST_NLCORE;
8407                         rte_spinlock_lock(&h->l_const.lcore_lock);
8408                 }
8409                 i = lcore_index;
8410
8411                 for (j = 0; j <= h->mask; j++) {
8412                         l_inconst = &h->buckets[j].l;
8413                         if (!l_inconst || !l_inconst->cache[i])
8414                                 continue;
8415
8416                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8417                         while (e) {
8418                                 modify_hdr =
8419                                 (struct mlx5_flow_dv_modify_hdr_resource *)e;
8420                                 data = (const uint8_t *)modify_hdr->actions;
8421                                 size = (size_t)(modify_hdr->actions_num) * 8;
8422                                 actions_num = modify_hdr->actions_num;
8423                                 id = (uint64_t)(uintptr_t)modify_hdr->action;
8424                                 type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8425                                 save_dump_file(data, size, type, id,
8426                                                 (void *)(&actions_num), file);
8427                                 e = LIST_NEXT(e, next);
8428                         }
8429                 }
8430
8431                 if (unlikely(lcore_index == MLX5_LIST_NLCORE))
8432                         rte_spinlock_unlock(&h->l_const.lcore_lock);
8433         }
8434
8435         /* get counter */
8436         MLX5_ASSERT(cmng->n_valid <= cmng->n);
8437         max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
8438         for (j = 1; j <= max; j++) {
8439                 action = NULL;
8440                 flow_dv_query_count_ptr(dev, j, &action, error);
8441                 if (action) {
8442                         if (!flow_dv_query_count(dev, j, &count, error)) {
8443                                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8444                                 id = (uint64_t)(uintptr_t)action;
8445                                 save_dump_file(NULL, 0, type,
8446                                                 id, (void *)&count, file);
8447                         }
8448                 }
8449         }
8450         return 0;
8451 }
8452 #endif
8453
8454 /**
8455  * Dump flow raw hw data to file
8456  *
8457  * @param[in] dev
8458  *    The pointer to Ethernet device.
8459  * @param[in] file
8460  *   A pointer to a file for output.
8461  * @param[out] error
8462  *   Perform verbose error reporting if not NULL. PMDs initialize this
8463  *   structure in case of error only.
8464  * @return
8465  *   0 on success, a negative value otherwise.
8466  */
8467 int
8468 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
8469                    FILE *file,
8470                    struct rte_flow_error *error __rte_unused)
8471 {
8472         struct mlx5_priv *priv = dev->data->dev_private;
8473         struct mlx5_dev_ctx_shared *sh = priv->sh;
8474         uint32_t handle_idx;
8475         int ret;
8476         struct mlx5_flow_handle *dh;
8477         struct rte_flow *flow;
8478
8479         if (!priv->config.dv_flow_en) {
8480                 if (fputs("device dv flow disabled\n", file) <= 0)
8481                         return -errno;
8482                 return -ENOTSUP;
8483         }
8484
8485         /* dump all */
8486         if (!flow_idx) {
8487 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8488                 if (mlx5_flow_dev_dump_sh_all(dev, file, error))
8489                         return -EINVAL;
8490 #endif
8491                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
8492                                         sh->rx_domain,
8493                                         sh->tx_domain, file);
8494         }
8495         /* dump one */
8496         flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8497                         (uintptr_t)(void *)flow_idx);
8498         if (!flow)
8499                 return -EINVAL;
8500
8501 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8502         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
8503 #endif
8504         handle_idx = flow->dev_handles;
8505         while (handle_idx) {
8506                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
8507                                 handle_idx);
8508                 if (!dh)
8509                         return -ENOENT;
8510                 if (dh->drv_flow) {
8511                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
8512                                         file);
8513                         if (ret)
8514                                 return -ENOENT;
8515                 }
8516                 handle_idx = dh->next.next;
8517         }
8518         return 0;
8519 }
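
/*
 * A minimal application-side usage sketch of the dump entry point above
 * through the generic API; "port_id" is assumed to be a started mlx5 port,
 * the output path is arbitrary, and a NULL flow pointer dumps all flows:
 *
 *	struct rte_flow_error error;
 *	FILE *f = fopen("/tmp/mlx5_flow_dump.txt", "w");
 *
 *	if (f != NULL) {
 *		if (rte_flow_dev_dump(port_id, NULL, f, &error) != 0)
 *			printf("dump failed: %s\n",
 *			       error.message ? error.message : "(none)");
 *		fclose(f);
 *	}
 */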
8520
8521 /**
8522  * Get aged-out flows.
8523  *
8524  * @param[in] dev
8525  *   Pointer to the Ethernet device structure.
8526  * @param[in] contexts
8527  *   The address of an array of pointers to the aged-out flow contexts.
8528  * @param[in] nb_contexts
8529  *   The length of the context array.
8530  * @param[out] error
8531  *   Perform verbose error reporting if not NULL. Initialized in case of
8532  *   error only.
8533  *
8534  * @return
8535  *   The number of contexts retrieved on success, a negative errno value
8536  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
8537  *   returned; otherwise, the number of aged flows reported in the context
8538  *   array is returned.
8539  */
8540 int
8541 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
8542                         uint32_t nb_contexts, struct rte_flow_error *error)
8543 {
8544         const struct mlx5_flow_driver_ops *fops;
8545         struct rte_flow_attr attr = { .transfer = 0 };
8546
8547         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8548                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8549                 return fops->get_aged_flows(dev, contexts, nb_contexts,
8550                                                     error);
8551         }
8552         DRV_LOG(ERR,
8553                 "port %u get aged flows is not supported.",
8554                  dev->data->port_id);
8555         return -ENOTSUP;
8556 }
8557
8558 /* Wrapper for driver action_validate op callback */
8559 static int
8560 flow_drv_action_validate(struct rte_eth_dev *dev,
8561                          const struct rte_flow_indir_action_conf *conf,
8562                          const struct rte_flow_action *action,
8563                          const struct mlx5_flow_driver_ops *fops,
8564                          struct rte_flow_error *error)
8565 {
8566         static const char err_msg[] = "indirect action validation unsupported";
8567
8568         if (!fops->action_validate) {
8569                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8570                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8571                                    NULL, err_msg);
8572                 return -rte_errno;
8573         }
8574         return fops->action_validate(dev, conf, action, error);
8575 }
8576
8577 /**
8578  * Destroys the shared action by handle.
8579  *
8580  * @param dev
8581  *   Pointer to Ethernet device structure.
8582  * @param[in] handle
8583  *   Handle for the indirect action object to be destroyed.
8584  * @param[out] error
8585  *   Perform verbose error reporting if not NULL. PMDs initialize this
8586  *   structure in case of error only.
8587  *
8588  * @return
8589  *   0 on success, a negative errno value otherwise and rte_errno is set.
8590  *
8591  * @note: wrapper for driver action_destroy op callback.
8592  */
8593 static int
8594 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
8595                            struct rte_flow_action_handle *handle,
8596                            struct rte_flow_error *error)
8597 {
8598         static const char err_msg[] = "indirect action destruction unsupported";
8599         struct rte_flow_attr attr = { .transfer = 0 };
8600         const struct mlx5_flow_driver_ops *fops =
8601                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8602
8603         if (!fops->action_destroy) {
8604                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8605                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8606                                    NULL, err_msg);
8607                 return -rte_errno;
8608         }
8609         return fops->action_destroy(dev, handle, error);
8610 }
8611
8612 /* Wrapper for driver action_update op callback */
8613 static int
8614 flow_drv_action_update(struct rte_eth_dev *dev,
8615                        struct rte_flow_action_handle *handle,
8616                        const void *update,
8617                        const struct mlx5_flow_driver_ops *fops,
8618                        struct rte_flow_error *error)
8619 {
8620         static const char err_msg[] = "indirect action update unsupported";
8621
8622         if (!fops->action_update) {
8623                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8624                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8625                                    NULL, err_msg);
8626                 return -rte_errno;
8627         }
8628         return fops->action_update(dev, handle, update, error);
8629 }
8630
8631 /* Wrapper for driver action_query op callback */
8632 static int
8633 flow_drv_action_query(struct rte_eth_dev *dev,
8634                       const struct rte_flow_action_handle *handle,
8635                       void *data,
8636                       const struct mlx5_flow_driver_ops *fops,
8637                       struct rte_flow_error *error)
8638 {
8639         static const char err_msg[] = "indirect action query unsupported";
8640
8641         if (!fops->action_query) {
8642                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8643                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8644                                    NULL, err_msg);
8645                 return -rte_errno;
8646         }
8647         return fops->action_query(dev, handle, data, error);
8648 }
8649
8650 /**
8651  * Create indirect action for reuse in multiple flow rules.
8652  *
8653  * @param dev
8654  *   Pointer to Ethernet device structure.
8655  * @param conf
8656  *   Pointer to indirect action object configuration.
8657  * @param[in] action
8658  *   Action configuration for indirect action object creation.
8659  * @param[out] error
8660  *   Perform verbose error reporting if not NULL. PMDs initialize this
8661  *   structure in case of error only.
8662  * @return
8663  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
8664  */
8665 static struct rte_flow_action_handle *
8666 mlx5_action_handle_create(struct rte_eth_dev *dev,
8667                           const struct rte_flow_indir_action_conf *conf,
8668                           const struct rte_flow_action *action,
8669                           struct rte_flow_error *error)
8670 {
8671         static const char err_msg[] = "indirect action creation unsupported";
8672         struct rte_flow_attr attr = { .transfer = 0 };
8673         const struct mlx5_flow_driver_ops *fops =
8674                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8675
8676         if (flow_drv_action_validate(dev, conf, action, fops, error))
8677                 return NULL;
8678         if (!fops->action_create) {
8679                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8680                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8681                                    NULL, err_msg);
8682                 return NULL;
8683         }
8684         return fops->action_create(dev, conf, action, error);
8685 }
8686
8687 /**
8688  * Updates in place the indirect action configuration pointed to by *handle*
8689  * with the configuration provided as the *update* argument.
8690  * The update of the indirect action configuration affects all flow rules
8691  * reusing the action via the handle.
8692  *
8693  * @param dev
8694  *   Pointer to Ethernet device structure.
8695  * @param[in] handle
8696  *   Handle for the indirect action to be updated.
8697  * @param[in] update
8698  *   Action specification used to modify the action pointed by handle.
8699  *   *update* could be of the same type as the action pointed to by the
8700  *   *handle* argument, or some other structure like a wrapper, depending on
8701  *   the indirect action type.
8702  * @param[out] error
8703  *   Perform verbose error reporting if not NULL. PMDs initialize this
8704  *   structure in case of error only.
8705  *
8706  * @return
8707  *   0 on success, a negative errno value otherwise and rte_errno is set.
8708  */
8709 static int
8710 mlx5_action_handle_update(struct rte_eth_dev *dev,
8711                 struct rte_flow_action_handle *handle,
8712                 const void *update,
8713                 struct rte_flow_error *error)
8714 {
8715         struct rte_flow_attr attr = { .transfer = 0 };
8716         const struct mlx5_flow_driver_ops *fops =
8717                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8718         int ret;
8719
8720         ret = flow_drv_action_validate(dev, NULL,
8721                         (const struct rte_flow_action *)update, fops, error);
8722         if (ret)
8723                 return ret;
8724         return flow_drv_action_update(dev, handle, update, fops,
8725                                       error);
8726 }
8727
8728 /**
8729  * Query the indirect action by handle.
8730  *
8731  * This function allows retrieving action-specific data such as counters.
8732  * Data is gathered by a special action which may be present/referenced in
8733  * more than one flow rule definition.
8734  *
8735  * see @RTE_FLOW_ACTION_TYPE_COUNT
8736  *
8737  * @param dev
8738  *   Pointer to Ethernet device structure.
8739  * @param[in] handle
8740  *   Handle for the indirect action to query.
8741  * @param[in, out] data
8742  *   Pointer to storage for the associated query data type.
8743  * @param[out] error
8744  *   Perform verbose error reporting if not NULL. PMDs initialize this
8745  *   structure in case of error only.
8746  *
8747  * @return
8748  *   0 on success, a negative errno value otherwise and rte_errno is set.
8749  */
8750 static int
8751 mlx5_action_handle_query(struct rte_eth_dev *dev,
8752                          const struct rte_flow_action_handle *handle,
8753                          void *data,
8754                          struct rte_flow_error *error)
8755 {
8756         struct rte_flow_attr attr = { .transfer = 0 };
8757         const struct mlx5_flow_driver_ops *fops =
8758                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8759
8760         return flow_drv_action_query(dev, handle, data, fops, error);
8761 }
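
/*
 * A minimal application-side sketch of the indirect action life cycle served
 * by the wrappers above, using an indirect COUNT action as in the note;
 * "port_id" is assumed to be a configured mlx5 port:
 *
 *	struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *	struct rte_flow_action count = { .type = RTE_FLOW_ACTION_TYPE_COUNT };
 *	struct rte_flow_query_count stats = { .reset = 0 };
 *	struct rte_flow_error error;
 *	struct rte_flow_action_handle *handle;
 *
 *	handle = rte_flow_action_handle_create(port_id, &conf, &count, &error);
 *	if (handle != NULL) {
 *		(reference the handle from one or more flow rules here)
 *		rte_flow_action_handle_query(port_id, handle, &stats, &error);
 *		rte_flow_action_handle_destroy(port_id, handle, &error);
 *	}
 */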
8762
8763 /**
8764  * Destroy all indirect actions (shared RSS).
8765  *
8766  * @param dev
8767  *   Pointer to Ethernet device.
8768  *
8769  * @return
8770  *   0 on success, a negative errno value otherwise and rte_errno is set.
8771  */
8772 int
8773 mlx5_action_handle_flush(struct rte_eth_dev *dev)
8774 {
8775         struct rte_flow_error error;
8776         struct mlx5_priv *priv = dev->data->dev_private;
8777         struct mlx5_shared_action_rss *shared_rss;
8778         int ret = 0;
8779         uint32_t idx;
8780
8781         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
8782                       priv->rss_shared_actions, idx, shared_rss, next) {
8783                 ret |= mlx5_action_handle_destroy(dev,
8784                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
8785         }
8786         return ret;
8787 }
8788
8789 #ifndef HAVE_MLX5DV_DR
8790 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
8791 #else
8792 #define MLX5_DOMAIN_SYNC_FLOW \
8793         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
8794 #endif
8795
8796 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
8797 {
8798         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
8799         const struct mlx5_flow_driver_ops *fops;
8800         int ret;
8801         struct rte_flow_attr attr = { .transfer = 0 };
8802
8803         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8804         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
8805         if (ret > 0)
8806                 ret = -ret;
8807         return ret;
8808 }
8809
8810 const struct mlx5_flow_tunnel *
8811 mlx5_get_tof(const struct rte_flow_item *item,
8812              const struct rte_flow_action *action,
8813              enum mlx5_tof_rule_type *rule_type)
8814 {
8815         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
8816                 if (item->type == (typeof(item->type))
8817                                   MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
8818                         *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
8819                         return flow_items_to_tunnel(item);
8820                 }
8821         }
8822         for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
8823                 if (action->type == (typeof(action->type))
8824                                     MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
8825                         *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
8826                         return flow_actions_to_tunnel(action);
8827                 }
8828         }
8829         return NULL;
8830 }
8831
8832 /**
8833  * Tunnel offload functionality is defined for the DV environment only.
8834  */
8835 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8836 __extension__
8837 union tunnel_offload_mark {
8838         uint32_t val;
8839         struct {
8840                 uint32_t app_reserve:8;
8841                 uint32_t table_id:15;
8842                 uint32_t transfer:1;
8843                 uint32_t _unused_:8;
8844         };
8845 };
8846
8847 static bool
8848 mlx5_access_tunnel_offload_db
8849         (struct rte_eth_dev *dev,
8850          bool (*match)(struct rte_eth_dev *,
8851                        struct mlx5_flow_tunnel *, const void *),
8852          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8853          void (*miss)(struct rte_eth_dev *, void *),
8854          void *ctx, bool lock_op);
8855
8856 static int
8857 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
8858                              struct rte_flow *flow,
8859                              const struct rte_flow_attr *attr,
8860                              const struct rte_flow_action *app_actions,
8861                              uint32_t flow_idx,
8862                              const struct mlx5_flow_tunnel *tunnel,
8863                              struct tunnel_default_miss_ctx *ctx,
8864                              struct rte_flow_error *error)
8865 {
8866         struct mlx5_priv *priv = dev->data->dev_private;
8867         struct mlx5_flow *dev_flow;
8868         struct rte_flow_attr miss_attr = *attr;
8869         const struct rte_flow_item miss_items[2] = {
8870                 {
8871                         .type = RTE_FLOW_ITEM_TYPE_ETH,
8872                         .spec = NULL,
8873                         .last = NULL,
8874                         .mask = NULL
8875                 },
8876                 {
8877                         .type = RTE_FLOW_ITEM_TYPE_END,
8878                         .spec = NULL,
8879                         .last = NULL,
8880                         .mask = NULL
8881                 }
8882         };
8883         union tunnel_offload_mark mark_id;
8884         struct rte_flow_action_mark miss_mark;
8885         struct rte_flow_action miss_actions[3] = {
8886                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
8887                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
8888         };
8889         const struct rte_flow_action_jump *jump_data;
8890         uint32_t i, flow_table = 0; /* prevent compilation warning */
8891         struct flow_grp_info grp_info = {
8892                 .external = 1,
8893                 .transfer = attr->transfer,
8894                 .fdb_def_rule = !!priv->fdb_def_rule,
8895                 .std_tbl_fix = 0,
8896         };
8897         int ret;
8898
8899         if (!attr->transfer) {
8900                 uint32_t q_size;
8901
8902                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
8903                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
8904                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
8905                                          0, SOCKET_ID_ANY);
8906                 if (!ctx->queue)
8907                         return rte_flow_error_set
8908                                 (error, ENOMEM,
8909                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8910                                 NULL, "invalid default miss RSS");
8911                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
8912                 ctx->action_rss.level = 0,
8913                 ctx->action_rss.types = priv->rss_conf.rss_hf,
8914                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len,
8915                 ctx->action_rss.queue_num = priv->reta_idx_n,
8916                 ctx->action_rss.key = priv->rss_conf.rss_key,
8917                 ctx->action_rss.queue = ctx->queue;
8918                 if (!priv->reta_idx_n || !priv->rxqs_n)
8919                         return rte_flow_error_set
8920                                 (error, EINVAL,
8921                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8922                                 NULL, "invalid port configuration");
8923                 if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
8924                         ctx->action_rss.types = 0;
8925                 for (i = 0; i != priv->reta_idx_n; ++i)
8926                         ctx->queue[i] = (*priv->reta_idx)[i];
8927         } else {
8928                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
8929                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
8930         }
8931         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
8932         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
8933         jump_data = app_actions->conf;
8934         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
8935         miss_attr.group = jump_data->group;
8936         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
8937                                        &flow_table, &grp_info, error);
8938         if (ret)
8939                 return rte_flow_error_set(error, EINVAL,
8940                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8941                                           NULL, "invalid tunnel id");
8942         mark_id.app_reserve = 0;
8943         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
8944         mark_id.transfer = !!attr->transfer;
8945         mark_id._unused_ = 0;
8946         miss_mark.id = mark_id.val;
8947         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
8948                                     miss_items, miss_actions, flow_idx, error);
8949         if (!dev_flow)
8950                 return -rte_errno;
8951         dev_flow->flow = flow;
8952         dev_flow->external = true;
8953         dev_flow->tunnel = tunnel;
8954         dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
8955         /* A subflow object was created; include it in the flow's handle list. */
8956         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
8957                       dev_flow->handle, next);
8958         DRV_LOG(DEBUG,
8959                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
8960                 dev->data->port_id, tunnel->app_tunnel.type,
8961                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
8962         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
8963                                   miss_actions, error);
8964         if (!ret)
8965                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
8966                                                   error);
8967
8968         return ret;
8969 }
8970
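/**
 * Decode the MARK value attached to a tunnel offload miss packet and
 * look up the flow table entry it refers to.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param mark
 *   Mark value in the tunnel_offload_mark layout.
 *
 * @return
 *   Table entry holding the tunnel context, or NULL if the mark is unknown.
 */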
8971 static const struct mlx5_flow_tbl_data_entry  *
8972 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
8973 {
8974         struct mlx5_priv *priv = dev->data->dev_private;
8975         struct mlx5_dev_ctx_shared *sh = priv->sh;
8976         struct mlx5_list_entry *he;
8977         union tunnel_offload_mark mbits = { .val = mark };
8978         union mlx5_flow_tbl_key table_key = {
8979                 {
8980                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
8981                         .id = 0,
8982                         .reserved = 0,
8983                         .dummy = 0,
8984                         .is_fdb = !!mbits.transfer,
8985                         .is_egress = 0,
8986                 }
8987         };
8988         struct mlx5_flow_cb_ctx ctx = {
8989                 .data = &table_key.v64,
8990         };
8991
8992         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
8993         return he ?
8994                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
8995 }
8996
8997 static void
8998 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
8999                                    struct mlx5_list_entry *entry)
9000 {
9001         struct mlx5_dev_ctx_shared *sh = tool_ctx;
9002         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9003
9004         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9005                         tunnel_flow_tbl_to_id(tte->flow_table));
9006         mlx5_free(tte);
9007 }
9008
9009 static int
9010 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
9011                                   struct mlx5_list_entry *entry, void *cb_ctx)
9012 {
9013         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9014         union tunnel_tbl_key tbl = {
9015                 .val = *(uint64_t *)(ctx->data),
9016         };
9017         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9018
9019         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
9020 }
9021
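/**
 * Hash list creation callback: allocate a new tunnel group to flow table
 * mapping. A table index is taken from the MLX5_IPOOL_TNL_TBL_ID pool and
 * bound to the (tunnel, group) key carried in the callback context.
 */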
9022 static struct mlx5_list_entry *
9023 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
9024 {
9025         struct mlx5_dev_ctx_shared *sh = tool_ctx;
9026         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9027         struct tunnel_tbl_entry *tte;
9028         union tunnel_tbl_key tbl = {
9029                 .val = *(uint64_t *)(ctx->data),
9030         };
9031
9032         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
9033                           sizeof(*tte), 0,
9034                           SOCKET_ID_ANY);
9035         if (!tte)
9036                 goto err;
9037         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9038                           &tte->flow_table);
9039         if (tte->flow_table >= MLX5_MAX_TABLES) {
9040                 DRV_LOG(ERR, "Tunnel table ID %u exceeds the maximum limit.",
9041                         tte->flow_table);
9042                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9043                                 tte->flow_table);
9044                 goto err;
9045         } else if (!tte->flow_table) {
9046                 goto err;
9047         }
9048         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
9049         tte->tunnel_id = tbl.tunnel_id;
9050         tte->group = tbl.group;
9051         return &tte->hash;
9052 err:
9053         if (tte)
9054                 mlx5_free(tte);
9055         return NULL;
9056 }
9057
9058 static struct mlx5_list_entry *
9059 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
9060                                   struct mlx5_list_entry *oentry,
9061                                   void *cb_ctx __rte_unused)
9062 {
9063         struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
9064                                                    0, SOCKET_ID_ANY);
9065
9066         if (!tte)
9067                 return NULL;
9068         memcpy(tte, oentry, sizeof(*tte));
9069         return &tte->hash;
9070 }
9071
9072 static void
9073 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
9074                                        struct mlx5_list_entry *entry)
9075 {
9076         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9077
9078         mlx5_free(tte);
9079 }
9080
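/**
 * Translate an application group index into the flow table used for tunnel
 * offload rules. The mapping is created on first use and cached in the
 * tunnel (or tunnel hub) group hash list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_flow_error is set.
 */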
9081 static uint32_t
9082 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
9083                                 const struct mlx5_flow_tunnel *tunnel,
9084                                 uint32_t group, uint32_t *table,
9085                                 struct rte_flow_error *error)
9086 {
9087         struct mlx5_list_entry *he;
9088         struct tunnel_tbl_entry *tte;
9089         union tunnel_tbl_key key = {
9090                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
9091                 .group = group
9092         };
9093         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9094         struct mlx5_hlist *group_hash;
9095         struct mlx5_flow_cb_ctx ctx = {
9096                 .data = &key.val,
9097         };
9098
9099         group_hash = tunnel ? tunnel->groups : thub->groups;
9100         he = mlx5_hlist_register(group_hash, key.val, &ctx);
9101         if (!he)
9102                 return rte_flow_error_set(error, EINVAL,
9103                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9104                                           NULL,
9105                                           "tunnel group index not supported");
9106         tte = container_of(he, typeof(*tte), hash);
9107         *table = tte->flow_table;
9108         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
9109                 dev->data->port_id, key.tunnel_id, group, *table);
9110         return 0;
9111 }
9112
9113 static void
9114 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
9115                       struct mlx5_flow_tunnel *tunnel)
9116 {
9117         struct mlx5_priv *priv = dev->data->dev_private;
9118         struct mlx5_indexed_pool *ipool;
9119
9120         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
9121                 dev->data->port_id, tunnel->tunnel_id);
9122         LIST_REMOVE(tunnel, chain);
9123         mlx5_hlist_destroy(tunnel->groups);
9124         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9125         mlx5_ipool_free(ipool, tunnel->tunnel_id);
9126 }
9127
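/**
 * Walk the tunnel offload database under the hub spinlock and run the
 * @p match callback on each tunnel. The @p hit callback is invoked for the
 * first match, @p miss when nothing matches. When @p lock_op is true the
 * hit/miss callbacks run with the spinlock still held.
 *
 * @return
 *   true if a matching tunnel was found, false otherwise.
 */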
9128 static bool
9129 mlx5_access_tunnel_offload_db
9130         (struct rte_eth_dev *dev,
9131          bool (*match)(struct rte_eth_dev *,
9132                        struct mlx5_flow_tunnel *, const void *),
9133          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9134          void (*miss)(struct rte_eth_dev *, void *),
9135          void *ctx, bool lock_op)
9136 {
9137         bool verdict = false;
9138         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9139         struct mlx5_flow_tunnel *tunnel;
9140
9141         rte_spinlock_lock(&thub->sl);
9142         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
9143                 verdict = match(dev, tunnel, (const void *)ctx);
9144                 if (verdict)
9145                         break;
9146         }
9147         if (!lock_op)
9148                 rte_spinlock_unlock(&thub->sl);
9149         if (verdict && hit)
9150                 hit(dev, tunnel, ctx);
9151         if (!verdict && miss)
9152                 miss(dev, ctx);
9153         if (lock_op)
9154                 rte_spinlock_unlock(&thub->sl);
9155
9156         return verdict;
9157 }
9158
9159 struct tunnel_db_find_tunnel_id_ctx {
9160         uint32_t tunnel_id;
9161         struct mlx5_flow_tunnel *tunnel;
9162 };
9163
9164 static bool
9165 find_tunnel_id_match(struct rte_eth_dev *dev,
9166                      struct mlx5_flow_tunnel *tunnel, const void *x)
9167 {
9168         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9169
9170         RTE_SET_USED(dev);
9171         return tunnel->tunnel_id == ctx->tunnel_id;
9172 }
9173
9174 static void
9175 find_tunnel_id_hit(struct rte_eth_dev *dev,
9176                    struct mlx5_flow_tunnel *tunnel, void *x)
9177 {
9178         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9179         RTE_SET_USED(dev);
9180         ctx->tunnel = tunnel;
9181 }
9182
9183 static struct mlx5_flow_tunnel *
9184 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
9185 {
9186         struct tunnel_db_find_tunnel_id_ctx ctx = {
9187                 .tunnel_id = id,
9188         };
9189
9190         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
9191                                       find_tunnel_id_hit, NULL, &ctx, true);
9192
9193         return ctx.tunnel;
9194 }
9195
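/**
 * Allocate a new PMD tunnel object for @p app_tunnel: take an ID from the
 * tunnel ipool, create the per-tunnel group hash list and prepare the PMD
 * private flow item and action returned to the application.
 *
 * @return
 *   Pointer to the new tunnel object, NULL on failure.
 */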
9196 static struct mlx5_flow_tunnel *
9197 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
9198                           const struct rte_flow_tunnel *app_tunnel)
9199 {
9200         struct mlx5_priv *priv = dev->data->dev_private;
9201         struct mlx5_indexed_pool *ipool;
9202         struct mlx5_flow_tunnel *tunnel;
9203         uint32_t id;
9204
9205         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9206         tunnel = mlx5_ipool_zmalloc(ipool, &id);
9207         if (!tunnel)
9208                 return NULL;
9209         if (id >= MLX5_MAX_TUNNELS) {
9210                 mlx5_ipool_free(ipool, id);
9211                 DRV_LOG(ERR, "Tunnel ID %u exceeds the maximum limit.", id);
9212                 return NULL;
9213         }
9214         tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
9215                                            priv->sh,
9216                                            mlx5_flow_tunnel_grp2tbl_create_cb,
9217                                            mlx5_flow_tunnel_grp2tbl_match_cb,
9218                                            mlx5_flow_tunnel_grp2tbl_remove_cb,
9219                                            mlx5_flow_tunnel_grp2tbl_clone_cb,
9220                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9221         if (!tunnel->groups) {
9222                 mlx5_ipool_free(ipool, id);
9223                 return NULL;
9224         }
9225         /* Initialize the new PMD tunnel. */
9226         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
9227         tunnel->tunnel_id = id;
9228         tunnel->action.type = (typeof(tunnel->action.type))
9229                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
9230         tunnel->action.conf = tunnel;
9231         tunnel->item.type = (typeof(tunnel->item.type))
9232                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
9233         tunnel->item.spec = tunnel;
9234         tunnel->item.last = NULL;
9235         tunnel->item.mask = NULL;
9236
9237         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
9238                 dev->data->port_id, tunnel->tunnel_id);
9239
9240         return tunnel;
9241 }
9242
9243 struct tunnel_db_get_tunnel_ctx {
9244         const struct rte_flow_tunnel *app_tunnel;
9245         struct mlx5_flow_tunnel *tunnel;
9246 };
9247
9248 static bool get_tunnel_match(struct rte_eth_dev *dev,
9249                              struct mlx5_flow_tunnel *tunnel, const void *x)
9250 {
9251         const struct tunnel_db_get_tunnel_ctx *ctx = x;
9252
9253         RTE_SET_USED(dev);
9254         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
9255                        sizeof(*ctx->app_tunnel));
9256 }
9257
9258 static void get_tunnel_hit(struct rte_eth_dev *dev,
9259                            struct mlx5_flow_tunnel *tunnel, void *x)
9260 {
9261         /* called under tunnel spinlock protection */
9262         struct tunnel_db_get_tunnel_ctx *ctx = x;
9263
9264         RTE_SET_USED(dev);
9265         tunnel->refctn++;
9266         ctx->tunnel = tunnel;
9267 }
9268
9269 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
9270 {
9271         /* called under tunnel spinlock protection */
9272         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9273         struct tunnel_db_get_tunnel_ctx *ctx = x;
9274
9275         rte_spinlock_unlock(&thub->sl);
9276         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
9277         rte_spinlock_lock(&thub->sl);
9278         if (ctx->tunnel) {
9279                 ctx->tunnel->refctn = 1;
9280                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
9281         }
9282 }
9283
9284
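/**
 * Find the PMD tunnel matching @p app_tunnel and take a reference on it,
 * or allocate a new one when no match exists.
 *
 * @return
 *   0 on success, -ENOMEM if a new tunnel could not be allocated.
 */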
9285 static int
9286 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
9287                      const struct rte_flow_tunnel *app_tunnel,
9288                      struct mlx5_flow_tunnel **tunnel)
9289 {
9290         struct tunnel_db_get_tunnel_ctx ctx = {
9291                 .app_tunnel = app_tunnel,
9292         };
9293
9294         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
9295                                       get_tunnel_miss, &ctx, true);
9296         *tunnel = ctx.tunnel;
9297         return ctx.tunnel ? 0 : -ENOMEM;
9298 }
9299
9300 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
9301 {
9302         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
9303
9304         if (!thub)
9305                 return;
9306         if (!LIST_EMPTY(&thub->tunnels))
9307                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
9308         mlx5_hlist_destroy(thub->groups);
9309         mlx5_free(thub);
9310 }
9311
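/**
 * Allocate the tunnel offload hub for a shared device context and create
 * the hub-level group hash list.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */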
9312 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
9313 {
9314         int err;
9315         struct mlx5_flow_tunnel_hub *thub;
9316
9317         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
9318                            0, SOCKET_ID_ANY);
9319         if (!thub)
9320                 return -ENOMEM;
9321         LIST_INIT(&thub->tunnels);
9322         rte_spinlock_init(&thub->sl);
9323         thub->groups = mlx5_hlist_create("flow groups", 64,
9324                                          false, true, sh,
9325                                          mlx5_flow_tunnel_grp2tbl_create_cb,
9326                                          mlx5_flow_tunnel_grp2tbl_match_cb,
9327                                          mlx5_flow_tunnel_grp2tbl_remove_cb,
9328                                          mlx5_flow_tunnel_grp2tbl_clone_cb,
9329                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9330         if (!thub->groups) {
9331                 err = -rte_errno;
9332                 goto err;
9333         }
9334         sh->tunnel_hub = thub;
9335
9336         return 0;
9337
9338 err:
9339         /* Only reached when creating the group hash list failed. */
9340         mlx5_free(thub);
9341         return err;
9344 }
9345
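/**
 * Validate an application tunnel before tunnel offload setup.
 *
 * @param[out] err_msg
 *   Set to a static description string when validation fails.
 *
 * @return
 *   true when tunnel offload is active and the tunnel type is supported,
 *   false otherwise.
 */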
9346 static inline bool
9347 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
9348                           struct rte_flow_tunnel *tunnel,
9349                           const char **err_msg)
9350 {
9351         *err_msg = NULL;
9352         if (!is_tunnel_offload_active(dev)) {
9353                 *err_msg = "tunnel offload was not activated";
9354                 goto out;
9355         } else if (!tunnel) {
9356                 *err_msg = "no application tunnel";
9357                 goto out;
9358         }
9359
9360         switch (tunnel->type) {
9361         default:
9362                 *err_msg = "unsupported tunnel type";
9363                 goto out;
9364         case RTE_FLOW_ITEM_TYPE_VXLAN:
9365         case RTE_FLOW_ITEM_TYPE_GRE:
9366         case RTE_FLOW_ITEM_TYPE_NVGRE:
9367         case RTE_FLOW_ITEM_TYPE_GENEVE:
9368                 break;
9369         }
9370
9371 out:
9372         return !*err_msg;
9373 }
9374
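/**
 * Tunnel offload API: validate the application tunnel, get (or create) the
 * PMD tunnel object and return its private decap_set action.
 */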
9375 static int
9376 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
9377                     struct rte_flow_tunnel *app_tunnel,
9378                     struct rte_flow_action **actions,
9379                     uint32_t *num_of_actions,
9380                     struct rte_flow_error *error)
9381 {
9382         int ret;
9383         struct mlx5_flow_tunnel *tunnel;
9384         const char *err_msg = NULL;
9385         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9386
9387         if (!verdict)
9388                 return rte_flow_error_set(error, EINVAL,
9389                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9390                                           err_msg);
9391         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9392         if (ret < 0) {
9393                 return rte_flow_error_set(error, ret,
9394                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9395                                           "failed to initialize pmd tunnel");
9396         }
9397         *actions = &tunnel->action;
9398         *num_of_actions = 1;
9399         return 0;
9400 }
9401
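/**
 * Tunnel offload API: validate the application tunnel, get (or create) the
 * PMD tunnel object and return its private match item.
 */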
9402 static int
9403 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
9404                        struct rte_flow_tunnel *app_tunnel,
9405                        struct rte_flow_item **items,
9406                        uint32_t *num_of_items,
9407                        struct rte_flow_error *error)
9408 {
9409         int ret;
9410         struct mlx5_flow_tunnel *tunnel;
9411         const char *err_msg = NULL;
9412         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
9413
9414         if (!verdict)
9415                 return rte_flow_error_set(error, EINVAL,
9416                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9417                                           err_msg);
9418         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9419         if (ret < 0) {
9420                 return rte_flow_error_set(error, ret,
9421                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9422                                           "failed to initialize pmd tunnel");
9423         }
9424         *items = &tunnel->item;
9425         *num_of_items = 1;
9426         return 0;
9427 }
9428
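/*
 * Context used when releasing the PMD items/actions handed out by
 * mlx5_flow_tunnel_match() and mlx5_flow_tunnel_decap_set().
 */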
9429 struct tunnel_db_element_release_ctx {
9430         struct rte_flow_item *items;
9431         struct rte_flow_action *actions;
9432         uint32_t num_elements;
9433         struct rte_flow_error *error;
9434         int ret;
9435 };
9436
9437 static bool
9438 tunnel_element_release_match(struct rte_eth_dev *dev,
9439                              struct mlx5_flow_tunnel *tunnel, const void *x)
9440 {
9441         const struct tunnel_db_element_release_ctx *ctx = x;
9442
9443         RTE_SET_USED(dev);
9444         if (ctx->num_elements != 1)
9445                 return false;
9446         else if (ctx->items)
9447                 return ctx->items == &tunnel->item;
9448         else if (ctx->actions)
9449                 return ctx->actions == &tunnel->action;
9450
9451         return false;
9452 }
9453
9454 static void
9455 tunnel_element_release_hit(struct rte_eth_dev *dev,
9456                            struct mlx5_flow_tunnel *tunnel, void *x)
9457 {
9458         struct tunnel_db_element_release_ctx *ctx = x;
9459         ctx->ret = 0;
9460         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
9461                 mlx5_flow_tunnel_free(dev, tunnel);
9462 }
9463
9464 static void
9465 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
9466 {
9467         struct tunnel_db_element_release_ctx *ctx = x;
9468         RTE_SET_USED(dev);
9469         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
9470                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9471                                       "invalid argument");
9472 }
9473
9474 static int
9475 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
9476                        struct rte_flow_item *pmd_items,
9477                        uint32_t num_items, struct rte_flow_error *err)
9478 {
9479         struct tunnel_db_element_release_ctx ctx = {
9480                 .items = pmd_items,
9481                 .actions = NULL,
9482                 .num_elements = num_items,
9483                 .error = err,
9484         };
9485
9486         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9487                                       tunnel_element_release_hit,
9488                                       tunnel_element_release_miss, &ctx, false);
9489
9490         return ctx.ret;
9491 }
9492
9493 static int
9494 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
9495                          struct rte_flow_action *pmd_actions,
9496                          uint32_t num_actions, struct rte_flow_error *err)
9497 {
9498         struct tunnel_db_element_release_ctx ctx = {
9499                 .items = NULL,
9500                 .actions = pmd_actions,
9501                 .num_elements = num_actions,
9502                 .error = err,
9503         };
9504
9505         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9506                                       tunnel_element_release_hit,
9507                                       tunnel_element_release_miss, &ctx, false);
9508
9509         return ctx.ret;
9510 }
9511
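/**
 * Tunnel offload restore info callback: recover the application tunnel and
 * group from the FDIR mark carried by a packet that missed in the tunnel
 * offload tables.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_flow_error is set.
 */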
9512 static int
9513 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
9514                                   struct rte_mbuf *m,
9515                                   struct rte_flow_restore_info *info,
9516                                   struct rte_flow_error *err)
9517 {
9518         uint64_t ol_flags = m->ol_flags;
9519         const struct mlx5_flow_tbl_data_entry *tble;
9520         const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
9521
9522         if (!is_tunnel_offload_active(dev)) {
9523                 info->flags = 0;
9524                 return 0;
9525         }
9526
9527         if ((ol_flags & mask) != mask)
9528                 goto err;
9529         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
9530         if (!tble) {
9531                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
9532                         dev->data->port_id, m->hash.fdir.hi);
9533                 goto err;
9534         }
9535         MLX5_ASSERT(tble->tunnel);
9536         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
9537         info->group_id = tble->group_id;
9538         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
9539                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
9540                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
9541
9542         return 0;
9543
9544 err:
9545         return rte_flow_error_set(err, EINVAL,
9546                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9547                                   "failed to get restore info");
9548 }
9549
9550 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
9551 static int
9552 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
9553                            __rte_unused struct rte_flow_tunnel *app_tunnel,
9554                            __rte_unused struct rte_flow_action **actions,
9555                            __rte_unused uint32_t *num_of_actions,
9556                            __rte_unused struct rte_flow_error *error)
9557 {
9558         return -ENOTSUP;
9559 }
9560
9561 static int
9562 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
9563                        __rte_unused struct rte_flow_tunnel *app_tunnel,
9564                        __rte_unused struct rte_flow_item **items,
9565                        __rte_unused uint32_t *num_of_items,
9566                        __rte_unused struct rte_flow_error *error)
9567 {
9568         return -ENOTSUP;
9569 }
9570
9571 static int
9572 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
9573                               __rte_unused struct rte_flow_item *pmd_items,
9574                               __rte_unused uint32_t num_items,
9575                               __rte_unused struct rte_flow_error *err)
9576 {
9577         return -ENOTSUP;
9578 }
9579
9580 static int
9581 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
9582                                 __rte_unused struct rte_flow_action *pmd_action,
9583                                 __rte_unused uint32_t num_actions,
9584                                 __rte_unused struct rte_flow_error *err)
9585 {
9586         return -ENOTSUP;
9587 }
9588
9589 static int
9590 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
9591                                   __rte_unused struct rte_mbuf *m,
9592                                   __rte_unused struct rte_flow_restore_info *i,
9593                                   __rte_unused struct rte_flow_error *err)
9594 {
9595         return -ENOTSUP;
9596 }
9597
9598 static int
9599 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
9600                              __rte_unused struct rte_flow *flow,
9601                              __rte_unused const struct rte_flow_attr *attr,
9602                              __rte_unused const struct rte_flow_action *actions,
9603                              __rte_unused uint32_t flow_idx,
9604                              __rte_unused const struct mlx5_flow_tunnel *tunnel,
9605                              __rte_unused struct tunnel_default_miss_ctx *ctx,
9606                              __rte_unused struct rte_flow_error *error)
9607 {
9608         return -ENOTSUP;
9609 }
9610
9611 static struct mlx5_flow_tunnel *
9612 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
9613                     __rte_unused uint32_t id)
9614 {
9615         return NULL;
9616 }
9617
9618 static void
9619 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
9620                       __rte_unused struct mlx5_flow_tunnel *tunnel)
9621 {
9622 }
9623
9624 static uint32_t
9625 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
9626                                 __rte_unused const struct mlx5_flow_tunnel *t,
9627                                 __rte_unused uint32_t group,
9628                                 __rte_unused uint32_t *table,
9629                                 struct rte_flow_error *error)
9630 {
9631         return rte_flow_error_set(error, ENOTSUP,
9632                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
9633                                   "tunnel offload requires DV support");
9634 }
9635
9636 void
9637 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
9638                         __rte_unused  uint16_t port_id)
9639 {
9640 }
9641 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
9642
9643 static void
9644 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
9645 {
9646         int ret;
9647         struct rte_flow_error error;
9648
9649         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9650                 char *item_name;
9651                 ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
9652                                     sizeof(item_name),
9653                                     (void *)(uintptr_t)item->type, &error);
9654                 if (ret > 0)
9655                         printf("%s ", item_name);
9656                 else
9657                         printf("%d\n", (int)item->type);
9658         }
9659         printf("END\n");
9660 }
9661
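/**
 * Check whether a UDP item matches the standard VXLAN destination port
 * (MLX5_UDP_PORT_VXLAN). Also returns true when the destination port is
 * not specified or fully masked out.
 */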
9662 static int
9663 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
9664 {
9665         const struct rte_flow_item_udp *spec = udp_item->spec;
9666         const struct rte_flow_item_udp *mask = udp_item->mask;
9667         uint16_t udp_dport = 0;
9668
9669         if (spec != NULL) {
9670                 if (!mask)
9671                         mask = &rte_flow_item_udp_mask;
9672                 udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
9673                                 mask->hdr.dst_port);
9674         }
9675         return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
9676 }
9677
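/**
 * Select the RSS expansion node for a trailing VXLAN item: use the standard
 * VXLAN expansion when the preceding UDP item matches the well-known port,
 * otherwise use the L3 VXLAN expansion.
 */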
9678 static const struct mlx5_flow_expand_node *
9679 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
9680                 unsigned int item_idx,
9681                 const struct mlx5_flow_expand_node graph[],
9682                 const struct mlx5_flow_expand_node *node)
9683 {
9684         const struct rte_flow_item *item = pattern + item_idx, *prev_item;
9685
9686         if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
9687                         node != NULL &&
9688                         node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
9689                 /*
9690                  * The expansion node is VXLAN and it is also the last
9691                  * expandable item in the pattern, so we need to continue
9692                  * expansion of the inner tunnel.
9693                  */
9694                 MLX5_ASSERT(item_idx > 0);
9695                 prev_item = pattern + item_idx - 1;
9696                 MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
9697                 if (mlx5_flow_is_std_vxlan_port(prev_item))
9698                         return &graph[MLX5_EXPANSION_STD_VXLAN];
9699                 return &graph[MLX5_EXPANSION_L3_VXLAN];
9700         }
9701         return node;
9702 }
9703
9704 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
9705 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
9706         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
9707 };
9708
9709 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
9710 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
9711         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
9712         { 9, 10, 11 }, { 12, 13, 14 },
9713 };
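/*
 * Both maps are indexed as map[priority][subpriority] by
 * mlx5_flow_adjust_priority(); e.g. with 16 Verbs priorities a rule with
 * base priority 1 and subpriority 2 is placed at Verbs priority 5.
 */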
9714
9715 /**
9716  * Discover the number of available flow priorities.
9717  *
9718  * @param dev
9719  *   Ethernet device.
9720  *
9721  * @return
9722  *   On success, number of available flow priorities.
9723  *   On failure, a negative errno-style code and rte_errno is set.
9724  */
9725 int
9726 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
9727 {
9728         static const uint16_t vprio[] = {8, 16};
9729         const struct mlx5_priv *priv = dev->data->dev_private;
9730         const struct mlx5_flow_driver_ops *fops;
9731         enum mlx5_flow_drv_type type;
9732         int ret;
9733
9734         type = mlx5_flow_os_get_type();
9735         if (type == MLX5_FLOW_TYPE_MAX) {
9736                 type = MLX5_FLOW_TYPE_VERBS;
9737                 if (priv->sh->devx && priv->config.dv_flow_en)
9738                         type = MLX5_FLOW_TYPE_DV;
9739         }
9740         fops = flow_get_drv_ops(type);
9741         if (fops->discover_priorities == NULL) {
9742                 DRV_LOG(ERR, "Priority discovery not supported");
9743                 rte_errno = ENOTSUP;
9744                 return -rte_errno;
9745         }
9746         ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
9747         if (ret < 0)
9748                 return ret;
9749         switch (ret) {
9750         case 8:
9751                 ret = RTE_DIM(priority_map_3);
9752                 break;
9753         case 16:
9754                 ret = RTE_DIM(priority_map_5);
9755                 break;
9756         default:
9757                 rte_errno = ENOTSUP;
9758                 DRV_LOG(ERR,
9759                         "port %u maximum priority: %d expected 8/16",
9760                         dev->data->port_id, ret);
9761                 return -rte_errno;
9762         }
9763         DRV_LOG(INFO, "port %u supported flow priorities:"
9764                 " 0-%d for ingress or egress root table,"
9765                 " 0-%d for non-root table or transfer root table.",
9766                 dev->data->port_id, ret - 2,
9767                 MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
9768         return ret;
9769 }
9770
9771 /**
9772  * Adjust flow priority based on the highest layer and the request priority.
9773  *
9774  * @param[in] dev
9775  *   Pointer to the Ethernet device structure.
9776  * @param[in] priority
9777  *   The rule base priority.
9778  * @param[in] subpriority
9779  *   The priority based on the items.
9780  *
9781  * @return
9782  *   The new priority.
9783  */
9784 uint32_t
9785 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
9786                           uint32_t subpriority)
9787 {
9788         uint32_t res = 0;
9789         struct mlx5_priv *priv = dev->data->dev_private;
9790
9791         switch (priv->sh->flow_max_priority) {
9792         case RTE_DIM(priority_map_3):
9793                 res = priority_map_3[priority][subpriority];
9794                 break;
9795         case RTE_DIM(priority_map_5):
9796                 res = priority_map_5[priority][subpriority];
9797                 break;
9798         }
9799         return  res;
9800 }