[dpdk.git] drivers/net/mlx5/mlx5_flow.c @ 18c313c6f08bb34bb4b9b8b6fb11e810368f9202
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              const struct mlx5_flow_tunnel *tunnel,
54                              struct tunnel_default_miss_ctx *ctx,
55                              struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62                                 const struct mlx5_flow_tunnel *tunnel,
63                                 uint32_t group, uint32_t *table,
64                                 struct rte_flow_error *error);
65
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68
69
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79         [MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
80 #endif
81         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
82         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
83 };
84
85 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
86 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
87         (const int []){ \
88                 __VA_ARGS__, 0, \
89         }
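/*
 * Illustrative sketch (not part of the driver): the macro above builds a
 * zero-terminated compound literal of node indexes, e.g.
 *
 *   .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
 *                                     MLX5_EXPANSION_IPV6)
 *
 * expands to
 *
 *   .next = (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * so the expansion code can walk the list until it reads the 0 terminator.
 */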
90
91 /** Node object of input graph for mlx5_flow_expand_rss(). */
92 struct mlx5_flow_expand_node {
93         const int *const next;
94         /**<
95          * List of next node indexes. A zero value is interpreted as a terminator.
96          */
97         const enum rte_flow_item_type type;
98         /**< Pattern item type of current node. */
99         uint64_t rss_types;
100         /**<
101          * RSS types bit-field associated with this node
102          * (see RTE_ETH_RSS_* definitions).
103          */
104         uint64_t node_flags;
105         /**<
106          *  Bit-fields that define how the node is used in the expansion.
107          * (see MLX5_EXPANSION_NODE_* definitions).
108          */
109 };
110
111 /* Optional expansion node. The expansion algorithm will not go deeper past it. */
112 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
113
114 /* The node is not added implicitly as expansion to the flow pattern.
115  * If the node type does not match the flow pattern item type, the
116  * expansion algorithm will go deeper to its next items.
117  * In the current implementation, the list of next node indexes can
118  * have at most one node with this flag set and it has to be the last
119  * node index (before the list terminator).
120  */
121 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
122
123 /** Object returned by mlx5_flow_expand_rss(). */
124 struct mlx5_flow_expand_rss {
125         uint32_t entries;
126         /**< Number of entries in @p entry[] (patterns and priorities). */
127         struct {
128                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
129                 uint32_t priority; /**< Priority offset for each expansion. */
130         } entry[];
131 };
132
133 static void
134 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
135
136 static const struct mlx5_flow_expand_node *
137 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
138                 unsigned int item_idx,
139                 const struct mlx5_flow_expand_node graph[],
140                 const struct mlx5_flow_expand_node *node);
141
142 static bool
143 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
144 {
145         switch (item->type) {
146         case RTE_FLOW_ITEM_TYPE_ETH:
147         case RTE_FLOW_ITEM_TYPE_VLAN:
148         case RTE_FLOW_ITEM_TYPE_IPV4:
149         case RTE_FLOW_ITEM_TYPE_IPV6:
150         case RTE_FLOW_ITEM_TYPE_UDP:
151         case RTE_FLOW_ITEM_TYPE_TCP:
152         case RTE_FLOW_ITEM_TYPE_VXLAN:
153         case RTE_FLOW_ITEM_TYPE_NVGRE:
154         case RTE_FLOW_ITEM_TYPE_GRE:
155         case RTE_FLOW_ITEM_TYPE_GENEVE:
156         case RTE_FLOW_ITEM_TYPE_MPLS:
157         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
158         case RTE_FLOW_ITEM_TYPE_GRE_KEY:
159         case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
160         case RTE_FLOW_ITEM_TYPE_GTP:
161                 return true;
162         default:
163                 break;
164         }
165         return false;
166 }
167
168 /**
169  * Network Service Header (NSH) and its next protocol values
170  * are described in RFC-8393.
171  */
172 static enum rte_flow_item_type
173 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
174 {
175         enum rte_flow_item_type type;
176
177         switch (proto_mask & proto_spec) {
178         case RTE_VXLAN_GPE_TYPE_IPV4:
179                 type = RTE_FLOW_ITEM_TYPE_IPV4;
180                 break;
181         case RTE_VXLAN_GPE_TYPE_IPV6:
182                 type = RTE_FLOW_ITEM_TYPE_IPV6;
183                 break;
184         case RTE_VXLAN_GPE_TYPE_ETH:
185                 type = RTE_FLOW_ITEM_TYPE_ETH;
186                 break;
187         default:
188                 type = RTE_FLOW_ITEM_TYPE_END;
189         }
190         return type;
191 }
192
193 static enum rte_flow_item_type
194 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
195 {
196         enum rte_flow_item_type type;
197
198         switch (proto_mask & proto_spec) {
199         case IPPROTO_UDP:
200                 type = RTE_FLOW_ITEM_TYPE_UDP;
201                 break;
202         case IPPROTO_TCP:
203                 type = RTE_FLOW_ITEM_TYPE_TCP;
204                 break;
205         case IPPROTO_IP:
206                 type = RTE_FLOW_ITEM_TYPE_IPV4;
207                 break;
208         case IPPROTO_IPV6:
209                 type = RTE_FLOW_ITEM_TYPE_IPV6;
210                 break;
211         default:
212                 type = RTE_FLOW_ITEM_TYPE_END;
213         }
214         return type;
215 }
216
217 static enum rte_flow_item_type
218 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
219                             rte_be16_t type_mask, bool is_tunnel)
220 {
221         enum rte_flow_item_type type;
222
223         switch (rte_be_to_cpu_16(type_spec & type_mask)) {
224         case RTE_ETHER_TYPE_TEB:
225                 type = is_tunnel ?
226                        RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
227                 break;
228         case RTE_ETHER_TYPE_VLAN:
229                 type = !is_tunnel ?
230                        RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
231                 break;
232         case RTE_ETHER_TYPE_IPV4:
233                 type = RTE_FLOW_ITEM_TYPE_IPV4;
234                 break;
235         case RTE_ETHER_TYPE_IPV6:
236                 type = RTE_FLOW_ITEM_TYPE_IPV6;
237                 break;
238         default:
239                 type = RTE_FLOW_ITEM_TYPE_END;
240         }
241         return type;
242 }
243
244 static enum rte_flow_item_type
245 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
246 {
247 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
248         do {                                                             \
249                 const void *m = item->mask;                              \
250                 const void *s = item->spec;                              \
251                 mask = m ?                                               \
252                         ((const struct rte_flow_item_##type *)m)->fld :  \
253                         rte_flow_item_##type##_mask.fld;                 \
254                 spec = ((const struct rte_flow_item_##type *)s)->fld;    \
255         } while (0)
256
257         enum rte_flow_item_type ret;
258         uint16_t spec, mask;
259
260         if (item == NULL || item->spec == NULL)
261                 return RTE_FLOW_ITEM_TYPE_VOID;
262         switch (item->type) {
263         case RTE_FLOW_ITEM_TYPE_ETH:
264                 MLX5_XSET_ITEM_MASK_SPEC(eth, type);
265                 if (!mask)
266                         return RTE_FLOW_ITEM_TYPE_VOID;
267                 ret = mlx5_ethertype_to_item_type(spec, mask, false);
268                 break;
269         case RTE_FLOW_ITEM_TYPE_VLAN:
270                 MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
271                 if (!mask)
272                         return RTE_FLOW_ITEM_TYPE_VOID;
273                 ret = mlx5_ethertype_to_item_type(spec, mask, false);
274                 break;
275         case RTE_FLOW_ITEM_TYPE_IPV4:
276                 MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
277                 if (!mask)
278                         return RTE_FLOW_ITEM_TYPE_VOID;
279                 ret = mlx5_inet_proto_to_item_type(spec, mask);
280                 break;
281         case RTE_FLOW_ITEM_TYPE_IPV6:
282                 MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
283                 if (!mask)
284                         return RTE_FLOW_ITEM_TYPE_VOID;
285                 ret = mlx5_inet_proto_to_item_type(spec, mask);
286                 break;
287         case RTE_FLOW_ITEM_TYPE_GENEVE:
288                 MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
289                 ret = mlx5_ethertype_to_item_type(spec, mask, true);
290                 break;
291         case RTE_FLOW_ITEM_TYPE_GRE:
292                 MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
293                 ret = mlx5_ethertype_to_item_type(spec, mask, true);
294                 break;
295         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
296                 MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
297                 ret = mlx5_nsh_proto_to_item_type(spec, mask);
298                 break;
299         default:
300                 ret = RTE_FLOW_ITEM_TYPE_VOID;
301                 break;
302         }
303         return ret;
304 #undef MLX5_XSET_ITEM_MASK_SPEC
305 }
306
307 static const int *
308 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
309                 const int *next_node)
310 {
311         const struct mlx5_flow_expand_node *node = NULL;
312         const int *next = next_node;
313
314         while (next && *next) {
315                 /*
316                  * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
317                  * flag set, because they were not found in the flow pattern.
318                  */
319                 node = &graph[*next];
320                 if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
321                         break;
322                 next = node->next;
323         }
324         return next;
325 }
326
327 #define MLX5_RSS_EXP_ELT_N 16
328
329 /**
330  * Expand RSS flows into several possible flows according to the RSS hash
331  * fields requested and the driver capabilities.
332  *
333  * @param[out] buf
334  *   Buffer to store the result expansion.
335  * @param[in] size
336  *   Buffer size in bytes. If 0, @p buf can be NULL.
337  * @param[in] pattern
338  *   User flow pattern.
339  * @param[in] types
340  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
341  * @param[in] graph
342  *   Input graph to expand @p pattern according to @p types.
343  * @param[in] graph_root_index
344  *   Index of root node in @p graph, typically 0.
345  *
346  * @return
347  *   A positive value representing the size of @p buf in bytes regardless of
348  *   @p size on success, a negative errno value otherwise and rte_errno is
349  *   set, the following errors are defined:
350  *
351  *   -E2BIG: the expansion depth of @p graph is too deep.
352  *   -EINVAL: @p size does not provide enough space for the expanded pattern.
353  */
354 static int
355 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
356                      const struct rte_flow_item *pattern, uint64_t types,
357                      const struct mlx5_flow_expand_node graph[],
358                      int graph_root_index)
359 {
360         const struct rte_flow_item *item;
361         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
362         const int *next_node;
363         const int *stack[MLX5_RSS_EXP_ELT_N];
364         int stack_pos = 0;
365         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
366         unsigned int i, item_idx, last_expand_item_idx = 0;
367         size_t lsize;
368         size_t user_pattern_size = 0;
369         void *addr = NULL;
370         const struct mlx5_flow_expand_node *next = NULL;
371         struct rte_flow_item missed_item;
372         int missed = 0;
373         int elt = 0;
374         const struct rte_flow_item *last_expand_item = NULL;
375
376         memset(&missed_item, 0, sizeof(missed_item));
377         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
378                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
379         if (lsize > size)
380                 return -EINVAL;
381         buf->entry[0].priority = 0;
382         buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
383         buf->entries = 0;
384         addr = buf->entry[0].pattern;
385         for (item = pattern, item_idx = 0;
386                         item->type != RTE_FLOW_ITEM_TYPE_END;
387                         item++, item_idx++) {
388                 if (!mlx5_flow_is_rss_expandable_item(item)) {
389                         user_pattern_size += sizeof(*item);
390                         continue;
391                 }
392                 last_expand_item = item;
393                 last_expand_item_idx = item_idx;
394                 i = 0;
395                 while (node->next && node->next[i]) {
396                         next = &graph[node->next[i]];
397                         if (next->type == item->type)
398                                 break;
399                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
400                                 node = next;
401                                 i = 0;
402                         } else {
403                                 ++i;
404                         }
405                 }
406                 if (next)
407                         node = next;
408                 user_pattern_size += sizeof(*item);
409         }
410         user_pattern_size += sizeof(*item); /* Handle END item. */
411         lsize += user_pattern_size;
412         if (lsize > size)
413                 return -EINVAL;
414         /* Copy the user pattern in the first entry of the buffer. */
415         rte_memcpy(addr, pattern, user_pattern_size);
416         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
417         buf->entries = 1;
418         /* Start expanding. */
419         memset(flow_items, 0, sizeof(flow_items));
420         user_pattern_size -= sizeof(*item);
421         /*
422          * Check if the last valid item has a spec set, whether the pattern
423          * needs to be completed, and whether it can be used for expansion.
424          */
425         missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
426         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
427                 /* Item type END indicates expansion is not required. */
428                 return lsize;
429         }
430         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
431                 next = NULL;
432                 missed = 1;
433                 i = 0;
434                 while (node->next && node->next[i]) {
435                         next = &graph[node->next[i]];
436                         if (next->type == missed_item.type) {
437                                 flow_items[0].type = missed_item.type;
438                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
439                                 break;
440                         }
441                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
442                                 node = next;
443                                 i = 0;
444                         } else {
445                                 ++i;
446                         }
447                         next = NULL;
448                 }
449         }
450         if (next && missed) {
451                 elt = 2; /* missed item + item end. */
452                 node = next;
453                 lsize += elt * sizeof(*item) + user_pattern_size;
454                 if (lsize > size)
455                         return -EINVAL;
456                 if (node->rss_types & types) {
457                         buf->entry[buf->entries].priority = 1;
458                         buf->entry[buf->entries].pattern = addr;
459                         buf->entries++;
460                         rte_memcpy(addr, buf->entry[0].pattern,
461                                    user_pattern_size);
462                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
463                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
464                         addr = (void *)(((uintptr_t)addr) +
465                                         elt * sizeof(*item));
466                 }
467         } else if (last_expand_item != NULL) {
468                 node = mlx5_flow_expand_rss_adjust_node(pattern,
469                                 last_expand_item_idx, graph, node);
470         }
471         memset(flow_items, 0, sizeof(flow_items));
472         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
473                         node->next);
474         stack[stack_pos] = next_node;
475         node = next_node ? &graph[*next_node] : NULL;
476         while (node) {
477                 flow_items[stack_pos].type = node->type;
478                 if (node->rss_types & types) {
479                         size_t n;
480                         /*
481                          * Compute the number of items to copy from the
482                          * expansion and copy it.
483                          * When stack_pos is 0, there is 1 element in it,
484                          * plus the additional END item.
485                          */
486                         elt = stack_pos + 2;
487                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
488                         lsize += elt * sizeof(*item) + user_pattern_size;
489                         if (lsize > size)
490                                 return -EINVAL;
491                         n = elt * sizeof(*item);
492                         buf->entry[buf->entries].priority =
493                                 stack_pos + 1 + missed;
494                         buf->entry[buf->entries].pattern = addr;
495                         buf->entries++;
496                         rte_memcpy(addr, buf->entry[0].pattern,
497                                    user_pattern_size);
498                         addr = (void *)(((uintptr_t)addr) +
499                                         user_pattern_size);
500                         rte_memcpy(addr, &missed_item,
501                                    missed * sizeof(*item));
502                         addr = (void *)(((uintptr_t)addr) +
503                                 missed * sizeof(*item));
504                         rte_memcpy(addr, flow_items, n);
505                         addr = (void *)(((uintptr_t)addr) + n);
506                 }
507                 /* Go deeper. */
508                 if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
509                                 node->next) {
510                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
511                                         node->next);
512                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
513                                 rte_errno = E2BIG;
514                                 return -rte_errno;
515                         }
516                         stack[stack_pos] = next_node;
517                 } else if (*(next_node + 1)) {
518                         /* Follow up with the next possibility. */
519                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
520                                         ++next_node);
521                 } else if (!stack_pos) {
522                         /*
523                          * Completing the traversal over the different paths.
524                          * The next_node is advanced to the terminator.
525                          */
526                         ++next_node;
527                 } else {
528                         /* Move to the next path. */
529                         while (stack_pos) {
530                                 next_node = stack[--stack_pos];
531                                 next_node++;
532                                 if (*next_node)
533                                         break;
534                         }
535                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
536                                         next_node);
537                         stack[stack_pos] = next_node;
538                 }
539                 node = next_node && *next_node ? &graph[*next_node] : NULL;
540         }
541         return lsize;
542 }
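/*
 * Illustrative usage sketch (assumption, not actual driver code): callers
 * typically overlay the expansion object on a byte buffer. With the graph and
 * root defined below (mlx5_support_expansion, MLX5_EXPANSION_ROOT), a plain
 * ETH/IPV4 pattern and RTE_ETH_RSS_NONFRAG_IPV4_UDP would expand to the
 * original pattern plus an ETH/IPV4/UDP entry:
 *
 *   union {
 *           struct mlx5_flow_expand_rss buf;
 *           uint8_t buffer[4096];
 *   } expand_buffer;
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   int ret = mlx5_flow_expand_rss(&expand_buffer.buf, sizeof(expand_buffer),
 *                                  pattern, RTE_ETH_RSS_NONFRAG_IPV4_UDP,
 *                                  mlx5_support_expansion,
 *                                  MLX5_EXPANSION_ROOT);
 *
 * On success ret is positive and expand_buffer.buf.entries is 2, the second
 * entry carrying a higher priority offset than the user pattern.
 */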
543
544 enum mlx5_expansion {
545         MLX5_EXPANSION_ROOT,
546         MLX5_EXPANSION_ROOT_OUTER,
547         MLX5_EXPANSION_OUTER_ETH,
548         MLX5_EXPANSION_OUTER_VLAN,
549         MLX5_EXPANSION_OUTER_IPV4,
550         MLX5_EXPANSION_OUTER_IPV4_UDP,
551         MLX5_EXPANSION_OUTER_IPV4_TCP,
552         MLX5_EXPANSION_OUTER_IPV6,
553         MLX5_EXPANSION_OUTER_IPV6_UDP,
554         MLX5_EXPANSION_OUTER_IPV6_TCP,
555         MLX5_EXPANSION_VXLAN,
556         MLX5_EXPANSION_STD_VXLAN,
557         MLX5_EXPANSION_L3_VXLAN,
558         MLX5_EXPANSION_VXLAN_GPE,
559         MLX5_EXPANSION_GRE,
560         MLX5_EXPANSION_NVGRE,
561         MLX5_EXPANSION_GRE_KEY,
562         MLX5_EXPANSION_MPLS,
563         MLX5_EXPANSION_ETH,
564         MLX5_EXPANSION_VLAN,
565         MLX5_EXPANSION_IPV4,
566         MLX5_EXPANSION_IPV4_UDP,
567         MLX5_EXPANSION_IPV4_TCP,
568         MLX5_EXPANSION_IPV6,
569         MLX5_EXPANSION_IPV6_UDP,
570         MLX5_EXPANSION_IPV6_TCP,
571         MLX5_EXPANSION_IPV6_FRAG_EXT,
572         MLX5_EXPANSION_GTP,
573         MLX5_EXPANSION_GENEVE,
574 };
575
576 /** Supported expansion of items. */
577 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
578         [MLX5_EXPANSION_ROOT] = {
579                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
580                                                   MLX5_EXPANSION_IPV4,
581                                                   MLX5_EXPANSION_IPV6),
582                 .type = RTE_FLOW_ITEM_TYPE_END,
583         },
584         [MLX5_EXPANSION_ROOT_OUTER] = {
585                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
586                                                   MLX5_EXPANSION_OUTER_IPV4,
587                                                   MLX5_EXPANSION_OUTER_IPV6),
588                 .type = RTE_FLOW_ITEM_TYPE_END,
589         },
590         [MLX5_EXPANSION_OUTER_ETH] = {
591                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
592                 .type = RTE_FLOW_ITEM_TYPE_ETH,
593                 .rss_types = 0,
594         },
595         [MLX5_EXPANSION_OUTER_VLAN] = {
596                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
597                                                   MLX5_EXPANSION_OUTER_IPV6),
598                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
599                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
600         },
601         [MLX5_EXPANSION_OUTER_IPV4] = {
602                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
603                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
604                          MLX5_EXPANSION_OUTER_IPV4_TCP,
605                          MLX5_EXPANSION_GRE,
606                          MLX5_EXPANSION_NVGRE,
607                          MLX5_EXPANSION_IPV4,
608                          MLX5_EXPANSION_IPV6),
609                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
610                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
611                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
612         },
613         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
614                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
615                                                   MLX5_EXPANSION_VXLAN_GPE,
616                                                   MLX5_EXPANSION_MPLS,
617                                                   MLX5_EXPANSION_GENEVE,
618                                                   MLX5_EXPANSION_GTP),
619                 .type = RTE_FLOW_ITEM_TYPE_UDP,
620                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
621         },
622         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
623                 .type = RTE_FLOW_ITEM_TYPE_TCP,
624                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
625         },
626         [MLX5_EXPANSION_OUTER_IPV6] = {
627                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
628                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
629                          MLX5_EXPANSION_OUTER_IPV6_TCP,
630                          MLX5_EXPANSION_IPV4,
631                          MLX5_EXPANSION_IPV6,
632                          MLX5_EXPANSION_GRE,
633                          MLX5_EXPANSION_NVGRE),
634                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
635                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
636                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
637         },
638         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
639                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
640                                                   MLX5_EXPANSION_VXLAN_GPE,
641                                                   MLX5_EXPANSION_MPLS,
642                                                   MLX5_EXPANSION_GENEVE,
643                                                   MLX5_EXPANSION_GTP),
644                 .type = RTE_FLOW_ITEM_TYPE_UDP,
645                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
646         },
647         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
648                 .type = RTE_FLOW_ITEM_TYPE_TCP,
649                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
650         },
651         [MLX5_EXPANSION_VXLAN] = {
652                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
653                                                   MLX5_EXPANSION_IPV4,
654                                                   MLX5_EXPANSION_IPV6),
655                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
656         },
657         [MLX5_EXPANSION_STD_VXLAN] = {
658                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
659                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
660         },
661         [MLX5_EXPANSION_L3_VXLAN] = {
662                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
663                                                   MLX5_EXPANSION_IPV6),
664                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
665         },
666         [MLX5_EXPANSION_VXLAN_GPE] = {
667                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
668                                                   MLX5_EXPANSION_IPV4,
669                                                   MLX5_EXPANSION_IPV6),
670                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
671         },
672         [MLX5_EXPANSION_GRE] = {
673                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
674                                                   MLX5_EXPANSION_IPV4,
675                                                   MLX5_EXPANSION_IPV6,
676                                                   MLX5_EXPANSION_GRE_KEY,
677                                                   MLX5_EXPANSION_MPLS),
678                 .type = RTE_FLOW_ITEM_TYPE_GRE,
679         },
680         [MLX5_EXPANSION_GRE_KEY] = {
681                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
682                                                   MLX5_EXPANSION_IPV6,
683                                                   MLX5_EXPANSION_MPLS),
684                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
685                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
686         },
687         [MLX5_EXPANSION_NVGRE] = {
688                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
689                 .type = RTE_FLOW_ITEM_TYPE_NVGRE,
690         },
691         [MLX5_EXPANSION_MPLS] = {
692                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
693                                                   MLX5_EXPANSION_IPV6,
694                                                   MLX5_EXPANSION_ETH),
695                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
696                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
697         },
698         [MLX5_EXPANSION_ETH] = {
699                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
700                 .type = RTE_FLOW_ITEM_TYPE_ETH,
701         },
702         [MLX5_EXPANSION_VLAN] = {
703                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
704                                                   MLX5_EXPANSION_IPV6),
705                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
706                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
707         },
708         [MLX5_EXPANSION_IPV4] = {
709                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
710                                                   MLX5_EXPANSION_IPV4_TCP),
711                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
712                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
713                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
714         },
715         [MLX5_EXPANSION_IPV4_UDP] = {
716                 .type = RTE_FLOW_ITEM_TYPE_UDP,
717                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
718         },
719         [MLX5_EXPANSION_IPV4_TCP] = {
720                 .type = RTE_FLOW_ITEM_TYPE_TCP,
721                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
722         },
723         [MLX5_EXPANSION_IPV6] = {
724                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
725                                                   MLX5_EXPANSION_IPV6_TCP,
726                                                   MLX5_EXPANSION_IPV6_FRAG_EXT),
727                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
728                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
729                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
730         },
731         [MLX5_EXPANSION_IPV6_UDP] = {
732                 .type = RTE_FLOW_ITEM_TYPE_UDP,
733                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
734         },
735         [MLX5_EXPANSION_IPV6_TCP] = {
736                 .type = RTE_FLOW_ITEM_TYPE_TCP,
737                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
738         },
739         [MLX5_EXPANSION_IPV6_FRAG_EXT] = {
740                 .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
741         },
742         [MLX5_EXPANSION_GTP] = {
743                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
744                                                   MLX5_EXPANSION_IPV6),
745                 .type = RTE_FLOW_ITEM_TYPE_GTP,
746         },
747         [MLX5_EXPANSION_GENEVE] = {
748                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
749                                                   MLX5_EXPANSION_IPV4,
750                                                   MLX5_EXPANSION_IPV6),
751                 .type = RTE_FLOW_ITEM_TYPE_GENEVE,
752         },
753 };
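/*
 * Illustrative sketch (not part of the driver): one possible walk over the
 * graph above, starting from MLX5_EXPANSION_ROOT_OUTER, is
 *
 *   OUTER_ETH -> OUTER_IPV4 -> OUTER_IPV4_UDP -> VXLAN -> ETH -> IPV4
 *
 * Nodes flagged MLX5_EXPANSION_NODE_EXPLICIT (the VLAN nodes) are only
 * emitted when the user pattern already contains them, and nodes flagged
 * MLX5_EXPANSION_NODE_OPTIONAL (GRE_KEY, MPLS) stop further expansion.
 */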
754
755 static struct rte_flow_action_handle *
756 mlx5_action_handle_create(struct rte_eth_dev *dev,
757                           const struct rte_flow_indir_action_conf *conf,
758                           const struct rte_flow_action *action,
759                           struct rte_flow_error *error);
760 static int mlx5_action_handle_destroy
761                                 (struct rte_eth_dev *dev,
762                                  struct rte_flow_action_handle *handle,
763                                  struct rte_flow_error *error);
764 static int mlx5_action_handle_update
765                                 (struct rte_eth_dev *dev,
766                                  struct rte_flow_action_handle *handle,
767                                  const void *update,
768                                  struct rte_flow_error *error);
769 static int mlx5_action_handle_query
770                                 (struct rte_eth_dev *dev,
771                                  const struct rte_flow_action_handle *handle,
772                                  void *data,
773                                  struct rte_flow_error *error);
774 static int
775 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
776                     struct rte_flow_tunnel *app_tunnel,
777                     struct rte_flow_action **actions,
778                     uint32_t *num_of_actions,
779                     struct rte_flow_error *error);
780 static int
781 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
782                        struct rte_flow_tunnel *app_tunnel,
783                        struct rte_flow_item **items,
784                        uint32_t *num_of_items,
785                        struct rte_flow_error *error);
786 static int
787 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
788                               struct rte_flow_item *pmd_items,
789                               uint32_t num_items, struct rte_flow_error *err);
790 static int
791 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
792                                 struct rte_flow_action *pmd_actions,
793                                 uint32_t num_actions,
794                                 struct rte_flow_error *err);
795 static int
796 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
797                                   struct rte_mbuf *m,
798                                   struct rte_flow_restore_info *info,
799                                   struct rte_flow_error *err);
800 static struct rte_flow_item_flex_handle *
801 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
802                            const struct rte_flow_item_flex_conf *conf,
803                            struct rte_flow_error *error);
804 static int
805 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
806                             const struct rte_flow_item_flex_handle *handle,
807                             struct rte_flow_error *error);
808 static int
809 mlx5_flow_info_get(struct rte_eth_dev *dev,
810                    struct rte_flow_port_info *port_info,
811                    struct rte_flow_queue_info *queue_info,
812                    struct rte_flow_error *error);
813 static int
814 mlx5_flow_port_configure(struct rte_eth_dev *dev,
815                          const struct rte_flow_port_attr *port_attr,
816                          uint16_t nb_queue,
817                          const struct rte_flow_queue_attr *queue_attr[],
818                          struct rte_flow_error *err);
819
820 static struct rte_flow_pattern_template *
821 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
822                 const struct rte_flow_pattern_template_attr *attr,
823                 const struct rte_flow_item items[],
824                 struct rte_flow_error *error);
825
826 static int
827 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
828                                    struct rte_flow_pattern_template *template,
829                                    struct rte_flow_error *error);
830 static struct rte_flow_actions_template *
831 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
832                         const struct rte_flow_actions_template_attr *attr,
833                         const struct rte_flow_action actions[],
834                         const struct rte_flow_action masks[],
835                         struct rte_flow_error *error);
836 static int
837 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
838                                    struct rte_flow_actions_template *template,
839                                    struct rte_flow_error *error);
840
841 static const struct rte_flow_ops mlx5_flow_ops = {
842         .validate = mlx5_flow_validate,
843         .create = mlx5_flow_create,
844         .destroy = mlx5_flow_destroy,
845         .flush = mlx5_flow_flush,
846         .isolate = mlx5_flow_isolate,
847         .query = mlx5_flow_query,
848         .dev_dump = mlx5_flow_dev_dump,
849         .get_aged_flows = mlx5_flow_get_aged_flows,
850         .action_handle_create = mlx5_action_handle_create,
851         .action_handle_destroy = mlx5_action_handle_destroy,
852         .action_handle_update = mlx5_action_handle_update,
853         .action_handle_query = mlx5_action_handle_query,
854         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
855         .tunnel_match = mlx5_flow_tunnel_match,
856         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
857         .tunnel_item_release = mlx5_flow_tunnel_item_release,
858         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
859         .flex_item_create = mlx5_flow_flex_item_create,
860         .flex_item_release = mlx5_flow_flex_item_release,
861         .info_get = mlx5_flow_info_get,
862         .configure = mlx5_flow_port_configure,
863         .pattern_template_create = mlx5_flow_pattern_template_create,
864         .pattern_template_destroy = mlx5_flow_pattern_template_destroy,
865         .actions_template_create = mlx5_flow_actions_template_create,
866         .actions_template_destroy = mlx5_flow_actions_template_destroy,
867 };
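/*
 * Illustrative sketch (assumption, not driver code): applications never call
 * these callbacks directly; they reach them through the generic rte_flow API,
 * which resolves this table via the ethdev layer. For example, creating a
 * simple queue-redirection rule ends up in .create (port_id is hypothetical):
 *
 *   struct rte_flow_error error;
 *   const struct rte_flow_attr attr = { .ingress = 1 };
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   const struct rte_flow_action_queue queue = { .index = 0 };
 *   const struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                           actions, &error);
 */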
868
869 /* Tunnel information. */
870 struct mlx5_flow_tunnel_info {
871         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
872         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
873 };
874
875 static struct mlx5_flow_tunnel_info tunnels_info[] = {
876         {
877                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
878                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
879         },
880         {
881                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
882                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
883         },
884         {
885                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
886                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
887         },
888         {
889                 .tunnel = MLX5_FLOW_LAYER_GRE,
890                 .ptype = RTE_PTYPE_TUNNEL_GRE,
891         },
892         {
893                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
894                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
895         },
896         {
897                 .tunnel = MLX5_FLOW_LAYER_MPLS,
898                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
899         },
900         {
901                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
902                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
903         },
904         {
905                 .tunnel = MLX5_FLOW_LAYER_IPIP,
906                 .ptype = RTE_PTYPE_TUNNEL_IP,
907         },
908         {
909                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
910                 .ptype = RTE_PTYPE_TUNNEL_IP,
911         },
912         {
913                 .tunnel = MLX5_FLOW_LAYER_GTP,
914                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
915         },
916 };
917
918
919
920 /**
921  * Translate tag ID to register.
922  *
923  * @param[in] dev
924  *   Pointer to the Ethernet device structure.
925  * @param[in] feature
926  *   The feature that requests the register.
927  * @param[in] id
928  *   The requested register ID.
929  * @param[out] error
930  *   Error description in case of failure.
931  *
932  * @return
933  *   The requested register on success, a negative errno
934  *   value otherwise and rte_errno is set.
935  */
936 int
937 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
938                      enum mlx5_feature_name feature,
939                      uint32_t id,
940                      struct rte_flow_error *error)
941 {
942         struct mlx5_priv *priv = dev->data->dev_private;
943         struct mlx5_sh_config *config = &priv->sh->config;
944         enum modify_reg start_reg;
945         bool skip_mtr_reg = false;
946
947         switch (feature) {
948         case MLX5_HAIRPIN_RX:
949                 return REG_B;
950         case MLX5_HAIRPIN_TX:
951                 return REG_A;
952         case MLX5_METADATA_RX:
953                 switch (config->dv_xmeta_en) {
954                 case MLX5_XMETA_MODE_LEGACY:
955                         return REG_B;
956                 case MLX5_XMETA_MODE_META16:
957                         return REG_C_0;
958                 case MLX5_XMETA_MODE_META32:
959                         return REG_C_1;
960                 }
961                 break;
962         case MLX5_METADATA_TX:
963                 return REG_A;
964         case MLX5_METADATA_FDB:
965                 switch (config->dv_xmeta_en) {
966                 case MLX5_XMETA_MODE_LEGACY:
967                         return REG_NON;
968                 case MLX5_XMETA_MODE_META16:
969                         return REG_C_0;
970                 case MLX5_XMETA_MODE_META32:
971                         return REG_C_1;
972                 }
973                 break;
974         case MLX5_FLOW_MARK:
975                 switch (config->dv_xmeta_en) {
976                 case MLX5_XMETA_MODE_LEGACY:
977                         return REG_NON;
978                 case MLX5_XMETA_MODE_META16:
979                         return REG_C_1;
980                 case MLX5_XMETA_MODE_META32:
981                         return REG_C_0;
982                 }
983                 break;
984         case MLX5_MTR_ID:
985                 /*
986                  * If meter color and meter id share one register, flow match
987                  * should use the meter color register for match.
988                  */
989                 if (priv->mtr_reg_share)
990                         return priv->mtr_color_reg;
991                 else
992                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
993                                REG_C_3;
994         case MLX5_MTR_COLOR:
995         case MLX5_ASO_FLOW_HIT:
996         case MLX5_ASO_CONNTRACK:
997         case MLX5_SAMPLE_ID:
998                 /* All features use the same REG_C. */
999                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1000                 return priv->mtr_color_reg;
1001         case MLX5_COPY_MARK:
1002                 /*
1003          * The metadata COPY_MARK register is only used in the meter suffix
1004          * sub-flow when a meter is present. It's safe to share the same register.
1005                  */
1006                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1007         case MLX5_APP_TAG:
1008                 /*
1009          * If the meter is enabled, it engages a register for color
1010          * match and flow match. If the meter color match does not use
1011          * REG_C_2, the REG_C_x used by the meter color match must be
1012          * skipped.
1013          * If the meter is disabled, all available registers can be used.
1014                  */
1015                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1016                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1017                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1018                 if (id > (uint32_t)(REG_C_7 - start_reg))
1019                         return rte_flow_error_set(error, EINVAL,
1020                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1021                                                   NULL, "invalid tag id");
1022                 if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1023                         return rte_flow_error_set(error, ENOTSUP,
1024                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1025                                                   NULL, "unsupported tag id");
1026                 /*
1027                  * This case means the meter is using a REG_C_x greater than 2.
1028                  * Take care not to conflict with the meter color REG_C_x.
1029                  * If the available index REG_C_y >= REG_C_x, skip the
1030                  * color register.
1031                  */
1032                 if (skip_mtr_reg && priv->sh->flow_mreg_c
1033                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1034                         if (id >= (uint32_t)(REG_C_7 - start_reg))
1035                                 return rte_flow_error_set(error, EINVAL,
1036                                                        RTE_FLOW_ERROR_TYPE_ITEM,
1037                                                         NULL, "invalid tag id");
1038                         if (priv->sh->flow_mreg_c
1039                             [id + 1 + start_reg - REG_C_0] != REG_NON)
1040                                 return priv->sh->flow_mreg_c
1041                                                [id + 1 + start_reg - REG_C_0];
1042                         return rte_flow_error_set(error, ENOTSUP,
1043                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1044                                                   NULL, "unsupported tag id");
1045                 }
1046                 return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1047         }
1048         MLX5_ASSERT(false);
1049         return rte_flow_error_set(error, EINVAL,
1050                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1051                                   NULL, "invalid feature name");
1052 }
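/*
 * Illustrative sketch (assumption, not driver code): callers translate a
 * logical feature into a physical REG_C register before building matches or
 * modify-header actions, e.g. (tag_index is a hypothetical variable holding
 * the TAG item index requested by the application):
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, tag_index, error);
 *
 *   if (reg < 0)
 *           return reg;
 *
 * On success reg names one of the available REG_C_x registers; on failure
 * rte_errno is set and the error structure is filled.
 */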
1053
1054 /**
1055  * Check extensive flow metadata register support.
1056  *
1057  * @param dev
1058  *   Pointer to rte_eth_dev structure.
1059  *
1060  * @return
1061  *   True if device supports extensive flow metadata register, otherwise false.
1062  */
1063 bool
1064 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1065 {
1066         struct mlx5_priv *priv = dev->data->dev_private;
1067
1068         /*
1069          * Having available reg_c can be regarded as supporting extensive
1070          * flow metadata registers, which means:
1071          * - metadata register copy action by modify header.
1072          * - 16 modify header actions are supported.
1073          * - reg_c's are preserved across different domains (FDB and NIC) on
1074          *   packet loopback by flow lookup miss.
1075          */
1076         return priv->sh->flow_mreg_c[2] != REG_NON;
1077 }
1078
1079 /**
1080  * Get the lowest priority.
1081  *
1082  * @param[in] dev
1083  *   Pointer to the Ethernet device structure.
1084  * @param[in] attributes
1085  *   Pointer to device flow rule attributes.
1086  *
1087  * @return
1088  *   The value of the lowest flow priority.
1089  */
1090 uint32_t
1091 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1092                           const struct rte_flow_attr *attr)
1093 {
1094         struct mlx5_priv *priv = dev->data->dev_private;
1095
1096         if (!attr->group && !attr->transfer)
1097                 return priv->sh->flow_max_priority - 2;
1098         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1099 }
1100
1101 /**
1102  * Calculate matcher priority of the flow.
1103  *
1104  * @param[in] dev
1105  *   Pointer to the Ethernet device structure.
1106  * @param[in] attr
1107  *   Pointer to device flow rule attributes.
1108  * @param[in] subpriority
1109  *   The priority based on the items.
1110  * @param[in] external
1111  *   True if the flow is created by the user (external) application.
1112  * @return
1113  *   The matcher priority of the flow.
1114  */
1115 uint16_t
1116 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1117                           const struct rte_flow_attr *attr,
1118                           uint32_t subpriority, bool external)
1119 {
1120         uint16_t priority = (uint16_t)attr->priority;
1121         struct mlx5_priv *priv = dev->data->dev_private;
1122
1123         if (!attr->group && !attr->transfer) {
1124                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1125                         priority = priv->sh->flow_max_priority - 1;
1126                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1127         } else if (!external && attr->transfer && attr->group == 0 &&
1128                    attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1129                 return (priv->sh->flow_max_priority - 1) * 3;
1130         }
1131         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1132                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1133         return priority * 3 + subpriority;
1134 }
1135
1136 /**
1137  * Verify the @p item specifications (spec, last, mask) are compatible with the
1138  * NIC capabilities.
1139  *
1140  * @param[in] item
1141  *   Item specification.
1142  * @param[in] mask
1143  *   @p item->mask or flow default bit-masks.
1144  * @param[in] nic_mask
1145  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1146  * @param[in] size
1147  *   Bit-masks size in bytes.
1148  * @param[in] range_accepted
1149  *   True if range of values is accepted for specific fields, false otherwise.
1150  * @param[out] error
1151  *   Pointer to error structure.
1152  *
1153  * @return
1154  *   0 on success, a negative errno value otherwise and rte_errno is set.
1155  */
1156 int
1157 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1158                           const uint8_t *mask,
1159                           const uint8_t *nic_mask,
1160                           unsigned int size,
1161                           bool range_accepted,
1162                           struct rte_flow_error *error)
1163 {
1164         unsigned int i;
1165
1166         MLX5_ASSERT(nic_mask);
1167         for (i = 0; i < size; ++i)
1168                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1169                         return rte_flow_error_set(error, ENOTSUP,
1170                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1171                                                   item,
1172                                                   "mask enables non supported"
1173                                                   " bits");
1174         if (!item->spec && (item->mask || item->last))
1175                 return rte_flow_error_set(error, EINVAL,
1176                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1177                                           "mask/last without a spec is not"
1178                                           " supported");
1179         if (item->spec && item->last && !range_accepted) {
1180                 uint8_t spec[size];
1181                 uint8_t last[size];
1182                 unsigned int i;
1183                 int ret;
1184
1185                 for (i = 0; i < size; ++i) {
1186                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1187                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1188                 }
1189                 ret = memcmp(spec, last, size);
1190                 if (ret != 0)
1191                         return rte_flow_error_set(error, EINVAL,
1192                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1193                                                   item,
1194                                                   "range is not valid");
1195         }
1196         return 0;
1197 }
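/*
 * Illustrative sketch (assumption, not driver code): item validators call
 * this helper with the NIC-supported mask for the item type, passing either
 * item->mask or the default rte_flow_item_*_mask when the former is NULL.
 * For a UDP item where only the port fields may be matched:
 *
 *   static const struct rte_flow_item_udp nic_mask = {
 *           .hdr = {
 *                   .src_port = RTE_BE16(UINT16_MAX),
 *                   .dst_port = RTE_BE16(UINT16_MAX),
 *           },
 *   };
 *   const struct rte_flow_item_udp *mask =
 *           item->mask ? item->mask : &rte_flow_item_udp_mask;
 *   int ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                       (const uint8_t *)&nic_mask,
 *                                       sizeof(struct rte_flow_item_udp),
 *                                       false, error);
 */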
1198
1199 /**
1200  * Adjust the hash fields according to the @p flow information.
1201  *
1202  * @param[in] rss_desc
1203  *   Pointer to the RSS descriptor of the mlx5 flow.
1204  * @param[in] tunnel
1205  *   1 when the hash field is for a tunnel item.
1206  * @param[in] layer_types
1207  *   RTE_ETH_RSS_* types.
1208  * @param[in] hash_fields
1209  *   Item hash fields.
1210  *
1211  * @return
1212  *   The hash fields that should be used.
1213  */
1214 uint64_t
1215 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1216                             int tunnel __rte_unused, uint64_t layer_types,
1217                             uint64_t hash_fields)
1218 {
1219 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1220         int rss_request_inner = rss_desc->level >= 2;
1221
1222         /* Check RSS hash level for tunnel. */
1223         if (tunnel && rss_request_inner)
1224                 hash_fields |= IBV_RX_HASH_INNER;
1225         else if (tunnel || rss_request_inner)
1226                 return 0;
1227 #endif
1228         /* Check if requested layer matches RSS hash fields. */
1229         if (!(rss_desc->types & layer_types))
1230                 return 0;
1231         return hash_fields;
1232 }
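/*
 * Illustrative sketch (assumption, not driver code): for an outer IPv4/UDP
 * device flow with a non-inner RSS request (rss_desc->level < 2), the hash
 * fields would be filtered as
 *
 *   uint64_t fields = IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4 |
 *                     IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP;
 *
 *   fields = mlx5_flow_hashfields_adjust(rss_desc, 0,
 *                                        RTE_ETH_RSS_NONFRAG_IPV4_UDP,
 *                                        fields);
 *
 * The fields are returned unchanged when rss_desc->types includes
 * RTE_ETH_RSS_NONFRAG_IPV4_UDP and cleared to 0 otherwise.
 */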
1233
1234 /**
1235  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
1236  * can be used; if several tunnel rules are used on this queue, the tunnel
1237  * ptype is cleared.
1238  *
1239  * @param rxq_ctrl
1240  *   Rx queue to update.
1241  */
1242 static void
1243 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1244 {
1245         unsigned int i;
1246         uint32_t tunnel_ptype = 0;
1247
1248         /* Look up for the ptype to use. */
1249         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1250                 if (!rxq_ctrl->flow_tunnels_n[i])
1251                         continue;
1252                 if (!tunnel_ptype) {
1253                         tunnel_ptype = tunnels_info[i].ptype;
1254                 } else {
1255                         tunnel_ptype = 0;
1256                         break;
1257                 }
1258         }
1259         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1260 }
1261
1262 /**
1263  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1264  * flow.
1265  *
1266  * @param[in] dev
1267  *   Pointer to the Ethernet device structure.
1268  * @param[in] dev_handle
1269  *   Pointer to device flow handle structure.
1270  */
1271 void
1272 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1273                        struct mlx5_flow_handle *dev_handle)
1274 {
1275         struct mlx5_priv *priv = dev->data->dev_private;
1276         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1277         struct mlx5_ind_table_obj *ind_tbl = NULL;
1278         unsigned int i;
1279
1280         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1281                 struct mlx5_hrxq *hrxq;
1282
1283                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1284                               dev_handle->rix_hrxq);
1285                 if (hrxq)
1286                         ind_tbl = hrxq->ind_table;
1287         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1288                 struct mlx5_shared_action_rss *shared_rss;
1289
1290                 shared_rss = mlx5_ipool_get
1291                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1292                          dev_handle->rix_srss);
1293                 if (shared_rss)
1294                         ind_tbl = shared_rss->ind_tbl;
1295         }
1296         if (!ind_tbl)
1297                 return;
1298         for (i = 0; i != ind_tbl->queues_n; ++i) {
1299                 int idx = ind_tbl->queues[i];
1300                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1301
1302                 MLX5_ASSERT(rxq_ctrl != NULL);
1303                 if (rxq_ctrl == NULL)
1304                         continue;
1305                 /*
1306                  * To support metadata register copy on Tx loopback,
1307                  * this must always be enabled (metadata may arrive
1308                  * from another port, not only from local flows).
1309                  */
1310                 if (tunnel) {
1311                         unsigned int j;
1312
1313                         /* Increase the counter matching the flow. */
1314                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1315                                 if ((tunnels_info[j].tunnel &
1316                                      dev_handle->layers) ==
1317                                     tunnels_info[j].tunnel) {
1318                                         rxq_ctrl->flow_tunnels_n[j]++;
1319                                         break;
1320                                 }
1321                         }
1322                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1323                 }
1324         }
1325 }
1326
1327 static void
1328 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1329 {
1330         struct mlx5_priv *priv = dev->data->dev_private;
1331         struct mlx5_rxq_ctrl *rxq_ctrl;
1332
1333         if (priv->mark_enabled)
1334                 return;
1335         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1336                 rxq_ctrl->rxq.mark = 1;
1337         }
1338         priv->mark_enabled = 1;
1339 }
1340
1341 /**
1342  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1343  *
1344  * @param[in] dev
1345  *   Pointer to the Ethernet device structure.
1346  * @param[in] flow
1347  *   Pointer to flow structure.
1348  */
1349 static void
1350 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1351 {
1352         struct mlx5_priv *priv = dev->data->dev_private;
1353         uint32_t handle_idx;
1354         struct mlx5_flow_handle *dev_handle;
1355         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1356
1357         MLX5_ASSERT(wks);
1358         if (wks->mark)
1359                 flow_rxq_mark_flag_set(dev);
1360         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1361                        handle_idx, dev_handle, next)
1362                 flow_drv_rxq_flags_set(dev, dev_handle);
1363 }
1364
1365 /**
1366  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1367  * device flow if no other flow uses it with the same kind of request.
1368  *
1369  * @param dev
1370  *   Pointer to Ethernet device.
1371  * @param[in] dev_handle
1372  *   Pointer to the device flow handle structure.
1373  */
1374 static void
1375 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1376                         struct mlx5_flow_handle *dev_handle)
1377 {
1378         struct mlx5_priv *priv = dev->data->dev_private;
1379         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1380         struct mlx5_ind_table_obj *ind_tbl = NULL;
1381         unsigned int i;
1382
1383         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1384                 struct mlx5_hrxq *hrxq;
1385
1386                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1387                               dev_handle->rix_hrxq);
1388                 if (hrxq)
1389                         ind_tbl = hrxq->ind_table;
1390         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1391                 struct mlx5_shared_action_rss *shared_rss;
1392
1393                 shared_rss = mlx5_ipool_get
1394                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1395                          dev_handle->rix_srss);
1396                 if (shared_rss)
1397                         ind_tbl = shared_rss->ind_tbl;
1398         }
1399         if (!ind_tbl)
1400                 return;
1401         MLX5_ASSERT(dev->data->dev_started);
1402         for (i = 0; i != ind_tbl->queues_n; ++i) {
1403                 int idx = ind_tbl->queues[i];
1404                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1405
1406                 MLX5_ASSERT(rxq_ctrl != NULL);
1407                 if (rxq_ctrl == NULL)
1408                         continue;
1409                 if (tunnel) {
1410                         unsigned int j;
1411
1412                         /* Decrease the counter matching the flow. */
1413                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1414                                 if ((tunnels_info[j].tunnel &
1415                                      dev_handle->layers) ==
1416                                     tunnels_info[j].tunnel) {
1417                                         rxq_ctrl->flow_tunnels_n[j]--;
1418                                         break;
1419                                 }
1420                         }
1421                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1422                 }
1423         }
1424 }
1425
1426 /**
1427  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1428  * @p flow if no other flow uses it with the same kind of request.
1429  *
1430  * @param dev
1431  *   Pointer to Ethernet device.
1432  * @param[in] flow
1433  *   Pointer to the flow.
1434  */
1435 static void
1436 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1437 {
1438         struct mlx5_priv *priv = dev->data->dev_private;
1439         uint32_t handle_idx;
1440         struct mlx5_flow_handle *dev_handle;
1441
1442         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1443                        handle_idx, dev_handle, next)
1444                 flow_drv_rxq_flags_trim(dev, dev_handle);
1445 }
1446
1447 /**
1448  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1449  *
1450  * @param dev
1451  *   Pointer to Ethernet device.
1452  */
1453 static void
1454 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1455 {
1456         struct mlx5_priv *priv = dev->data->dev_private;
1457         unsigned int i;
1458
1459         for (i = 0; i != priv->rxqs_n; ++i) {
1460                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1461                 unsigned int j;
1462
1463                 if (rxq == NULL || rxq->ctrl == NULL)
1464                         continue;
1465                 rxq->ctrl->rxq.mark = 0;
1466                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1467                         rxq->ctrl->flow_tunnels_n[j] = 0;
1468                 rxq->ctrl->rxq.tunnel = 0;
1469         }
1470         priv->mark_enabled = 0;
1471 }
1472
1473 /**
1474  * Set the Rx queue dynamic metadata (mask and offset) for all Rx queues of the device.
1475  *
1476  * @param[in] dev
1477  *   Pointer to the Ethernet device structure.
1478  */
1479 void
1480 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1481 {
1482         struct mlx5_priv *priv = dev->data->dev_private;
1483         unsigned int i;
1484
1485         for (i = 0; i != priv->rxqs_n; ++i) {
1486                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1487                 struct mlx5_rxq_data *data;
1488
1489                 if (rxq == NULL || rxq->ctrl == NULL)
1490                         continue;
1491                 data = &rxq->ctrl->rxq;
1492                 if (!rte_flow_dynf_metadata_avail()) {
1493                         data->dynf_meta = 0;
1494                         data->flow_meta_mask = 0;
1495                         data->flow_meta_offset = -1;
1496                         data->flow_meta_port_mask = 0;
1497                 } else {
1498                         data->dynf_meta = 1;
1499                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1500                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1501                         data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1502                 }
1503         }
1504 }
1505
1506 /*
1507  * Return a pointer to the desired action in the list of actions.
1508  *
1509  * @param[in] actions
1510  *   The list of actions to search the action in.
1511  * @param[in] action
1512  *   The action to find.
1513  *
1514  * @return
1515  *   Pointer to the action in the list, if found. NULL otherwise.
1516  */
1517 const struct rte_flow_action *
1518 mlx5_flow_find_action(const struct rte_flow_action *actions,
1519                       enum rte_flow_action_type action)
1520 {
1521         if (actions == NULL)
1522                 return NULL;
1523         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1524                 if (actions->type == action)
1525                         return actions;
1526         return NULL;
1527 }
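
/*
 * Example (illustrative sketch): extract the RSS configuration from an
 * application-supplied action list, if present:
 *
 *     const struct rte_flow_action *act =
 *             mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *     const struct rte_flow_action_rss *rss = act ? act->conf : NULL;
 */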
1528
1529 /*
1530  * Validate the flag action.
1531  *
1532  * @param[in] action_flags
1533  *   Bit-fields that hold the actions detected until now.
1534  * @param[in] attr
1535  *   Attributes of flow that includes this action.
1536  * @param[out] error
1537  *   Pointer to error structure.
1538  *
1539  * @return
1540  *   0 on success, a negative errno value otherwise and rte_errno is set.
1541  */
1542 int
1543 mlx5_flow_validate_action_flag(uint64_t action_flags,
1544                                const struct rte_flow_attr *attr,
1545                                struct rte_flow_error *error)
1546 {
1547         if (action_flags & MLX5_FLOW_ACTION_MARK)
1548                 return rte_flow_error_set(error, EINVAL,
1549                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1550                                           "can't mark and flag in same flow");
1551         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1552                 return rte_flow_error_set(error, EINVAL,
1553                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1554                                           "can't have 2 flag"
1555                                           " actions in same flow");
1556         if (attr->egress)
1557                 return rte_flow_error_set(error, ENOTSUP,
1558                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1559                                           "flag action not supported for "
1560                                           "egress");
1561         return 0;
1562 }
1563
1564 /*
1565  * Validate the mark action.
1566  *
1567  * @param[in] action
1568  *   Pointer to the mark action.
1569  * @param[in] action_flags
1570  *   Bit-fields that hold the actions detected until now.
1571  * @param[in] attr
1572  *   Attributes of flow that includes this action.
1573  * @param[out] error
1574  *   Pointer to error structure.
1575  *
1576  * @return
1577  *   0 on success, a negative errno value otherwise and rte_errno is set.
1578  */
1579 int
1580 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1581                                uint64_t action_flags,
1582                                const struct rte_flow_attr *attr,
1583                                struct rte_flow_error *error)
1584 {
1585         const struct rte_flow_action_mark *mark = action->conf;
1586
1587         if (!mark)
1588                 return rte_flow_error_set(error, EINVAL,
1589                                           RTE_FLOW_ERROR_TYPE_ACTION,
1590                                           action,
1591                                           "configuration cannot be null");
1592         if (mark->id >= MLX5_FLOW_MARK_MAX)
1593                 return rte_flow_error_set(error, EINVAL,
1594                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1595                                           &mark->id,
1596                                           "mark id must be in 0 <= id < "
1597                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1598         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1599                 return rte_flow_error_set(error, EINVAL,
1600                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1601                                           "can't flag and mark in same flow");
1602         if (action_flags & MLX5_FLOW_ACTION_MARK)
1603                 return rte_flow_error_set(error, EINVAL,
1604                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1605                                           "can't have 2 mark actions in same"
1606                                           " flow");
1607         if (attr->egress)
1608                 return rte_flow_error_set(error, ENOTSUP,
1609                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1610                                           "mark action not supported for "
1611                                           "egress");
1612         return 0;
1613 }
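
/*
 * Example (illustrative sketch): a mark action accepted by the checks above,
 * with an ID below the exclusive MLX5_FLOW_MARK_MAX bound:
 *
 *     struct rte_flow_action_mark mark = { .id = 0xbeef };
 *     struct rte_flow_action action = {
 *             .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &mark,
 *     };
 */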
1614
1615 /*
1616  * Validate the drop action.
1617  *
1618  * @param[in] action_flags
1619  *   Bit-fields that hold the actions detected until now.
1620  * @param[in] attr
1621  *   Attributes of flow that includes this action.
1622  * @param[out] error
1623  *   Pointer to error structure.
1624  *
1625  * @return
1626  *   0 on success, a negative errno value otherwise and rte_errno is set.
1627  */
1628 int
1629 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1630                                const struct rte_flow_attr *attr,
1631                                struct rte_flow_error *error)
1632 {
1633         if (attr->egress)
1634                 return rte_flow_error_set(error, ENOTSUP,
1635                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1636                                           "drop action not supported for "
1637                                           "egress");
1638         return 0;
1639 }
1640
1641 /*
1642  * Validate the queue action.
1643  *
1644  * @param[in] action
1645  *   Pointer to the queue action.
1646  * @param[in] action_flags
1647  *   Bit-fields that hold the actions detected until now.
1648  * @param[in] dev
1649  *   Pointer to the Ethernet device structure.
1650  * @param[in] attr
1651  *   Attributes of flow that includes this action.
1652  * @param[out] error
1653  *   Pointer to error structure.
1654  *
1655  * @return
1656  *   0 on success, a negative errno value otherwise and rte_errno is set.
1657  */
1658 int
1659 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1660                                 uint64_t action_flags,
1661                                 struct rte_eth_dev *dev,
1662                                 const struct rte_flow_attr *attr,
1663                                 struct rte_flow_error *error)
1664 {
1665         struct mlx5_priv *priv = dev->data->dev_private;
1666         const struct rte_flow_action_queue *queue = action->conf;
1667
1668         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1669                 return rte_flow_error_set(error, EINVAL,
1670                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1671                                           "can't have 2 fate actions in"
1672                                           " same flow");
1673         if (!priv->rxqs_n)
1674                 return rte_flow_error_set(error, EINVAL,
1675                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1676                                           NULL, "No Rx queues configured");
1677         if (queue->index >= priv->rxqs_n)
1678                 return rte_flow_error_set(error, EINVAL,
1679                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1680                                           &queue->index,
1681                                           "queue index out of range");
1682         if (mlx5_rxq_get(dev, queue->index) == NULL)
1683                 return rte_flow_error_set(error, EINVAL,
1684                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1685                                           &queue->index,
1686                                           "queue is not configured");
1687         if (attr->egress)
1688                 return rte_flow_error_set(error, ENOTSUP,
1689                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1690                                           "queue action not supported for "
1691                                           "egress");
1692         return 0;
1693 }
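
/*
 * Example (illustrative sketch): a queue action accepted by the checks above
 * on an ingress flow, provided Rx queue 0 is configured:
 *
 *     struct rte_flow_action_queue queue = { .index = 0 };
 *     struct rte_flow_action action = {
 *             .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &queue,
 *     };
 */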
1694
1695 /**
1696  * Validate queue numbers for device RSS.
1697  *
1698  * @param[in] dev
1699  *   Configured device.
1700  * @param[in] queues
1701  *   Array of queue numbers.
1702  * @param[in] queues_n
1703  *   Size of the @p queues array.
1704  * @param[out] error
1705  *   On error, filled with a textual error description.
1706  * @param[out] queue_idx
1707  *   On error, filled with an offending queue index in @p queues array.
1708  *
1709  * @return
1710  *   0 on success, a negative errno code on error.
1711  */
1712 static int
1713 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1714                          const uint16_t *queues, uint32_t queues_n,
1715                          const char **error, uint32_t *queue_idx)
1716 {
1717         const struct mlx5_priv *priv = dev->data->dev_private;
1718         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1719         uint32_t i;
1720
1721         for (i = 0; i != queues_n; ++i) {
1722                 struct mlx5_rxq_ctrl *rxq_ctrl = mlx5_rxq_ctrl_get(dev,
1723                                                                    queues[i]);
1724
1725                 if (queues[i] >= priv->rxqs_n) {
1726                         *error = "queue index out of range";
1727                         *queue_idx = i;
1728                         return -EINVAL;
1729                 }
1730                 if (rxq_ctrl == NULL) {
1731                         *error = "queue is not configured";
1732                         *queue_idx = i;
1733                         return -EINVAL;
1734                 }
1735                 if (i == 0)
1736                         rxq_type = rxq_ctrl->type;
1737                 if (rxq_type != rxq_ctrl->type) {
1738                         *error = "combining hairpin and regular RSS queues is not supported";
1739                         *queue_idx = i;
1740                         return -ENOTSUP;
1741                 }
1742         }
1743         return 0;
1744 }
1745
1746 /*
1747  * Validate the RSS action.
1748  *
1749  * @param[in] dev
1750  *   Pointer to the Ethernet device structure.
1751  * @param[in] action
1752  *   Pointer to the RSS action.
1753  * @param[out] error
1754  *   Pointer to error structure.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 int
1760 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1761                          const struct rte_flow_action *action,
1762                          struct rte_flow_error *error)
1763 {
1764         struct mlx5_priv *priv = dev->data->dev_private;
1765         const struct rte_flow_action_rss *rss = action->conf;
1766         int ret;
1767         const char *message;
1768         uint32_t queue_idx;
1769
1770         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1771             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1772                 return rte_flow_error_set(error, ENOTSUP,
1773                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1774                                           &rss->func,
1775                                           "RSS hash function not supported");
1776 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1777         if (rss->level > 2)
1778 #else
1779         if (rss->level > 1)
1780 #endif
1781                 return rte_flow_error_set(error, ENOTSUP,
1782                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1783                                           &rss->level,
1784                                           "tunnel RSS is not supported");
1785         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1786         if (rss->key_len == 0 && rss->key != NULL)
1787                 return rte_flow_error_set(error, ENOTSUP,
1788                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1789                                           &rss->key_len,
1790                                           "RSS hash key length 0");
1791         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1792                 return rte_flow_error_set(error, ENOTSUP,
1793                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1794                                           &rss->key_len,
1795                                           "RSS hash key too small");
1796         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1797                 return rte_flow_error_set(error, ENOTSUP,
1798                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1799                                           &rss->key_len,
1800                                           "RSS hash key too large");
1801         if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1802                 return rte_flow_error_set(error, ENOTSUP,
1803                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1804                                           &rss->queue_num,
1805                                           "number of queues too large");
1806         if (rss->types & MLX5_RSS_HF_MASK)
1807                 return rte_flow_error_set(error, ENOTSUP,
1808                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1809                                           &rss->types,
1810                                           "some RSS protocols are not"
1811                                           " supported");
1812         if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1813             !(rss->types & RTE_ETH_RSS_IP))
1814                 return rte_flow_error_set(error, EINVAL,
1815                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1816                                           "L3 partial RSS requested but L3 RSS"
1817                                           " type not specified");
1818         if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1819             !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1820                 return rte_flow_error_set(error, EINVAL,
1821                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1822                                           "L4 partial RSS requested but L4 RSS"
1823                                           " type not specified");
1824         if (!priv->rxqs_n)
1825                 return rte_flow_error_set(error, EINVAL,
1826                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1827                                           NULL, "No Rx queues configured");
1828         if (!rss->queue_num)
1829                 return rte_flow_error_set(error, EINVAL,
1830                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1831                                           NULL, "No queues configured");
1832         ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1833                                        &message, &queue_idx);
1834         if (ret != 0) {
1835                 return rte_flow_error_set(error, -ret,
1836                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1837                                           &rss->queue[queue_idx], message);
1838         }
1839         return 0;
1840 }
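
/*
 * Example (illustrative sketch): an RSS action configuration satisfying the
 * checks above, assuming at least two Rx queues are configured; a NULL key
 * with key_len == 0 selects the default RSS key:
 *
 *     uint16_t queues[] = { 0, 1 };
 *     struct rte_flow_action_rss rss = {
 *             .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *             .level = 0,
 *             .types = RTE_ETH_RSS_IP | RTE_ETH_RSS_UDP,
 *             .key = NULL,
 *             .key_len = 0,
 *             .queue = queues,
 *             .queue_num = 2,
 *     };
 */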
1841
1842 /*
1843  * Validate the RSS action.
1844  *
1845  * @param[in] action
1846  *   Pointer to the RSS action.
1847  * @param[in] action_flags
1848  *   Bit-fields that hold the actions detected until now.
1849  * @param[in] dev
1850  *   Pointer to the Ethernet device structure.
1851  * @param[in] attr
1852  *   Attributes of flow that includes this action.
1853  * @param[in] item_flags
1854  *   Items that were detected.
1855  * @param[out] error
1856  *   Pointer to error structure.
1857  *
1858  * @return
1859  *   0 on success, a negative errno value otherwise and rte_errno is set.
1860  */
1861 int
1862 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1863                               uint64_t action_flags,
1864                               struct rte_eth_dev *dev,
1865                               const struct rte_flow_attr *attr,
1866                               uint64_t item_flags,
1867                               struct rte_flow_error *error)
1868 {
1869         const struct rte_flow_action_rss *rss = action->conf;
1870         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1871         int ret;
1872
1873         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1874                 return rte_flow_error_set(error, EINVAL,
1875                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1876                                           "can't have 2 fate actions"
1877                                           " in same flow");
1878         ret = mlx5_validate_action_rss(dev, action, error);
1879         if (ret)
1880                 return ret;
1881         if (attr->egress)
1882                 return rte_flow_error_set(error, ENOTSUP,
1883                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1884                                           "rss action not supported for "
1885                                           "egress");
1886         if (rss->level > 1 && !tunnel)
1887                 return rte_flow_error_set(error, EINVAL,
1888                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1889                                           "inner RSS is not supported for "
1890                                           "non-tunnel flows");
1891         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1892             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1893                 return rte_flow_error_set(error, EINVAL,
1894                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1895                                           "RSS on eCPRI is not supported now");
1896         }
1897         if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1898             !(item_flags &
1899               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1900             rss->level > 1)
1901                 return rte_flow_error_set(error, EINVAL,
1902                                           RTE_FLOW_ERROR_TYPE_ITEM, NULL,
1903                                           "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
1904         return 0;
1905 }
1906
1907 /*
1908  * Validate the default miss action.
1909  *
1910  * @param[in] action_flags
1911  *   Bit-fields that hold the actions detected until now.
1912  * @param[out] error
1913  *   Pointer to error structure.
1914  *
1915  * @return
1916  *   0 on success, a negative errno value otherwise and rte_errno is set.
1917  */
1918 int
1919 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1920                                 const struct rte_flow_attr *attr,
1921                                 struct rte_flow_error *error)
1922 {
1923         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1924                 return rte_flow_error_set(error, EINVAL,
1925                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1926                                           "can't have 2 fate actions in"
1927                                           " same flow");
1928         if (attr->egress)
1929                 return rte_flow_error_set(error, ENOTSUP,
1930                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1931                                           "default miss action not supported "
1932                                           "for egress");
1933         if (attr->group)
1934                 return rte_flow_error_set(error, ENOTSUP,
1935                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1936                                           "only group 0 is supported");
1937         if (attr->transfer)
1938                 return rte_flow_error_set(error, ENOTSUP,
1939                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1940                                           NULL, "transfer is not supported");
1941         return 0;
1942 }
1943
1944 /*
1945  * Validate the count action.
1946  *
1947  * @param[in] dev
1948  *   Pointer to the Ethernet device structure.
1949  * @param[in] attr
1950  *   Attributes of flow that includes this action.
1951  * @param[out] error
1952  *   Pointer to error structure.
1953  *
1954  * @return
1955  *   0 on success, a negative errno value otherwise and rte_errno is set.
1956  */
1957 int
1958 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1959                                 const struct rte_flow_attr *attr,
1960                                 struct rte_flow_error *error)
1961 {
1962         if (attr->egress)
1963                 return rte_flow_error_set(error, ENOTSUP,
1964                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1965                                           "count action not supported for "
1966                                           "egress");
1967         return 0;
1968 }
1969
1970 /*
1971  * Validate the ASO CT action.
1972  *
1973  * @param[in] dev
1974  *   Pointer to the Ethernet device structure.
1975  * @param[in] conntrack
1976  *   Pointer to the CT action profile.
1977  * @param[out] error
1978  *   Pointer to error structure.
1979  *
1980  * @return
1981  *   0 on success, a negative errno value otherwise and rte_errno is set.
1982  */
1983 int
1984 mlx5_validate_action_ct(struct rte_eth_dev *dev,
1985                         const struct rte_flow_action_conntrack *conntrack,
1986                         struct rte_flow_error *error)
1987 {
1988         RTE_SET_USED(dev);
1989
1990         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
1991                 return rte_flow_error_set(error, EINVAL,
1992                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1993                                           "Invalid CT state");
1994         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
1995                 return rte_flow_error_set(error, EINVAL,
1996                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1997                                           "Invalid last TCP packet flag");
1998         return 0;
1999 }
2000
2001 /**
2002  * Verify the @p attributes will be correctly understood by the NIC.
2004  *
2005  * @param[in] dev
2006  *   Pointer to the Ethernet device structure.
2007  * @param[in] attributes
2008  *   Pointer to flow attributes
2009  * @param[out] error
2010  *   Pointer to error structure.
2011  *
2012  * @return
2013  *   0 on success, a negative errno value otherwise and rte_errno is set.
2014  */
2015 int
2016 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2017                               const struct rte_flow_attr *attributes,
2018                               struct rte_flow_error *error)
2019 {
2020         struct mlx5_priv *priv = dev->data->dev_private;
2021         uint32_t priority_max = priv->sh->flow_max_priority - 1;
2022
2023         if (attributes->group)
2024                 return rte_flow_error_set(error, ENOTSUP,
2025                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2026                                           NULL, "groups are not supported");
2027         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2028             attributes->priority >= priority_max)
2029                 return rte_flow_error_set(error, ENOTSUP,
2030                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2031                                           NULL, "priority out of range");
2032         if (attributes->egress)
2033                 return rte_flow_error_set(error, ENOTSUP,
2034                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2035                                           "egress is not supported");
2036         if (attributes->transfer && !priv->sh->config.dv_esw_en)
2037                 return rte_flow_error_set(error, ENOTSUP,
2038                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2039                                           NULL, "transfer is not supported");
2040         if (!attributes->ingress)
2041                 return rte_flow_error_set(error, EINVAL,
2042                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2043                                           NULL,
2044                                           "ingress attribute is mandatory");
2045         return 0;
2046 }
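
/*
 * Example (illustrative sketch): attributes accepted by this validation
 * path, i.e. ingress only, group 0 and a priority below the device maximum:
 *
 *     struct rte_flow_attr attr = {
 *             .group = 0,
 *             .priority = 0,
 *             .ingress = 1,
 *     };
 */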
2047
2048 /**
2049  * Validate ICMP6 item.
2050  *
2051  * @param[in] item
2052  *   Item specification.
2053  * @param[in] item_flags
2054  *   Bit-fields that hold the items detected until now.
2055  * @param[in] target_protocol
2056  *   The next protocol in the previous item.
2057  * @param[out] error
2058  *   Pointer to error structure.
2059  *
2060  * @return
2061  *   0 on success, a negative errno value otherwise and rte_errno is set.
2062  */
2063 int
2064 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2065                                uint64_t item_flags,
2066                                uint8_t target_protocol,
2067                                struct rte_flow_error *error)
2068 {
2069         const struct rte_flow_item_icmp6 *mask = item->mask;
2070         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2071         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2072                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2073         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2074                                       MLX5_FLOW_LAYER_OUTER_L4;
2075         int ret;
2076
2077         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2078                 return rte_flow_error_set(error, EINVAL,
2079                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2080                                           "protocol filtering not compatible"
2081                                           " with ICMP6 layer");
2082         if (!(item_flags & l3m))
2083                 return rte_flow_error_set(error, EINVAL,
2084                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2085                                           "IPv6 is mandatory to filter on"
2086                                           " ICMP6");
2087         if (item_flags & l4m)
2088                 return rte_flow_error_set(error, EINVAL,
2089                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2090                                           "multiple L4 layers not supported");
2091         if (!mask)
2092                 mask = &rte_flow_item_icmp6_mask;
2093         ret = mlx5_flow_item_acceptable
2094                 (item, (const uint8_t *)mask,
2095                  (const uint8_t *)&rte_flow_item_icmp6_mask,
2096                  sizeof(struct rte_flow_item_icmp6),
2097                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2098         if (ret < 0)
2099                 return ret;
2100         return 0;
2101 }
2102
2103 /**
2104  * Validate ICMP item.
2105  *
2106  * @param[in] item
2107  *   Item specification.
2108  * @param[in] item_flags
2109  *   Bit-fields that hold the items detected until now.
2110  * @param[out] error
2111  *   Pointer to error structure.
2112  *
2113  * @return
2114  *   0 on success, a negative errno value otherwise and rte_errno is set.
2115  */
2116 int
2117 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2118                              uint64_t item_flags,
2119                              uint8_t target_protocol,
2120                              struct rte_flow_error *error)
2121 {
2122         const struct rte_flow_item_icmp *mask = item->mask;
2123         const struct rte_flow_item_icmp nic_mask = {
2124                 .hdr.icmp_type = 0xff,
2125                 .hdr.icmp_code = 0xff,
2126                 .hdr.icmp_ident = RTE_BE16(0xffff),
2127                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
2128         };
2129         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2130         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2131                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2132         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2133                                       MLX5_FLOW_LAYER_OUTER_L4;
2134         int ret;
2135
2136         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2137                 return rte_flow_error_set(error, EINVAL,
2138                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2139                                           "protocol filtering not compatible"
2140                                           " with ICMP layer");
2141         if (!(item_flags & l3m))
2142                 return rte_flow_error_set(error, EINVAL,
2143                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2144                                           "IPv4 is mandatory to filter"
2145                                           " on ICMP");
2146         if (item_flags & l4m)
2147                 return rte_flow_error_set(error, EINVAL,
2148                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2149                                           "multiple L4 layers not supported");
2150         if (!mask)
2151                 mask = &nic_mask;
2152         ret = mlx5_flow_item_acceptable
2153                 (item, (const uint8_t *)mask,
2154                  (const uint8_t *)&nic_mask,
2155                  sizeof(struct rte_flow_item_icmp),
2156                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2157         if (ret < 0)
2158                 return ret;
2159         return 0;
2160 }
2161
2162 /**
2163  * Validate Ethernet item.
2164  *
2165  * @param[in] item
2166  *   Item specification.
2167  * @param[in] item_flags
2168  *   Bit-fields that hold the items detected until now.
2169  * @param[out] error
2170  *   Pointer to error structure.
2171  *
2172  * @return
2173  *   0 on success, a negative errno value otherwise and rte_errno is set.
2174  */
2175 int
2176 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2177                             uint64_t item_flags, bool ext_vlan_sup,
2178                             struct rte_flow_error *error)
2179 {
2180         const struct rte_flow_item_eth *mask = item->mask;
2181         const struct rte_flow_item_eth nic_mask = {
2182                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2183                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2184                 .type = RTE_BE16(0xffff),
2185                 .has_vlan = ext_vlan_sup ? 1 : 0,
2186         };
2187         int ret;
2188         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2189         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
2190                                        MLX5_FLOW_LAYER_OUTER_L2;
2191
2192         if (item_flags & ethm)
2193                 return rte_flow_error_set(error, ENOTSUP,
2194                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2195                                           "multiple L2 layers not supported");
2196         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2197             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2198                 return rte_flow_error_set(error, EINVAL,
2199                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2200                                           "L2 layer should not follow "
2201                                           "L3 layers");
2202         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2203             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2204                 return rte_flow_error_set(error, EINVAL,
2205                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2206                                           "L2 layer should not follow VLAN");
2207         if (item_flags & MLX5_FLOW_LAYER_GTP)
2208                 return rte_flow_error_set(error, EINVAL,
2209                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2210                                           "L2 layer should not follow GTP");
2211         if (!mask)
2212                 mask = &rte_flow_item_eth_mask;
2213         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2214                                         (const uint8_t *)&nic_mask,
2215                                         sizeof(struct rte_flow_item_eth),
2216                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2217         return ret;
2218 }
2219
2220 /**
2221  * Validate VLAN item.
2222  *
2223  * @param[in] item
2224  *   Item specification.
2225  * @param[in] item_flags
2226  *   Bit-fields that hold the items detected until now.
2227  * @param[in] dev
2228  *   Ethernet device flow is being created on.
2229  * @param[out] error
2230  *   Pointer to error structure.
2231  *
2232  * @return
2233  *   0 on success, a negative errno value otherwise and rte_errno is set.
2234  */
2235 int
2236 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2237                              uint64_t item_flags,
2238                              struct rte_eth_dev *dev,
2239                              struct rte_flow_error *error)
2240 {
2241         const struct rte_flow_item_vlan *spec = item->spec;
2242         const struct rte_flow_item_vlan *mask = item->mask;
2243         const struct rte_flow_item_vlan nic_mask = {
2244                 .tci = RTE_BE16(UINT16_MAX),
2245                 .inner_type = RTE_BE16(UINT16_MAX),
2246         };
2247         uint16_t vlan_tag = 0;
2248         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2249         int ret;
2250         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2251                                         MLX5_FLOW_LAYER_INNER_L4) :
2252                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2253                                         MLX5_FLOW_LAYER_OUTER_L4);
2254         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2255                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2256
2257         if (item_flags & vlanm)
2258                 return rte_flow_error_set(error, EINVAL,
2259                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2260                                           "multiple VLAN layers not supported");
2261         else if ((item_flags & l34m) != 0)
2262                 return rte_flow_error_set(error, EINVAL,
2263                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2264                                           "VLAN cannot follow L3/L4 layer");
2265         if (!mask)
2266                 mask = &rte_flow_item_vlan_mask;
2267         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2268                                         (const uint8_t *)&nic_mask,
2269                                         sizeof(struct rte_flow_item_vlan),
2270                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2271         if (ret)
2272                 return ret;
2273         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2274                 struct mlx5_priv *priv = dev->data->dev_private;
2275
2276                 if (priv->vmwa_context) {
2277                         /*
2278                          * A non-NULL context means we have a virtual machine
2279                          * and SR-IOV enabled, so we have to create a VLAN
2280                          * interface to make the hypervisor set up the E-Switch
2281                          * vport context correctly. We avoid creating multiple
2282                          * VLAN interfaces, so we cannot support a VLAN tag mask.
2283                          */
2284                         return rte_flow_error_set(error, EINVAL,
2285                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2286                                                   item,
2287                                                   "VLAN tag mask is not"
2288                                                   " supported in virtual"
2289                                                   " environment");
2290                 }
2291         }
2292         if (spec) {
2293                 vlan_tag = spec->tci;
2294                 vlan_tag &= mask->tci;
2295         }
2296         /*
2297          * From the Verbs perspective, an empty VLAN is equivalent
2298          * to a packet without a VLAN layer.
2299          */
2300         if (!vlan_tag)
2301                 return rte_flow_error_set(error, EINVAL,
2302                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2303                                           item->spec,
2304                                           "VLAN cannot be empty");
2305         return 0;
2306 }
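
/*
 * Example (illustrative sketch): a VLAN item accepted by the checks above on
 * a port without the VM workaround context (non-zero VID, full 12-bit TCI
 * mask):
 *
 *     struct rte_flow_item_vlan vlan_spec = { .tci = RTE_BE16(100) };
 *     struct rte_flow_item_vlan vlan_mask = { .tci = RTE_BE16(0x0fff) };
 *     struct rte_flow_item item = {
 *             .type = RTE_FLOW_ITEM_TYPE_VLAN,
 *             .spec = &vlan_spec,
 *             .mask = &vlan_mask,
 *     };
 */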
2307
2308 /**
2309  * Validate IPV4 item.
2310  *
2311  * @param[in] item
2312  *   Item specification.
2313  * @param[in] item_flags
2314  *   Bit-fields that hold the items detected until now.
2315  * @param[in] last_item
2316  *   Previous validated item in the pattern items.
2317  * @param[in] ether_type
2318  *   Type in the ethernet layer header (including dot1q).
2319  * @param[in] acc_mask
2320  *   Acceptable mask, if NULL the default internal mask
2321  *   will be used to check whether item fields are supported.
2322  * @param[in] range_accepted
2323  *   True if range of values is accepted for specific fields, false otherwise.
2324  * @param[out] error
2325  *   Pointer to error structure.
2326  *
2327  * @return
2328  *   0 on success, a negative errno value otherwise and rte_errno is set.
2329  */
2330 int
2331 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2332                              uint64_t item_flags,
2333                              uint64_t last_item,
2334                              uint16_t ether_type,
2335                              const struct rte_flow_item_ipv4 *acc_mask,
2336                              bool range_accepted,
2337                              struct rte_flow_error *error)
2338 {
2339         const struct rte_flow_item_ipv4 *mask = item->mask;
2340         const struct rte_flow_item_ipv4 *spec = item->spec;
2341         const struct rte_flow_item_ipv4 nic_mask = {
2342                 .hdr = {
2343                         .src_addr = RTE_BE32(0xffffffff),
2344                         .dst_addr = RTE_BE32(0xffffffff),
2345                         .type_of_service = 0xff,
2346                         .next_proto_id = 0xff,
2347                 },
2348         };
2349         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2350         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2351                                       MLX5_FLOW_LAYER_OUTER_L3;
2352         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2353                                       MLX5_FLOW_LAYER_OUTER_L4;
2354         int ret;
2355         uint8_t next_proto = 0xFF;
2356         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2357                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2358                                   MLX5_FLOW_LAYER_INNER_VLAN);
2359
2360         if ((last_item & l2_vlan) && ether_type &&
2361             ether_type != RTE_ETHER_TYPE_IPV4)
2362                 return rte_flow_error_set(error, EINVAL,
2363                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2364                                           "IPv4 cannot follow L2/VLAN layer "
2365                                           "whose ether type is not IPv4");
2366         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2367                 if (mask && spec)
2368                         next_proto = mask->hdr.next_proto_id &
2369                                      spec->hdr.next_proto_id;
2370                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2371                         return rte_flow_error_set(error, EINVAL,
2372                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2373                                                   item,
2374                                                   "multiple tunnel "
2375                                                   "not supported");
2376         }
2377         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2378                 return rte_flow_error_set(error, EINVAL,
2379                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2380                                           "wrong tunnel type - IPv6 specified "
2381                                           "but IPv4 item provided");
2382         if (item_flags & l3m)
2383                 return rte_flow_error_set(error, ENOTSUP,
2384                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2385                                           "multiple L3 layers not supported");
2386         else if (item_flags & l4m)
2387                 return rte_flow_error_set(error, EINVAL,
2388                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2389                                           "L3 cannot follow an L4 layer.");
2390         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2391                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2392                 return rte_flow_error_set(error, EINVAL,
2393                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2394                                           "L3 cannot follow an NVGRE layer.");
2395         if (!mask)
2396                 mask = &rte_flow_item_ipv4_mask;
2397         else if (mask->hdr.next_proto_id != 0 &&
2398                  mask->hdr.next_proto_id != 0xff)
2399                 return rte_flow_error_set(error, EINVAL,
2400                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2401                                           "partial mask is not supported"
2402                                           " for protocol");
2403         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2404                                         acc_mask ? (const uint8_t *)acc_mask
2405                                                  : (const uint8_t *)&nic_mask,
2406                                         sizeof(struct rte_flow_item_ipv4),
2407                                         range_accepted, error);
2408         if (ret < 0)
2409                 return ret;
2410         return 0;
2411 }
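
/*
 * Example (illustrative sketch): an IPv4 item matching a single destination
 * address with an exact-match mask, which falls within the default nic_mask
 * used above:
 *
 *     struct rte_flow_item_ipv4 ipv4_spec = {
 *             .hdr.dst_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
 *     };
 *     struct rte_flow_item_ipv4 ipv4_mask = {
 *             .hdr.dst_addr = RTE_BE32(0xffffffff),
 *     };
 */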
2412
2413 /**
2414  * Validate IPV6 item.
2415  *
2416  * @param[in] item
2417  *   Item specification.
2418  * @param[in] item_flags
2419  *   Bit-fields that hold the items detected until now.
2420  * @param[in] last_item
2421  *   Previous validated item in the pattern items.
2422  * @param[in] ether_type
2423  *   Type in the ethernet layer header (including dot1q).
2424  * @param[in] acc_mask
2425  *   Acceptable mask, if NULL the default internal mask
2426  *   will be used to check whether item fields are supported.
2427  * @param[out] error
2428  *   Pointer to error structure.
2429  *
2430  * @return
2431  *   0 on success, a negative errno value otherwise and rte_errno is set.
2432  */
2433 int
2434 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2435                              uint64_t item_flags,
2436                              uint64_t last_item,
2437                              uint16_t ether_type,
2438                              const struct rte_flow_item_ipv6 *acc_mask,
2439                              struct rte_flow_error *error)
2440 {
2441         const struct rte_flow_item_ipv6 *mask = item->mask;
2442         const struct rte_flow_item_ipv6 *spec = item->spec;
2443         const struct rte_flow_item_ipv6 nic_mask = {
2444                 .hdr = {
2445                         .src_addr =
2446                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2447                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2448                         .dst_addr =
2449                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2450                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2451                         .vtc_flow = RTE_BE32(0xffffffff),
2452                         .proto = 0xff,
2453                 },
2454         };
2455         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2456         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2457                                       MLX5_FLOW_LAYER_OUTER_L3;
2458         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2459                                       MLX5_FLOW_LAYER_OUTER_L4;
2460         int ret;
2461         uint8_t next_proto = 0xFF;
2462         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2463                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2464                                   MLX5_FLOW_LAYER_INNER_VLAN);
2465
2466         if ((last_item & l2_vlan) && ether_type &&
2467             ether_type != RTE_ETHER_TYPE_IPV6)
2468                 return rte_flow_error_set(error, EINVAL,
2469                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2470                                           "IPv6 cannot follow L2/VLAN layer "
2471                                           "whose ether type is not IPv6");
2472         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2473                 next_proto = spec->hdr.proto;
2474         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2475                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2476                         return rte_flow_error_set(error, EINVAL,
2477                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2478                                                   item,
2479                                                   "multiple tunnel "
2480                                                   "not supported");
2481         }
2482         if (next_proto == IPPROTO_HOPOPTS  ||
2483             next_proto == IPPROTO_ROUTING  ||
2484             next_proto == IPPROTO_FRAGMENT ||
2485             next_proto == IPPROTO_ESP      ||
2486             next_proto == IPPROTO_AH       ||
2487             next_proto == IPPROTO_DSTOPTS)
2488                 return rte_flow_error_set(error, EINVAL,
2489                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2490                                           "IPv6 proto (next header) should "
2491                                           "not be set as extension header");
2492         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2493                 return rte_flow_error_set(error, EINVAL,
2494                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2495                                           "wrong tunnel type - IPv4 specified "
2496                                           "but IPv6 item provided");
2497         if (item_flags & l3m)
2498                 return rte_flow_error_set(error, ENOTSUP,
2499                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2500                                           "multiple L3 layers not supported");
2501         else if (item_flags & l4m)
2502                 return rte_flow_error_set(error, EINVAL,
2503                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2504                                           "L3 cannot follow an L4 layer.");
2505         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2506                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2507                 return rte_flow_error_set(error, EINVAL,
2508                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2509                                           "L3 cannot follow an NVGRE layer.");
2510         if (!mask)
2511                 mask = &rte_flow_item_ipv6_mask;
2512         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2513                                         acc_mask ? (const uint8_t *)acc_mask
2514                                                  : (const uint8_t *)&nic_mask,
2515                                         sizeof(struct rte_flow_item_ipv6),
2516                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2517         if (ret < 0)
2518                 return ret;
2519         return 0;
2520 }
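
/*
 * Illustrative usage note for the validator above (hypothetical,
 * application-side values, not part of this driver): the IPv6 next
 * header may be matched on an upper-layer protocol, but not on an
 * extension header such as HOPOPTS or ROUTING, e.g.
 *
 *   struct rte_flow_item_ipv6 ipv6_spec = {
 *           .hdr = { .proto = IPPROTO_UDP },
 *   };
 *   struct rte_flow_item_ipv6 ipv6_mask = {
 *           .hdr = { .proto = 0xff },
 *   };
 */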
2521
2522 /**
2523  * Validate UDP item.
2524  *
2525  * @param[in] item
2526  *   Item specification.
2527  * @param[in] item_flags
2528  *   Bit-fields that hold the items detected until now.
2529  * @param[in] target_protocol
2530  *   The next protocol in the previous item.
2533  * @param[out] error
2534  *   Pointer to error structure.
2535  *
2536  * @return
2537  *   0 on success, a negative errno value otherwise and rte_errno is set.
2538  */
2539 int
2540 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2541                             uint64_t item_flags,
2542                             uint8_t target_protocol,
2543                             struct rte_flow_error *error)
2544 {
2545         const struct rte_flow_item_udp *mask = item->mask;
2546         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2547         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2548                                       MLX5_FLOW_LAYER_OUTER_L3;
2549         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2550                                       MLX5_FLOW_LAYER_OUTER_L4;
2551         int ret;
2552
2553         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2554                 return rte_flow_error_set(error, EINVAL,
2555                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2556                                           "protocol filtering not compatible"
2557                                           " with UDP layer");
2558         if (!(item_flags & l3m))
2559                 return rte_flow_error_set(error, EINVAL,
2560                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2561                                           "L3 is mandatory to filter on L4");
2562         if (item_flags & l4m)
2563                 return rte_flow_error_set(error, EINVAL,
2564                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2565                                           "multiple L4 layers not supported");
2566         if (!mask)
2567                 mask = &rte_flow_item_udp_mask;
2568         ret = mlx5_flow_item_acceptable
2569                 (item, (const uint8_t *)mask,
2570                  (const uint8_t *)&rte_flow_item_udp_mask,
2571                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2572                  error);
2573         if (ret < 0)
2574                 return ret;
2575         return 0;
2576 }
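
/*
 * Illustrative pattern accepted by the validator above (hypothetical,
 * application-side snippet, not part of this driver): an L3 item must
 * precede the UDP item and no other L4 item may already be present, e.g.
 *
 *   struct rte_flow_item_udp udp_spec = {
 *           .hdr = { .dst_port = RTE_BE16(4789) },
 *   };
 *   struct rte_flow_item_udp udp_mask = {
 *           .hdr = { .dst_port = RTE_BE16(0xffff) },
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP,
 *             .spec = &udp_spec, .mask = &udp_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */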
2577
2578 /**
2579  * Validate TCP item.
2580  *
2581  * @param[in] item
2582  *   Item specification.
2583  * @param[in] item_flags
2584  *   Bit-fields that hold the items detected until now.
2585  * @param[in] target_protocol
2586  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2587  * @param[out] error
2588  *   Pointer to error structure.
2589  *
2590  * @return
2591  *   0 on success, a negative errno value otherwise and rte_errno is set.
2592  */
2593 int
2594 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2595                             uint64_t item_flags,
2596                             uint8_t target_protocol,
2597                             const struct rte_flow_item_tcp *flow_mask,
2598                             struct rte_flow_error *error)
2599 {
2600         const struct rte_flow_item_tcp *mask = item->mask;
2601         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2602         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2603                                       MLX5_FLOW_LAYER_OUTER_L3;
2604         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2605                                       MLX5_FLOW_LAYER_OUTER_L4;
2606         int ret;
2607
2608         MLX5_ASSERT(flow_mask);
2609         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2610                 return rte_flow_error_set(error, EINVAL,
2611                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2612                                           "protocol filtering not compatible"
2613                                           " with TCP layer");
2614         if (!(item_flags & l3m))
2615                 return rte_flow_error_set(error, EINVAL,
2616                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2617                                           "L3 is mandatory to filter on L4");
2618         if (item_flags & l4m)
2619                 return rte_flow_error_set(error, EINVAL,
2620                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2621                                           "multiple L4 layers not supported");
2622         if (!mask)
2623                 mask = &rte_flow_item_tcp_mask;
2624         ret = mlx5_flow_item_acceptable
2625                 (item, (const uint8_t *)mask,
2626                  (const uint8_t *)flow_mask,
2627                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2628                  error);
2629         if (ret < 0)
2630                 return ret;
2631         return 0;
2632 }
2633
2634 /**
2635  * Validate VXLAN item.
2636  *
2637  * @param[in] dev
2638  *   Pointer to the Ethernet device structure.
2639  * @param[in] udp_dport
2640  *   UDP destination port
2641  * @param[in] item
2642  *   Item specification.
2643  * @param[in] item_flags
2644  *   Bit-fields that hold the items detected until now.
2645  * @param[in] attr
2646  *   Flow rule attributes.
2647  * @param[out] error
2648  *   Pointer to error structure.
2649  *
2650  * @return
2651  *   0 on success, a negative errno value otherwise and rte_errno is set.
2652  */
2653 int
2654 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2655                               uint16_t udp_dport,
2656                               const struct rte_flow_item *item,
2657                               uint64_t item_flags,
2658                               const struct rte_flow_attr *attr,
2659                               struct rte_flow_error *error)
2660 {
2661         const struct rte_flow_item_vxlan *spec = item->spec;
2662         const struct rte_flow_item_vxlan *mask = item->mask;
2663         int ret;
2664         struct mlx5_priv *priv = dev->data->dev_private;
2665         union vni {
2666                 uint32_t vlan_id;
2667                 uint8_t vni[4];
2668         } id = { .vlan_id = 0, };
2669         const struct rte_flow_item_vxlan nic_mask = {
2670                 .vni = "\xff\xff\xff",
2671                 .rsvd1 = 0xff,
2672         };
2673         const struct rte_flow_item_vxlan *valid_mask;
2674
2675         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2676                 return rte_flow_error_set(error, ENOTSUP,
2677                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2678                                           "multiple tunnel layers not"
2679                                           " supported");
2680         valid_mask = &rte_flow_item_vxlan_mask;
2681         /*
2682          * Verify only UDPv4 is present as defined in
2683          * https://tools.ietf.org/html/rfc7348
2684          */
2685         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2686                 return rte_flow_error_set(error, EINVAL,
2687                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2688                                           "no outer UDP layer found");
2689         if (!mask)
2690                 mask = &rte_flow_item_vxlan_mask;
2691
2692         if (priv->sh->steering_format_version !=
2693             MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2694             !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2695                 /* FDB domain & NIC domain non-zero group */
2696                 if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2697                         valid_mask = &nic_mask;
2698                 /* Group zero in NIC domain */
2699                 if (!attr->group && !attr->transfer &&
2700                     priv->sh->tunnel_header_0_1)
2701                         valid_mask = &nic_mask;
2702         }
2703         ret = mlx5_flow_item_acceptable
2704                 (item, (const uint8_t *)mask,
2705                  (const uint8_t *)valid_mask,
2706                  sizeof(struct rte_flow_item_vxlan),
2707                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2708         if (ret < 0)
2709                 return ret;
2710         if (spec) {
2711                 memcpy(&id.vni[1], spec->vni, 3);
2712                 memcpy(&id.vni[1], mask->vni, 3);
2713         }
2714         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2715                 return rte_flow_error_set(error, ENOTSUP,
2716                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2717                                           "VXLAN tunnel must be fully defined");
2718         return 0;
2719 }
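
/*
 * Illustrative pattern accepted by the validator above (hypothetical,
 * application-side snippet, not part of this driver): the outer layers
 * must be fully defined and VXLAN must follow an outer UDP item, e.g.
 *
 *   struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x00\x00\x2a" };
 *   struct rte_flow_item_vxlan vxlan_mask = { .vni = "\xff\xff\xff" };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *             .spec = &vxlan_spec, .mask = &vxlan_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */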
2720
2721 /**
2722  * Validate VXLAN_GPE item.
2723  *
2724  * @param[in] item
2725  *   Item specification.
2726  * @param[in] item_flags
2727  *   Bit-fields that hold the items detected until now.
2728  * @param[in] dev
2729  *   Pointer to the Ethernet device structure.
2732  * @param[out] error
2733  *   Pointer to error structure.
2734  *
2735  * @return
2736  *   0 on success, a negative errno value otherwise and rte_errno is set.
2737  */
2738 int
2739 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2740                                   uint64_t item_flags,
2741                                   struct rte_eth_dev *dev,
2742                                   struct rte_flow_error *error)
2743 {
2744         struct mlx5_priv *priv = dev->data->dev_private;
2745         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2746         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2747         int ret;
2748         union vni {
2749                 uint32_t vlan_id;
2750                 uint8_t vni[4];
2751         } id = { .vlan_id = 0, };
2752
2753         if (!priv->sh->config.l3_vxlan_en)
2754                 return rte_flow_error_set(error, ENOTSUP,
2755                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2756                                           "L3 VXLAN is not enabled by device"
2757                                           " parameter and/or not configured in"
2758                                           " firmware");
2759         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2760                 return rte_flow_error_set(error, ENOTSUP,
2761                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2762                                           "multiple tunnel layers not"
2763                                           " supported");
2764         /*
2765          * Verify an outer UDP layer is present; VXLAN-GPE is carried
2766          * over UDP like VXLAN (https://tools.ietf.org/html/rfc7348).
2767          */
2768         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2769                 return rte_flow_error_set(error, EINVAL,
2770                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2771                                           "no outer UDP layer found");
2772         if (!mask)
2773                 mask = &rte_flow_item_vxlan_gpe_mask;
2774         ret = mlx5_flow_item_acceptable
2775                 (item, (const uint8_t *)mask,
2776                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2777                  sizeof(struct rte_flow_item_vxlan_gpe),
2778                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2779         if (ret < 0)
2780                 return ret;
2781         if (spec) {
2782                 if (spec->protocol)
2783                         return rte_flow_error_set(error, ENOTSUP,
2784                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2785                                                   item,
2786                                                   "VxLAN-GPE protocol"
2787                                                   " not supported");
2788                 memcpy(&id.vni[1], spec->vni, 3);
2789                 memcpy(&id.vni[1], mask->vni, 3);
2790         }
2791         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2792                 return rte_flow_error_set(error, ENOTSUP,
2793                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2794                                           "VXLAN-GPE tunnel must be fully"
2795                                           " defined");
2796         return 0;
2797 }

2798 /**
2799  * Validate GRE Key item.
2800  *
2801  * @param[in] item
2802  *   Item specification.
2803  * @param[in] item_flags
2804  *   Bit flags to mark detected items.
2805  * @param[in] gre_item
2806  *   Pointer to the preceding GRE item.
2807  * @param[out] error
2808  *   Pointer to error structure.
2809  *
2810  * @return
2811  *   0 on success, a negative errno value otherwise and rte_errno is set.
2812  */
2813 int
2814 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2815                                 uint64_t item_flags,
2816                                 const struct rte_flow_item *gre_item,
2817                                 struct rte_flow_error *error)
2818 {
2819         const rte_be32_t *mask = item->mask;
2820         int ret = 0;
2821         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2822         const struct rte_flow_item_gre *gre_spec;
2823         const struct rte_flow_item_gre *gre_mask;
2824
2825         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2826                 return rte_flow_error_set(error, ENOTSUP,
2827                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2828                                           "Multiple GRE key items not supported");
2829         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2830                 return rte_flow_error_set(error, ENOTSUP,
2831                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2832                                           "No preceding GRE header");
2833         if (item_flags & MLX5_FLOW_LAYER_INNER)
2834                 return rte_flow_error_set(error, ENOTSUP,
2835                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2836                                           "GRE key following a wrong item");
2837         gre_mask = gre_item->mask;
2838         if (!gre_mask)
2839                 gre_mask = &rte_flow_item_gre_mask;
2840         gre_spec = gre_item->spec;
2841         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2842                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2843                 return rte_flow_error_set(error, EINVAL,
2844                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2845                                           "Key bit must be on");
2846
2847         if (!mask)
2848                 mask = &gre_key_default_mask;
2849         ret = mlx5_flow_item_acceptable
2850                 (item, (const uint8_t *)mask,
2851                  (const uint8_t *)&gre_key_default_mask,
2852                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2853         return ret;
2854 }
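
/*
 * Illustrative pattern accepted by the validator above (hypothetical,
 * application-side snippet, not part of this driver): the GRE_KEY item
 * must follow a GRE item, and when the GRE mask covers the K bit
 * (0x2000 in c_rsvd0_ver) that bit must be set in the GRE spec, e.g.
 *
 *   struct rte_flow_item_gre gre_spec = {
 *           .c_rsvd0_ver = RTE_BE16(0x2000),
 *   };
 *   rte_be32_t gre_key = RTE_BE32(0x1234);
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &gre_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */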
2855
2856 /**
2857  * Validate GRE item.
2858  *
2859  * @param[in] item
2860  *   Item specification.
2861  * @param[in] item_flags
2862  *   Bit flags to mark detected items.
2863  * @param[in] target_protocol
2864  *   The next protocol in the previous item.
2865  * @param[out] error
2866  *   Pointer to error structure.
2867  *
2868  * @return
2869  *   0 on success, a negative errno value otherwise and rte_errno is set.
2870  */
2871 int
2872 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2873                             uint64_t item_flags,
2874                             uint8_t target_protocol,
2875                             struct rte_flow_error *error)
2876 {
2877         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2878         const struct rte_flow_item_gre *mask = item->mask;
2879         int ret;
2880         const struct rte_flow_item_gre nic_mask = {
2881                 .c_rsvd0_ver = RTE_BE16(0xB000),
2882                 .protocol = RTE_BE16(UINT16_MAX),
2883         };
2884
2885         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2886                 return rte_flow_error_set(error, EINVAL,
2887                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2888                                           "protocol filtering not compatible"
2889                                           " with this GRE layer");
2890         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2891                 return rte_flow_error_set(error, ENOTSUP,
2892                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2893                                           "multiple tunnel layers not"
2894                                           " supported");
2895         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2896                 return rte_flow_error_set(error, ENOTSUP,
2897                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2898                                           "L3 Layer is missing");
2899         if (!mask)
2900                 mask = &rte_flow_item_gre_mask;
2901         ret = mlx5_flow_item_acceptable
2902                 (item, (const uint8_t *)mask,
2903                  (const uint8_t *)&nic_mask,
2904                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2905                  error);
2906         if (ret < 0)
2907                 return ret;
2908 #ifndef HAVE_MLX5DV_DR
2909 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2910         if (spec && (spec->protocol & mask->protocol))
2911                 return rte_flow_error_set(error, ENOTSUP,
2912                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2913                                           "without MPLS support the"
2914                                           " specification cannot be used for"
2915                                           " filtering");
2916 #endif
2917 #endif
2918         return 0;
2919 }
2920
2921 /**
2922  * Validate Geneve item.
2923  *
2924  * @param[in] item
2925  *   Item specification.
2926  * @param[in] item_flags
2927  *   Bit-fields that hold the items detected until now.
2928  * @param[in] dev
2929  *   Pointer to the Ethernet device structure.
2930  * @param[out] error
2931  *   Pointer to error structure.
2932  *
2933  * @return
2934  *   0 on success, a negative errno value otherwise and rte_errno is set.
2935  */
2937 int
2938 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2939                                uint64_t item_flags,
2940                                struct rte_eth_dev *dev,
2941                                struct rte_flow_error *error)
2942 {
2943         struct mlx5_priv *priv = dev->data->dev_private;
2944         const struct rte_flow_item_geneve *spec = item->spec;
2945         const struct rte_flow_item_geneve *mask = item->mask;
2946         int ret;
2947         uint16_t gbhdr;
2948         uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
2949                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2950         const struct rte_flow_item_geneve nic_mask = {
2951                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2952                 .vni = "\xff\xff\xff",
2953                 .protocol = RTE_BE16(UINT16_MAX),
2954         };
2955
2956         if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
2957                 return rte_flow_error_set(error, ENOTSUP,
2958                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2959                                           "L3 Geneve is not enabled by device"
2960                                           " parameter and/or not configured in"
2961                                           " firmware");
2962         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2963                 return rte_flow_error_set(error, ENOTSUP,
2964                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2965                                           "multiple tunnel layers not"
2966                                           " supported");
2967         /*
2968          * Verify an outer UDP layer is present; Geneve is carried
2969          * over UDP (https://tools.ietf.org/html/rfc8926).
2970          */
2971         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2972                 return rte_flow_error_set(error, EINVAL,
2973                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2974                                           "no outer UDP layer found");
2975         if (!mask)
2976                 mask = &rte_flow_item_geneve_mask;
2977         ret = mlx5_flow_item_acceptable
2978                                   (item, (const uint8_t *)mask,
2979                                    (const uint8_t *)&nic_mask,
2980                                    sizeof(struct rte_flow_item_geneve),
2981                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2982         if (ret)
2983                 return ret;
2984         if (spec) {
2985                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2986                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2987                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2988                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2989                         return rte_flow_error_set(error, ENOTSUP,
2990                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2991                                                   item,
2992                                                   "Geneve protocol unsupported"
2993                                                   " fields are being used");
2994                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2995                         return rte_flow_error_set
2996                                         (error, ENOTSUP,
2997                                          RTE_FLOW_ERROR_TYPE_ITEM,
2998                                          item,
2999                                          "Unsupported Geneve options length");
3000         }
3001         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3002                 return rte_flow_error_set
3003                                     (error, ENOTSUP,
3004                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
3005                                      "Geneve tunnel must be fully defined");
3006         return 0;
3007 }
3008
3009 /**
3010  * Validate Geneve TLV option item.
3011  *
3012  * @param[in] item
3013  *   Item specification.
3014  * @param[in] last_item
3015  *   Previous validated item in the pattern items.
3016  * @param[in] geneve_item
3017  *   Previous GENEVE item specification.
3018  * @param[in] dev
3019  *   Pointer to the rte_eth_dev structure.
3020  * @param[out] error
3021  *   Pointer to error structure.
3022  *
3023  * @return
3024  *   0 on success, a negative errno value otherwise and rte_errno is set.
3025  */
3026 int
3027 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3028                                    uint64_t last_item,
3029                                    const struct rte_flow_item *geneve_item,
3030                                    struct rte_eth_dev *dev,
3031                                    struct rte_flow_error *error)
3032 {
3033         struct mlx5_priv *priv = dev->data->dev_private;
3034         struct mlx5_dev_ctx_shared *sh = priv->sh;
3035         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3036         struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3037         uint8_t data_max_supported =
3038                         hca_attr->max_geneve_tlv_option_data_len * 4;
3039         const struct rte_flow_item_geneve *geneve_spec;
3040         const struct rte_flow_item_geneve *geneve_mask;
3041         const struct rte_flow_item_geneve_opt *spec = item->spec;
3042         const struct rte_flow_item_geneve_opt *mask = item->mask;
3043         unsigned int i;
3044         unsigned int data_len;
3045         uint8_t tlv_option_len;
3046         uint16_t optlen_m, optlen_v;
3047         const struct rte_flow_item_geneve_opt full_mask = {
3048                 .option_class = RTE_BE16(0xffff),
3049                 .option_type = 0xff,
3050                 .option_len = 0x1f,
3051         };
3052
3053         if (!mask)
3054                 mask = &rte_flow_item_geneve_opt_mask;
3055         if (!spec)
3056                 return rte_flow_error_set
3057                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3058                         "Geneve TLV opt class/type/length must be specified");
3059         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3060                 return rte_flow_error_set
3061                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3062                         "Geneve TLV opt length exceeds the limit (31)");
3063         /* Check if class type and length masks are full. */
3064         if (full_mask.option_class != mask->option_class ||
3065             full_mask.option_type != mask->option_type ||
3066             full_mask.option_len != (mask->option_len & full_mask.option_len))
3067                 return rte_flow_error_set
3068                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3069                         "Geneve TLV opt class/type/length masks must be full");
3070         /* Check if length is supported */
3071         if ((uint32_t)spec->option_len >
3072                         hca_attr->max_geneve_tlv_option_data_len)
3073                 return rte_flow_error_set
3074                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3075                         "Geneve TLV opt length not supported");
3076         if (hca_attr->max_geneve_tlv_options > 1)
3077                 DRV_LOG(DEBUG,
3078                         "max_geneve_tlv_options supports more than 1 option");
3079         /* Check GENEVE item preceding. */
3080         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3081                 return rte_flow_error_set
3082                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3083                         "Geneve opt item must be preceded with Geneve item");
3084         geneve_spec = geneve_item->spec;
3085         geneve_mask = geneve_item->mask ? geneve_item->mask :
3086                                           &rte_flow_item_geneve_mask;
3087         /* Check if GENEVE TLV option size doesn't exceed option length */
3088         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3089                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
3090                 tlv_option_len = spec->option_len & mask->option_len;
3091                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3092                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3093                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3094                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3095                 if ((optlen_v & optlen_m) <= tlv_option_len)
3096                         return rte_flow_error_set
3097                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3098                                  "GENEVE TLV option length exceeds optlen");
3099         }
3100         /* Check if length is 0 or data is 0. */
3101         if (spec->data == NULL || spec->option_len == 0)
3102                 return rte_flow_error_set
3103                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3104                         "Geneve TLV opt with zero data/length not supported");
3105         /* Check not all data & mask are 0. */
3106         data_len = spec->option_len * 4;
3107         if (mask->data == NULL) {
3108                 for (i = 0; i < data_len; i++)
3109                         if (spec->data[i])
3110                                 break;
3111                 if (i == data_len)
3112                         return rte_flow_error_set(error, ENOTSUP,
3113                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3114                                 "Can't match on Geneve option data 0");
3115         } else {
3116                 for (i = 0; i < data_len; i++)
3117                         if (spec->data[i] & mask->data[i])
3118                                 break;
3119                 if (i == data_len)
3120                         return rte_flow_error_set(error, ENOTSUP,
3121                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3122                                 "Can't match on Geneve option data and mask 0");
3123                 /* Check data mask supported. */
3124                 for (i = data_max_supported; i < data_len ; i++)
3125                         if (mask->data[i])
3126                                 return rte_flow_error_set(error, ENOTSUP,
3127                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
3128                                         "Data mask is of unsupported size");
3129         }
3130         /* Check GENEVE option is supported in NIC. */
3131         if (!hca_attr->geneve_tlv_opt)
3132                 return rte_flow_error_set
3133                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3134                         "Geneve TLV opt not supported");
3135         /* Check if we already have geneve option with different type/class. */
3136         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3137         geneve_opt_resource = sh->geneve_tlv_option_resource;
3138         if (geneve_opt_resource != NULL)
3139                 if (geneve_opt_resource->option_class != spec->option_class ||
3140                     geneve_opt_resource->option_type != spec->option_type ||
3141                     geneve_opt_resource->length != spec->option_len) {
3142                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3143                         return rte_flow_error_set(error, ENOTSUP,
3144                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3145                                 "Only one Geneve TLV option supported");
3146                 }
3147         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3148         return 0;
3149 }
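
/*
 * Illustrative GENEVE option item accepted by the checks above
 * (hypothetical, application-side snippet, not part of this driver):
 * class/type/length masks are full and the option data mask is
 * non-zero, e.g.
 *
 *   uint32_t opt_data = RTE_BE32(0x00000001);
 *   uint32_t opt_data_mask = RTE_BE32(0xffffffff);
 *   struct rte_flow_item_geneve_opt opt_spec = {
 *           .option_class = RTE_BE16(0x0102),
 *           .option_type = 0x01,
 *           .option_len = 1,
 *           .data = &opt_data,
 *   };
 *   struct rte_flow_item_geneve_opt opt_mask = {
 *           .option_class = RTE_BE16(0xffff),
 *           .option_type = 0xff,
 *           .option_len = 0x1f,
 *           .data = &opt_data_mask,
 *   };
 */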
3150
3151 /**
3152  * Validate MPLS item.
3153  *
3154  * @param[in] dev
3155  *   Pointer to the rte_eth_dev structure.
3156  * @param[in] item
3157  *   Item specification.
3158  * @param[in] item_flags
3159  *   Bit-fields that hold the items detected until now.
3160  * @param[in] prev_layer
3161  *   The protocol layer indicated in previous item.
3162  * @param[out] error
3163  *   Pointer to error structure.
3164  *
3165  * @return
3166  *   0 on success, a negative errno value otherwise and rte_errno is set.
3167  */
3168 int
3169 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3170                              const struct rte_flow_item *item __rte_unused,
3171                              uint64_t item_flags __rte_unused,
3172                              uint64_t prev_layer __rte_unused,
3173                              struct rte_flow_error *error)
3174 {
3175 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3176         const struct rte_flow_item_mpls *mask = item->mask;
3177         struct mlx5_priv *priv = dev->data->dev_private;
3178         int ret;
3179
3180         if (!priv->sh->dev_cap.mpls_en)
3181                 return rte_flow_error_set(error, ENOTSUP,
3182                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3183                                           "MPLS not supported or"
3184                                           " disabled in firmware"
3185                                           " configuration.");
3186         /* MPLS over UDP or GRE is allowed. */
3187         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3188                             MLX5_FLOW_LAYER_GRE |
3189                             MLX5_FLOW_LAYER_GRE_KEY)))
3190                 return rte_flow_error_set(error, EINVAL,
3191                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3192                                           "protocol filtering not compatible"
3193                                           " with MPLS layer");
3194         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3195         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3196             !(item_flags & MLX5_FLOW_LAYER_GRE))
3197                 return rte_flow_error_set(error, ENOTSUP,
3198                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3199                                           "multiple tunnel layers not"
3200                                           " supported");
3201         if (!mask)
3202                 mask = &rte_flow_item_mpls_mask;
3203         ret = mlx5_flow_item_acceptable
3204                 (item, (const uint8_t *)mask,
3205                  (const uint8_t *)&rte_flow_item_mpls_mask,
3206                  sizeof(struct rte_flow_item_mpls),
3207                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3208         if (ret < 0)
3209                 return ret;
3210         return 0;
3211 #else
3212         return rte_flow_error_set(error, ENOTSUP,
3213                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3214                                   "MPLS is not supported by Verbs, please"
3215                                   " update.");
3216 #endif
3217 }
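
/*
 * Illustrative pattern accepted by the validator above (hypothetical,
 * application-side snippet, not part of this driver): MPLS is only
 * accepted over an outer UDP item or over GRE (optionally with a
 * GRE_KEY item), e.g.
 *
 *   struct rte_flow_item_mpls mpls_spec = {
 *           .label_tc_s = "\x00\x01\x01",
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_GRE },
 *           { .type = RTE_FLOW_ITEM_TYPE_MPLS,
 *             .spec = &mpls_spec, .mask = &rte_flow_item_mpls_mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */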
3218
3219 /**
3220  * Validate NVGRE item.
3221  *
3222  * @param[in] item
3223  *   Item specification.
3224  * @param[in] item_flags
3225  *   Bit flags to mark detected items.
3226  * @param[in] target_protocol
3227  *   The next protocol in the previous item.
3228  * @param[out] error
3229  *   Pointer to error structure.
3230  *
3231  * @return
3232  *   0 on success, a negative errno value otherwise and rte_errno is set.
3233  */
3234 int
3235 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3236                               uint64_t item_flags,
3237                               uint8_t target_protocol,
3238                               struct rte_flow_error *error)
3239 {
3240         const struct rte_flow_item_nvgre *mask = item->mask;
3241         int ret;
3242
3243         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3244                 return rte_flow_error_set(error, EINVAL,
3245                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3246                                           "protocol filtering not compatible"
3247                                           " with this GRE layer");
3248         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3249                 return rte_flow_error_set(error, ENOTSUP,
3250                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3251                                           "multiple tunnel layers not"
3252                                           " supported");
3253         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3254                 return rte_flow_error_set(error, ENOTSUP,
3255                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3256                                           "L3 Layer is missing");
3257         if (!mask)
3258                 mask = &rte_flow_item_nvgre_mask;
3259         ret = mlx5_flow_item_acceptable
3260                 (item, (const uint8_t *)mask,
3261                  (const uint8_t *)&rte_flow_item_nvgre_mask,
3262                  sizeof(struct rte_flow_item_nvgre),
3263                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3264         if (ret < 0)
3265                 return ret;
3266         return 0;
3267 }
3268
3269 /**
3270  * Validate eCPRI item.
3271  *
3272  * @param[in] item
3273  *   Item specification.
3274  * @param[in] item_flags
3275  *   Bit-fields that hold the items detected until now.
3276  * @param[in] last_item
3277  *   Previous validated item in the pattern items.
3278  * @param[in] ether_type
3279  *   Type in the ethernet layer header (including dot1q).
3280  * @param[in] acc_mask
3281  *   Acceptable mask, if NULL the default internal mask
3282  *   will be used to check whether item fields are supported.
3283  * @param[out] error
3284  *   Pointer to error structure.
3285  *
3286  * @return
3287  *   0 on success, a negative errno value otherwise and rte_errno is set.
3288  */
3289 int
3290 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3291                               uint64_t item_flags,
3292                               uint64_t last_item,
3293                               uint16_t ether_type,
3294                               const struct rte_flow_item_ecpri *acc_mask,
3295                               struct rte_flow_error *error)
3296 {
3297         const struct rte_flow_item_ecpri *mask = item->mask;
3298         const struct rte_flow_item_ecpri nic_mask = {
3299                 .hdr = {
3300                         .common = {
3301                                 .u32 =
3302                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3303                                         .type = 0xFF,
3304                                         }).u32),
3305                         },
3306                         .dummy[0] = 0xFFFFFFFF,
3307                 },
3308         };
3309         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3310                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3311         struct rte_flow_item_ecpri mask_lo;
3312
3313         if (!(last_item & outer_l2_vlan) &&
3314             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3315                 return rte_flow_error_set(error, EINVAL,
3316                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3317                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3318         if ((last_item & outer_l2_vlan) && ether_type &&
3319             ether_type != RTE_ETHER_TYPE_ECPRI)
3320                 return rte_flow_error_set(error, EINVAL,
3321                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3322                                           "eCPRI cannot follow L2/VLAN layer whose ether type is not 0xAEFE");
3323         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3324                 return rte_flow_error_set(error, EINVAL,
3325                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3326                                           "eCPRI with tunnel is not supported right now");
3327         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3328                 return rte_flow_error_set(error, ENOTSUP,
3329                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3330                                           "multiple L3 layers not supported");
3331         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3332                 return rte_flow_error_set(error, EINVAL,
3333                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3334                                           "eCPRI cannot coexist with a TCP layer");
3335         /* In specification, eCPRI could be over UDP layer. */
3336         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3337                 return rte_flow_error_set(error, EINVAL,
3338                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3339                                           "eCPRI over UDP layer is not supported yet");
3340         /* Mask for type field in common header could be zero. */
3341         if (!mask)
3342                 mask = &rte_flow_item_ecpri_mask;
3343         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3344         /* Input mask is in big-endian format. */
3345         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3346                 return rte_flow_error_set(error, EINVAL,
3347                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3348                                           "partial mask is not supported for protocol");
3349         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3350                 return rte_flow_error_set(error, EINVAL,
3351                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3352                                           "message header mask must be after a type mask");
3353         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3354                                          acc_mask ? (const uint8_t *)acc_mask
3355                                                   : (const uint8_t *)&nic_mask,
3356                                          sizeof(struct rte_flow_item_ecpri),
3357                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3358 }
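
/*
 * Illustrative placement accepted by the validator above (hypothetical,
 * application-side snippet, not part of this driver): eCPRI directly
 * over Ethernet (or VLAN) whose ether type is 0xAEFE, with no tunnel,
 * L3 or TCP item in the same pattern, e.g.
 *
 *   struct rte_flow_item_eth eth_spec = {
 *           .type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_ECPRI },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */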
3359
3360 static int
3361 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3362                    const struct rte_flow_attr *attr __rte_unused,
3363                    const struct rte_flow_item items[] __rte_unused,
3364                    const struct rte_flow_action actions[] __rte_unused,
3365                    bool external __rte_unused,
3366                    int hairpin __rte_unused,
3367                    struct rte_flow_error *error)
3368 {
3369         return rte_flow_error_set(error, ENOTSUP,
3370                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3371 }
3372
3373 static struct mlx5_flow *
3374 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3375                   const struct rte_flow_attr *attr __rte_unused,
3376                   const struct rte_flow_item items[] __rte_unused,
3377                   const struct rte_flow_action actions[] __rte_unused,
3378                   struct rte_flow_error *error)
3379 {
3380         rte_flow_error_set(error, ENOTSUP,
3381                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3382         return NULL;
3383 }
3384
3385 static int
3386 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3387                     struct mlx5_flow *dev_flow __rte_unused,
3388                     const struct rte_flow_attr *attr __rte_unused,
3389                     const struct rte_flow_item items[] __rte_unused,
3390                     const struct rte_flow_action actions[] __rte_unused,
3391                     struct rte_flow_error *error)
3392 {
3393         return rte_flow_error_set(error, ENOTSUP,
3394                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3395 }
3396
3397 static int
3398 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3399                 struct rte_flow *flow __rte_unused,
3400                 struct rte_flow_error *error)
3401 {
3402         return rte_flow_error_set(error, ENOTSUP,
3403                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3404 }
3405
3406 static void
3407 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3408                  struct rte_flow *flow __rte_unused)
3409 {
3410 }
3411
3412 static void
3413 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3414                   struct rte_flow *flow __rte_unused)
3415 {
3416 }
3417
3418 static int
3419 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3420                 struct rte_flow *flow __rte_unused,
3421                 const struct rte_flow_action *actions __rte_unused,
3422                 void *data __rte_unused,
3423                 struct rte_flow_error *error)
3424 {
3425         return rte_flow_error_set(error, ENOTSUP,
3426                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3427 }
3428
3429 static int
3430 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3431                       uint32_t domains __rte_unused,
3432                       uint32_t flags __rte_unused)
3433 {
3434         return 0;
3435 }
3436
3437 /* Void driver to protect from null pointer reference. */
3438 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3439         .validate = flow_null_validate,
3440         .prepare = flow_null_prepare,
3441         .translate = flow_null_translate,
3442         .apply = flow_null_apply,
3443         .remove = flow_null_remove,
3444         .destroy = flow_null_destroy,
3445         .query = flow_null_query,
3446         .sync_domain = flow_null_sync_domain,
3447 };
3448
3449 /**
3450  * Select flow driver type according to flow attributes and device
3451  * configuration.
3452  *
3453  * @param[in] dev
3454  *   Pointer to the dev structure.
3455  * @param[in] attr
3456  *   Pointer to the flow attributes.
3457  *
3458  * @return
3459  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3460  */
3461 static enum mlx5_flow_drv_type
3462 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3463 {
3464         struct mlx5_priv *priv = dev->data->dev_private;
3465         /* The OS can determine first a specific flow type (DV, VERBS) */
3466         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3467
3468         if (type != MLX5_FLOW_TYPE_MAX)
3469                 return type;
3470         /*
3471          * Currently when dv_flow_en == 2, only HW steering engine is
3472          * supported. New engines can also be chosen here if ready.
3473          */
3474         if (priv->sh->config.dv_flow_en == 2)
3475                 return MLX5_FLOW_TYPE_HW;
3476         /* If no OS specific type - continue with DV/VERBS selection */
3477         if (attr->transfer && priv->sh->config.dv_esw_en)
3478                 type = MLX5_FLOW_TYPE_DV;
3479         if (!attr->transfer)
3480                 type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3481                                                      MLX5_FLOW_TYPE_VERBS;
3482         return type;
3483 }
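
/*
 * Informational summary of the selection above:
 *   - an OS-specific type returned by mlx5_flow_os_get_type() wins;
 *   - dv_flow_en == 2 selects the HW steering engine (MLX5_FLOW_TYPE_HW);
 *   - transfer rules require dv_esw_en and use MLX5_FLOW_TYPE_DV;
 *   - non-transfer rules use DV when dv_flow_en is set, VERBS otherwise.
 */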
3484
3485 #define flow_get_drv_ops(type) flow_drv_ops[type]
3486
3487 /**
3488  * Flow driver validation API. This abstracts calling driver specific functions.
3489  * The type of flow driver is determined according to flow attributes.
3490  *
3491  * @param[in] dev
3492  *   Pointer to the dev structure.
3493  * @param[in] attr
3494  *   Pointer to the flow attributes.
3495  * @param[in] items
3496  *   Pointer to the list of items.
3497  * @param[in] actions
3498  *   Pointer to the list of actions.
3499  * @param[in] external
3500  *   This flow rule is created by a request external to the PMD.
3501  * @param[in] hairpin
3502  *   Number of hairpin TX actions, 0 means classic flow.
3503  * @param[out] error
3504  *   Pointer to the error structure.
3505  *
3506  * @return
3507  *   0 on success, a negative errno value otherwise and rte_errno is set.
3508  */
3509 static inline int
3510 flow_drv_validate(struct rte_eth_dev *dev,
3511                   const struct rte_flow_attr *attr,
3512                   const struct rte_flow_item items[],
3513                   const struct rte_flow_action actions[],
3514                   bool external, int hairpin, struct rte_flow_error *error)
3515 {
3516         const struct mlx5_flow_driver_ops *fops;
3517         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3518
3519         fops = flow_get_drv_ops(type);
3520         return fops->validate(dev, attr, items, actions, external,
3521                               hairpin, error);
3522 }
3523
3524 /**
3525  * Flow driver preparation API. This abstracts calling driver specific
3526  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3527  * calculates the size of memory required for device flow, allocates the memory,
3528  * initializes the device flow and returns the pointer.
3529  *
3530  * @note
3531  *   This function initializes device flow structure such as dv or verbs in
3532  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
3533  *   rest. For example, adding returning device flow to flow->dev_flow list and
3534  *   setting backward reference to the flow should be done out of this function.
3535  *   layers field is not filled either.
3536  *
3537  * @param[in] dev
3538  *   Pointer to the dev structure.
3539  * @param[in] attr
3540  *   Pointer to the flow attributes.
3541  * @param[in] items
3542  *   Pointer to the list of items.
3543  * @param[in] actions
3544  *   Pointer to the list of actions.
3545  * @param[in] flow_idx
3546  *   This memory pool index to the flow.
3547  * @param[out] error
3548  *   Pointer to the error structure.
3549  *
3550  * @return
3551  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3552  */
3553 static inline struct mlx5_flow *
3554 flow_drv_prepare(struct rte_eth_dev *dev,
3555                  const struct rte_flow *flow,
3556                  const struct rte_flow_attr *attr,
3557                  const struct rte_flow_item items[],
3558                  const struct rte_flow_action actions[],
3559                  uint32_t flow_idx,
3560                  struct rte_flow_error *error)
3561 {
3562         const struct mlx5_flow_driver_ops *fops;
3563         enum mlx5_flow_drv_type type = flow->drv_type;
3564         struct mlx5_flow *mlx5_flow = NULL;
3565
3566         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3567         fops = flow_get_drv_ops(type);
3568         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3569         if (mlx5_flow)
3570                 mlx5_flow->flow_idx = flow_idx;
3571         return mlx5_flow;
3572 }
3573
3574 /**
3575  * Flow driver translation API. This abstracts calling driver specific
3576  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3577  * translates a generic flow into a driver flow. flow_drv_prepare() must
3578  * precede.
3579  *
3580  * @note
3581  *   dev_flow->layers could be filled as a result of parsing during translation
3582  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3583  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3584  *   flow->actions could be overwritten even though all the expanded dev_flows
3585  *   have the same actions.
3586  *
3587  * @param[in] dev
3588  *   Pointer to the rte dev structure.
3589  * @param[in, out] dev_flow
3590  *   Pointer to the mlx5 flow.
3591  * @param[in] attr
3592  *   Pointer to the flow attributes.
3593  * @param[in] items
3594  *   Pointer to the list of items.
3595  * @param[in] actions
3596  *   Pointer to the list of actions.
3597  * @param[out] error
3598  *   Pointer to the error structure.
3599  *
3600  * @return
3601  *   0 on success, a negative errno value otherwise and rte_errno is set.
3602  */
3603 static inline int
3604 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3605                    const struct rte_flow_attr *attr,
3606                    const struct rte_flow_item items[],
3607                    const struct rte_flow_action actions[],
3608                    struct rte_flow_error *error)
3609 {
3610         const struct mlx5_flow_driver_ops *fops;
3611         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3612
3613         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3614         fops = flow_get_drv_ops(type);
3615         return fops->translate(dev, dev_flow, attr, items, actions, error);
3616 }
3617
3618 /**
3619  * Flow driver apply API. This abstracts calling driver specific functions.
3620  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3621  * translated driver flows on to device. flow_drv_translate() must precede.
3622  *
3623  * @param[in] dev
3624  *   Pointer to Ethernet device structure.
3625  * @param[in, out] flow
3626  *   Pointer to flow structure.
3627  * @param[out] error
3628  *   Pointer to error structure.
3629  *
3630  * @return
3631  *   0 on success, a negative errno value otherwise and rte_errno is set.
3632  */
3633 static inline int
3634 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3635                struct rte_flow_error *error)
3636 {
3637         const struct mlx5_flow_driver_ops *fops;
3638         enum mlx5_flow_drv_type type = flow->drv_type;
3639
3640         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3641         fops = flow_get_drv_ops(type);
3642         return fops->apply(dev, flow, error);
3643 }
3644
3645 /**
3646  * Flow driver destroy API. This abstracts calling driver specific functions.
3647  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3648  * on device and releases resources of the flow.
3649  *
3650  * @param[in] dev
3651  *   Pointer to Ethernet device.
3652  * @param[in, out] flow
3653  *   Pointer to flow structure.
3654  */
3655 static inline void
3656 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3657 {
3658         const struct mlx5_flow_driver_ops *fops;
3659         enum mlx5_flow_drv_type type = flow->drv_type;
3660
3661         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3662         fops = flow_get_drv_ops(type);
3663         fops->destroy(dev, flow);
3664 }
3665
3666 /**
3667  * Flow driver RSS policy table lookup API. This abstracts calling driver
3668  * specific functions. Parent flow (rte_flow) should have driver
3669  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3670  *
3671  * @param[in] dev
3672  *   Pointer to Ethernet device.
3673  * @param[in, out] flow
3674  *   Pointer to flow structure.
3675  * @param[in] policy
3676  *   Pointer to meter policy table.
3677  * @param[in] rss_desc
3678  *   Pointer to rss_desc.
 *
 * @return
 *   Pointer to the meter sub policy on success, NULL otherwise.
3679  */
3680 static struct mlx5_flow_meter_sub_policy *
3681 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3682                 struct rte_flow *flow,
3683                 struct mlx5_flow_meter_policy *policy,
3684                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3685 {
3686         const struct mlx5_flow_driver_ops *fops;
3687         enum mlx5_flow_drv_type type = flow->drv_type;
3688
3689         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3690         fops = flow_get_drv_ops(type);
3691         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3692 }
3693
3694 /**
3695  * Flow driver color tag rule API. This abstracts calling driver
3696  * specific functions. Parent flow (rte_flow) should have driver
3697  * type (drv_type). It will create the color tag rules in hierarchy meter.
3698  *
3699  * @param[in] dev
3700  *   Pointer to Ethernet device.
3701  * @param[in, out] flow
3702  *   Pointer to flow structure.
3703  * @param[in] fm
3704  *   Pointer to flow meter structure.
3705  * @param[in] src_port
3706  *   The src port this extra rule should use.
3707  * @param[in] item
3708  *   The src port id match item.
3709  * @param[out] error
3710  *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
3711  */
3712 static int
3713 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3714                 struct rte_flow *flow,
3715                 struct mlx5_flow_meter_info *fm,
3716                 int32_t src_port,
3717                 const struct rte_flow_item *item,
3718                 struct rte_flow_error *error)
3719 {
3720         const struct mlx5_flow_driver_ops *fops;
3721         enum mlx5_flow_drv_type type = flow->drv_type;
3722
3723         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3724         fops = flow_get_drv_ops(type);
3725         return fops->meter_hierarchy_rule_create(dev, fm,
3726                                                 src_port, item, error);
3727 }
3728
3729 /**
3730  * Get RSS action from the action list.
3731  *
3732  * @param[in] dev
3733  *   Pointer to Ethernet device.
3734  * @param[in] actions
3735  *   Pointer to the list of actions.
3738  *
3739  * @return
3740  *   Pointer to the RSS action if one exists, NULL otherwise.
3741  */
3742 static const struct rte_flow_action_rss*
3743 flow_get_rss_action(struct rte_eth_dev *dev,
3744                     const struct rte_flow_action actions[])
3745 {
3746         struct mlx5_priv *priv = dev->data->dev_private;
3747         const struct rte_flow_action_rss *rss = NULL;
3748         struct mlx5_meter_policy_action_container *acg;
3749         struct mlx5_meter_policy_action_container *acy;
3750
3751         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3752                 switch (actions->type) {
3753                 case RTE_FLOW_ACTION_TYPE_RSS:
3754                         rss = actions->conf;
3755                         break;
3756                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3757                 {
3758                         const struct rte_flow_action_sample *sample =
3759                                                                 actions->conf;
3760                         const struct rte_flow_action *act = sample->actions;
3761                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3762                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3763                                         rss = act->conf;
3764                         break;
3765                 }
3766                 case RTE_FLOW_ACTION_TYPE_METER:
3767                 {
3768                         uint32_t mtr_idx;
3769                         struct mlx5_flow_meter_info *fm;
3770                         struct mlx5_flow_meter_policy *policy;
3771                         const struct rte_flow_action_meter *mtr = actions->conf;
3772
3773                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3774                         if (fm && !fm->def_policy) {
3775                                 policy = mlx5_flow_meter_policy_find(dev,
3776                                                 fm->policy_id, NULL);
3777                                 MLX5_ASSERT(policy);
3778                                 if (policy->is_hierarchy) {
3779                                         policy =
3780                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
3781                                                                         policy);
3782                                         if (!policy)
3783                                                 return NULL;
3784                                 }
3785                                 if (policy->is_rss) {
3786                                         acg =
3787                                         &policy->act_cnt[RTE_COLOR_GREEN];
3788                                         acy =
3789                                         &policy->act_cnt[RTE_COLOR_YELLOW];
3790                                         if (acg->fate_action ==
3791                                             MLX5_FLOW_FATE_SHARED_RSS)
3792                                                 rss = acg->rss->conf;
3793                                         else if (acy->fate_action ==
3794                                                  MLX5_FLOW_FATE_SHARED_RSS)
3795                                                 rss = acy->rss->conf;
3796                                 }
3797                         }
3798                         break;
3799                 }
3800                 default:
3801                         break;
3802                 }
3803         }
3804         return rss;
3805 }
3806
3807 /**
3808  * Get ASO age action by index.
3809  *
3810  * @param[in] dev
3811  *   Pointer to the Ethernet device structure.
3812  * @param[in] age_idx
3813  *   Index to the ASO age action.
3814  *
3815  * @return
3816  *   The specified ASO age action.
3817  */
3818 struct mlx5_aso_age_action*
3819 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3820 {
3821         uint16_t pool_idx = age_idx & UINT16_MAX;
3822         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3823         struct mlx5_priv *priv = dev->data->dev_private;
3824         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3825         struct mlx5_aso_age_pool *pool;
3826
3827         rte_rwlock_read_lock(&mng->resize_rwl);
3828         pool = mng->pools[pool_idx];
3829         rte_rwlock_read_unlock(&mng->resize_rwl);
3830         return &pool->actions[offset - 1];
3831 }
3832
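/*
 * Note on the encoding used above: the 32-bit age_idx packs the pool index
 * into its lower 16 bits and a 1-based action offset into its upper 16 bits.
 * An illustrative (not normative) way to build such an index, with "slot"
 * being the zero-based position in pool->actions[], would be:
 *
 *   uint32_t age_idx = ((uint32_t)(slot + 1) << 16) | pool_idx;
 *
 * which is why the lookup subtracts one from the decoded offset.
 */
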
3833 /* Maps an indirect action to the translated direct action in an actions array. */
3834 struct mlx5_translated_action_handle {
3835         struct rte_flow_action_handle *action; /**< Indirect action handle. */
3836         int index; /**< Index in related array of rte_flow_action. */
3837 };
3838
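/*
 * The indirect action handle carried in rte_flow_action.conf is not a real
 * pointer but an encoded 32-bit value: the action type occupies the bits at
 * and above MLX5_INDIRECT_ACTION_TYPE_OFFSET and the pool index the bits
 * below it, matching the decoding done in flow_action_handles_translate():
 *
 *   uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
 *   uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
 *   uint32_t idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
 */
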
3839 /**
3840  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to the related
3841  * direct action if translation is possible.
3842  * This functionality is used to run the same execution path for both direct
3843  * and indirect actions on flow create. All necessary preparations for indirect
3844  * action handling should be performed on the *handle* actions list returned
3845  * from this call.
3846  *
3847  * @param[in] dev
3848  *   Pointer to Ethernet device.
3849  * @param[in] actions
3850  *   List of actions to translate.
3851  * @param[out] handle
3852  *   List to store translated indirect action object handles.
3853  * @param[in, out] indir_n
3854  *   Size of the *handle* array. On return, updated with the number of
3855  *   indirect actions retrieved from the *actions* list.
3856  * @param[out] translated_actions
3857  *   List of actions where all indirect actions were translated to direct
3858  *   if possible. NULL if no translation took place.
3859  * @param[out] error
3860  *   Pointer to the error structure.
3861  *
3862  * @return
3863  *   0 on success, a negative errno value otherwise and rte_errno is set.
3864  */
3865 static int
3866 flow_action_handles_translate(struct rte_eth_dev *dev,
3867                               const struct rte_flow_action actions[],
3868                               struct mlx5_translated_action_handle *handle,
3869                               int *indir_n,
3870                               struct rte_flow_action **translated_actions,
3871                               struct rte_flow_error *error)
3872 {
3873         struct mlx5_priv *priv = dev->data->dev_private;
3874         struct rte_flow_action *translated = NULL;
3875         size_t actions_size;
3876         int n;
3877         int copied_n = 0;
3878         struct mlx5_translated_action_handle *handle_end = NULL;
3879
3880         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3881                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
3882                         continue;
3883                 if (copied_n == *indir_n) {
3884                         return rte_flow_error_set
3885                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3886                                  NULL, "too many shared actions");
3887                 }
3888                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
3889                            sizeof(actions[n].conf));
3890                 handle[copied_n].index = n;
3891                 copied_n++;
3892         }
3893         n++;
3894         *indir_n = copied_n;
3895         if (!copied_n)
3896                 return 0;
3897         actions_size = sizeof(struct rte_flow_action) * n;
3898         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3899         if (!translated) {
3900                 rte_errno = ENOMEM;
3901                 return -ENOMEM;
3902         }
3903         memcpy(translated, actions, actions_size);
3904         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
3905                 struct mlx5_shared_action_rss *shared_rss;
3906                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3907                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3908                 uint32_t idx = act_idx &
3909                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3910
3911                 switch (type) {
3912                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3913                         shared_rss = mlx5_ipool_get
3914                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3915                         translated[handle->index].type =
3916                                 RTE_FLOW_ACTION_TYPE_RSS;
3917                         translated[handle->index].conf =
3918                                 &shared_rss->origin;
3919                         break;
3920                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
3921                         translated[handle->index].type =
3922                                                 (enum rte_flow_action_type)
3923                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
3924                         translated[handle->index].conf = (void *)(uintptr_t)idx;
3925                         break;
3926                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
3927                         if (priv->sh->flow_hit_aso_en) {
3928                                 translated[handle->index].type =
3929                                         (enum rte_flow_action_type)
3930                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3931                                 translated[handle->index].conf =
3932                                                          (void *)(uintptr_t)idx;
3933                                 break;
3934                         }
3935                         /* Fall-through */
3936                 case MLX5_INDIRECT_ACTION_TYPE_CT:
3937                         if (priv->sh->ct_aso_en) {
3938                                 translated[handle->index].type =
3939                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
3940                                 translated[handle->index].conf =
3941                                                          (void *)(uintptr_t)idx;
3942                                 break;
3943                         }
3944                         /* Fall-through */
3945                 default:
3946                         mlx5_free(translated);
3947                         return rte_flow_error_set
3948                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3949                                  NULL, "invalid indirect action type");
3950                 }
3951         }
3952         *translated_actions = translated;
3953         return 0;
3954 }
3955
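/*
 * Note: when indirect actions are present and translation succeeds,
 * *translated_actions points to a heap copy allocated with mlx5_malloc();
 * the caller is expected to release it with mlx5_free() once the flow
 * creation path no longer needs the translated list.
 */
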
3956 /**
3957  * Get Shared RSS action from the action list.
3958  *
3959  * @param[in] dev
3960  *   Pointer to Ethernet device.
3961  * @param[in] handle
3962  *   Pointer to the list of translated action handles.
3963  * @param[in] shared_n
3964  *   Length of the *handle* list.
3965  *
3966  * @return
3967  *   The MLX5 shared RSS action ID if one exists, 0 otherwise.
3968  */
3969 static uint32_t
3970 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3971                            struct mlx5_translated_action_handle *handle,
3972                            int shared_n)
3973 {
3974         struct mlx5_translated_action_handle *handle_end;
3975         struct mlx5_priv *priv = dev->data->dev_private;
3976         struct mlx5_shared_action_rss *shared_rss;
3977
3978
3979         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
3980                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3981                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3982                 uint32_t idx = act_idx &
3983                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3984                 switch (type) {
3985                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3986                         shared_rss = mlx5_ipool_get
3987                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3988                                                                            idx);
3989                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3990                                            __ATOMIC_RELAXED);
3991                         return idx;
3992                 default:
3993                         break;
3994                 }
3995         }
3996         return 0;
3997 }
3998
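/**
 * Select the root node of the RSS expansion graph.
 *
 * RSS level 0 and 1 request hashing on the outermost headers, while level 2
 * and above requests inner (tunneled) hashing, hence the different expansion
 * roots.
 *
 * @param[in] rss_level
 *   RSS level requested by the RSS action.
 *
 * @return
 *   Index of the expansion graph root node.
 */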
3999 static unsigned int
4000 find_graph_root(uint32_t rss_level)
4001 {
4002         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4003                                MLX5_EXPANSION_ROOT_OUTER;
4004 }
4005
4006 /**
4007  *  Get layer flags from the prefix flow.
4008  *
4009  *  Some flows may be split into several subflows: the prefix subflow gets the
4010  *  match items and the suffix subflow gets the actions.
4011  *  Some actions need the user-defined match item flags to get the details for
4012  *  the action.
4013  *  This function helps the suffix flow to get the item layer flags from the
4014  *  prefix subflow.
4015  *
4016  * @param[in] dev_flow
4017  *   Pointer to the created prefix subflow.
4018  *
4019  * @return
4020  *   The layers obtained from the prefix subflow.
4021  */
4022 static inline uint64_t
4023 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4024 {
4025         uint64_t layers = 0;
4026
4027         /*
4028          * The layer bits could be stored in a local variable, but usually the
4029          * compiler will do this optimization for us.
4030          * If there is no decap action, use the layers directly.
4031          */
4032         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4033                 return dev_flow->handle->layers;
4034         /* Convert L3 layers with decap action. */
4035         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4036                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4037         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4038                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4039         /* Convert L4 layers with decap action.  */
4040         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4041                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4042         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4043                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4044         return layers;
4045 }
4046
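/*
 * Example (sketch): a prefix subflow that matched inner IPv4/UDP under a
 * tunnel and carries a decap action reports
 * MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L4_UDP to the suffix
 * subflow, since the former inner headers are the outermost ones after
 * decapsulation.
 */
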
4047 /**
4048  * Get metadata split action information.
4049  *
4050  * @param[in] actions
4051  *   Pointer to the list of actions.
4052  * @param[out] qrss
4053  *   Pointer used to return the QUEUE/RSS action; left untouched if no
4054  *   QUEUE/RSS action is found.
4057  * @param[out] encap_idx
4058  *   Pointer to the index of the encap action if one exists, otherwise the
4059  *   index of the END action.
4060  *
4061  * @return
4062  *   Total number of actions.
4063  */
4064 static int
4065 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4066                                        const struct rte_flow_action **qrss,
4067                                        int *encap_idx)
4068 {
4069         const struct rte_flow_action_raw_encap *raw_encap;
4070         int actions_n = 0;
4071         int raw_decap_idx = -1;
4072
4073         *encap_idx = -1;
4074         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4075                 switch (actions->type) {
4076                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4077                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4078                         *encap_idx = actions_n;
4079                         break;
4080                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4081                         raw_decap_idx = actions_n;
4082                         break;
4083                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4084                         raw_encap = actions->conf;
4085                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4086                                 *encap_idx = raw_decap_idx != -1 ?
4087                                                       raw_decap_idx : actions_n;
4088                         break;
4089                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4090                 case RTE_FLOW_ACTION_TYPE_RSS:
4091                         *qrss = actions;
4092                         break;
4093                 default:
4094                         break;
4095                 }
4096                 actions_n++;
4097         }
4098         if (*encap_idx == -1)
4099                 *encap_idx = actions_n;
4100         /* Count RTE_FLOW_ACTION_TYPE_END. */
4101         return actions_n + 1;
4102 }
4103
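/*
 * Note: when a RAW_ENCAP larger than MLX5_ENCAPSULATION_DECISION_SIZE follows
 * a RAW_DECAP (an L3 tunnel rewrite), the index of the preceding RAW_DECAP is
 * reported as the encap index.
 */
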
4104 /**
4105  * Check if the action will change packet.
4106  *
4107  * @param dev
4108  *   Pointer to Ethernet device.
4109  * @param[in] type
4110  *   Action type.
4111  *
4112  * @return
4113  *   true if the action will change the packet, false otherwise.
4114  */
4115 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4116                                           enum rte_flow_action_type type)
4117 {
4118         struct mlx5_priv *priv = dev->data->dev_private;
4119
4120         switch (type) {
4121         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4122         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4123         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4124         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4125         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4126         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4127         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4128         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4129         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4130         case RTE_FLOW_ACTION_TYPE_SET_TTL:
4131         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4132         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4133         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4134         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4135         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4136         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4137         case RTE_FLOW_ACTION_TYPE_SET_META:
4138         case RTE_FLOW_ACTION_TYPE_SET_TAG:
4139         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4140         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4141         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4142         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4143         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4144         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4145         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4146         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4147         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4148         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4149         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4150                 return true;
4151         case RTE_FLOW_ACTION_TYPE_FLAG:
4152         case RTE_FLOW_ACTION_TYPE_MARK:
4153                 if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4154                         return true;
4155                 else
4156                         return false;
4157         default:
4158                 return false;
4159         }
4160 }
4161
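/*
 * Note: FLAG and MARK count as packet-modifying actions only when extended
 * metadata mode is enabled (dv_xmeta_en != MLX5_XMETA_MODE_LEGACY),
 * presumably because they are then implemented through metadata register
 * modification.
 */
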
4162 /**
4163  * Check meter action from the action list.
4164  *
4165  * @param dev
4166  *   Pointer to Ethernet device.
4167  * @param[in] actions
4168  *   Pointer to the list of actions.
4169  * @param[out] has_mtr
4170  *   Pointer to the meter-exists flag.
4171  * @param[out] has_modify
4172  *   Pointer to the flag indicating whether a packet-modifying action is present.
4173  * @param[out] meter_id
4174  *   Pointer to the meter id.
4175  *
4176  * @return
4177  *   Total number of actions.
4178  */
4179 static int
4180 flow_check_meter_action(struct rte_eth_dev *dev,
4181                         const struct rte_flow_action actions[],
4182                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4183 {
4184         const struct rte_flow_action_meter *mtr = NULL;
4185         int actions_n = 0;
4186
4187         MLX5_ASSERT(has_mtr);
4188         *has_mtr = false;
4189         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4190                 switch (actions->type) {
4191                 case RTE_FLOW_ACTION_TYPE_METER:
4192                         mtr = actions->conf;
4193                         *meter_id = mtr->mtr_id;
4194                         *has_mtr = true;
4195                         break;
4196                 default:
4197                         break;
4198                 }
4199                 if (!*has_mtr)
4200                         *has_modify |= flow_check_modify_action_type(dev,
4201                                                                 actions->type);
4202                 actions_n++;
4203         }
4204         /* Count RTE_FLOW_ACTION_TYPE_END. */
4205         return actions_n + 1;
4206 }
4207
4208 /**
4209  * Check if the flow should be split due to hairpin.
4210  * The reason for the split is that in current HW we can't
4211  * support encap and push-vlan on Rx, so if a flow contains
4212  * these actions we move it to Tx.
4213  *
4214  * @param dev
4215  *   Pointer to Ethernet device.
4216  * @param[in] attr
4217  *   Flow rule attributes.
4218  * @param[in] actions
4219  *   Associated actions (list terminated by the END action).
4220  *
4221  * @return
4222  *   > 0 the number of actions and the flow should be split,
4223  *   0 when no split required.
4224  */
4225 static int
4226 flow_check_hairpin_split(struct rte_eth_dev *dev,
4227                          const struct rte_flow_attr *attr,
4228                          const struct rte_flow_action actions[])
4229 {
4230         int queue_action = 0;
4231         int action_n = 0;
4232         int split = 0;
4233         const struct rte_flow_action_queue *queue;
4234         const struct rte_flow_action_rss *rss;
4235         const struct rte_flow_action_raw_encap *raw_encap;
4236         const struct rte_eth_hairpin_conf *conf;
4237
4238         if (!attr->ingress)
4239                 return 0;
4240         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4241                 switch (actions->type) {
4242                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4243                         queue = actions->conf;
4244                         if (queue == NULL)
4245                                 return 0;
4246                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4247                         if (conf == NULL || conf->tx_explicit != 0)
4248                                 return 0;
4249                         queue_action = 1;
4250                         action_n++;
4251                         break;
4252                 case RTE_FLOW_ACTION_TYPE_RSS:
4253                         rss = actions->conf;
4254                         if (rss == NULL || rss->queue_num == 0)
4255                                 return 0;
4256                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4257                         if (conf == NULL || conf->tx_explicit != 0)
4258                                 return 0;
4259                         queue_action = 1;
4260                         action_n++;
4261                         break;
4262                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4263                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4264                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4265                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4266                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4267                         split++;
4268                         action_n++;
4269                         break;
4270                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4271                         raw_encap = actions->conf;
4272                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4273                                 split++;
4274                         action_n++;
4275                         break;
4276                 default:
4277                         action_n++;
4278                         break;
4279                 }
4280         }
4281         if (split && queue_action)
4282                 return action_n;
4283         return 0;
4284 }
4285
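/*
 * Illustrative (hypothetical) case: an ingress flow with actions
 * RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE) / QUEUE / END, where
 * the queue is a hairpin queue with tx_explicit == 0, contains both an encap
 * action and an implicit-Tx hairpin queue action, so the function above
 * returns 2 and the flow gets split by flow_hairpin_split() below.
 */
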
4286 /* Declare flow create/destroy prototype in advance. */
4287 static uint32_t
4288 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4289                  const struct rte_flow_attr *attr,
4290                  const struct rte_flow_item items[],
4291                  const struct rte_flow_action actions[],
4292                  bool external, struct rte_flow_error *error);
4293
4294 static void
4295 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4296                   uint32_t flow_idx);
4297
4298 int
4299 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4300                       struct mlx5_list_entry *entry, void *cb_ctx)
4301 {
4302         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4303         struct mlx5_flow_mreg_copy_resource *mcp_res =
4304                                container_of(entry, typeof(*mcp_res), hlist_ent);
4305
4306         return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4307 }
4308
4309 struct mlx5_list_entry *
4310 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4311 {
4312         struct rte_eth_dev *dev = tool_ctx;
4313         struct mlx5_priv *priv = dev->data->dev_private;
4314         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4315         struct mlx5_flow_mreg_copy_resource *mcp_res;
4316         struct rte_flow_error *error = ctx->error;
4317         uint32_t idx = 0;
4318         int ret;
4319         uint32_t mark_id = *(uint32_t *)(ctx->data);
4320         struct rte_flow_attr attr = {
4321                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4322                 .ingress = 1,
4323         };
4324         struct mlx5_rte_flow_item_tag tag_spec = {
4325                 .data = mark_id,
4326         };
4327         struct rte_flow_item items[] = {
4328                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4329         };
4330         struct rte_flow_action_mark ftag = {
4331                 .id = mark_id,
4332         };
4333         struct mlx5_flow_action_copy_mreg cp_mreg = {
4334                 .dst = REG_B,
4335                 .src = REG_NON,
4336         };
4337         struct rte_flow_action_jump jump = {
4338                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4339         };
4340         struct rte_flow_action actions[] = {
4341                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4342         };
4343
4344         /* Fill the register fields in the flow. */
4345         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4346         if (ret < 0)
4347                 return NULL;
4348         tag_spec.id = ret;
4349         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4350         if (ret < 0)
4351                 return NULL;
4352         cp_mreg.src = ret;
4353         /* Provide the full width of FLAG specific value. */
4354         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4355                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4356         /* Build a new flow. */
4357         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4358                 items[0] = (struct rte_flow_item){
4359                         .type = (enum rte_flow_item_type)
4360                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4361                         .spec = &tag_spec,
4362                 };
4363                 items[1] = (struct rte_flow_item){
4364                         .type = RTE_FLOW_ITEM_TYPE_END,
4365                 };
4366                 actions[0] = (struct rte_flow_action){
4367                         .type = (enum rte_flow_action_type)
4368                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4369                         .conf = &ftag,
4370                 };
4371                 actions[1] = (struct rte_flow_action){
4372                         .type = (enum rte_flow_action_type)
4373                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4374                         .conf = &cp_mreg,
4375                 };
4376                 actions[2] = (struct rte_flow_action){
4377                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4378                         .conf = &jump,
4379                 };
4380                 actions[3] = (struct rte_flow_action){
4381                         .type = RTE_FLOW_ACTION_TYPE_END,
4382                 };
4383         } else {
4384                 /* Default rule, wildcard match. */
4385                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4386                 items[0] = (struct rte_flow_item){
4387                         .type = RTE_FLOW_ITEM_TYPE_END,
4388                 };
4389                 actions[0] = (struct rte_flow_action){
4390                         .type = (enum rte_flow_action_type)
4391                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4392                         .conf = &cp_mreg,
4393                 };
4394                 actions[1] = (struct rte_flow_action){
4395                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4396                         .conf = &jump,
4397                 };
4398                 actions[2] = (struct rte_flow_action){
4399                         .type = RTE_FLOW_ACTION_TYPE_END,
4400                 };
4401         }
4402         /* Build a new entry. */
4403         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4404         if (!mcp_res) {
4405                 rte_errno = ENOMEM;
4406                 return NULL;
4407         }
4408         mcp_res->idx = idx;
4409         mcp_res->mark_id = mark_id;
4410         /*
4411          * The copy Flows are not included in any list. They
4412          * are referenced from other Flows and cannot be
4413          * applied, removed or deleted in arbitrary order
4414          * by list traversal.
4415          */
4416         mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4417                                         &attr, items, actions, false, error);
4418         if (!mcp_res->rix_flow) {
4419                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4420                 return NULL;
4421         }
4422         return &mcp_res->hlist_ent;
4423 }
4424
4425 struct mlx5_list_entry *
4426 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4427                       void *cb_ctx __rte_unused)
4428 {
4429         struct rte_eth_dev *dev = tool_ctx;
4430         struct mlx5_priv *priv = dev->data->dev_private;
4431         struct mlx5_flow_mreg_copy_resource *mcp_res;
4432         uint32_t idx = 0;
4433
4434         mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4435         if (!mcp_res) {
4436                 rte_errno = ENOMEM;
4437                 return NULL;
4438         }
4439         memcpy(mcp_res, oentry, sizeof(*mcp_res));
4440         mcp_res->idx = idx;
4441         return &mcp_res->hlist_ent;
4442 }
4443
4444 void
4445 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4446 {
4447         struct mlx5_flow_mreg_copy_resource *mcp_res =
4448                                container_of(entry, typeof(*mcp_res), hlist_ent);
4449         struct rte_eth_dev *dev = tool_ctx;
4450         struct mlx5_priv *priv = dev->data->dev_private;
4451
4452         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4453 }
4454
4455 /**
4456  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4457  *
4458  * As mark_id is unique, if there's already a registered flow for the mark_id,
4459  * return by increasing the reference counter of the resource. Otherwise, create
4460  * the resource (mcp_res) and flow.
4461  *
4462  * Flow looks like,
4463  *   - If ingress port is ANY and reg_c[1] is mark_id,
4464  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4465  *
4466  * For default flow (zero mark_id), flow is like,
4467  *   - If ingress port is ANY,
4468  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4469  *
4470  * @param dev
4471  *   Pointer to Ethernet device.
4472  * @param mark_id
4473  *   ID of MARK action, zero means default flow for META.
4474  * @param[out] error
4475  *   Perform verbose error reporting if not NULL.
4476  *
4477  * @return
4478  *   Associated resource on success, NULL otherwise and rte_errno is set.
4479  */
4480 static struct mlx5_flow_mreg_copy_resource *
4481 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4482                           struct rte_flow_error *error)
4483 {
4484         struct mlx5_priv *priv = dev->data->dev_private;
4485         struct mlx5_list_entry *entry;
4486         struct mlx5_flow_cb_ctx ctx = {
4487                 .dev = dev,
4488                 .error = error,
4489                 .data = &mark_id,
4490         };
4491
4492         /* Check if already registered. */
4493         MLX5_ASSERT(priv->mreg_cp_tbl);
4494         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4495         if (!entry)
4496                 return NULL;
4497         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4498                             hlist_ent);
4499 }
4500
4501 void
4502 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4503 {
4504         struct mlx5_flow_mreg_copy_resource *mcp_res =
4505                                container_of(entry, typeof(*mcp_res), hlist_ent);
4506         struct rte_eth_dev *dev = tool_ctx;
4507         struct mlx5_priv *priv = dev->data->dev_private;
4508
4509         MLX5_ASSERT(mcp_res->rix_flow);
4510         flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4511         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4512 }
4513
4514 /**
4515  * Release flow in RX_CP_TBL.
4516  *
4517  * @param dev
4518  *   Pointer to Ethernet device.
4519  * @param flow
4520  *   Parent flow for which copying is provided.
4521  */
4522 static void
4523 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4524                           struct rte_flow *flow)
4525 {
4526         struct mlx5_flow_mreg_copy_resource *mcp_res;
4527         struct mlx5_priv *priv = dev->data->dev_private;
4528
4529         if (!flow->rix_mreg_copy)
4530                 return;
4531         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4532                                  flow->rix_mreg_copy);
4533         if (!mcp_res || !priv->mreg_cp_tbl)
4534                 return;
4535         MLX5_ASSERT(mcp_res->rix_flow);
4536         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4537         flow->rix_mreg_copy = 0;
4538 }
4539
4540 /**
4541  * Remove the default copy action from RX_CP_TBL.
4542  *
4543  * This function is called in mlx5_dev_start(). Thread safety is not
4544  * guaranteed.
4545  *
4546  * @param dev
4547  *   Pointer to Ethernet device.
4548  */
4549 static void
4550 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4551 {
4552         struct mlx5_list_entry *entry;
4553         struct mlx5_priv *priv = dev->data->dev_private;
4554         struct mlx5_flow_cb_ctx ctx;
4555         uint32_t mark_id;
4556
4557         /* Check if default flow is registered. */
4558         if (!priv->mreg_cp_tbl)
4559                 return;
4560         mark_id = MLX5_DEFAULT_COPY_ID;
4561         ctx.data = &mark_id;
4562         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4563         if (!entry)
4564                 return;
4565         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4566 }
4567
4568 /**
4569  * Add the default copy action in RX_CP_TBL.
4570  *
4571  * This function is called in mlx5_dev_start(). Thread safety is not
4572  * guaranteed.
4573  *
4574  * @param dev
4575  *   Pointer to Ethernet device.
4576  * @param[out] error
4577  *   Perform verbose error reporting if not NULL.
4578  *
4579  * @return
4580  *   0 for success, negative value otherwise and rte_errno is set.
4581  */
4582 static int
4583 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4584                                   struct rte_flow_error *error)
4585 {
4586         struct mlx5_priv *priv = dev->data->dev_private;
4587         struct mlx5_flow_mreg_copy_resource *mcp_res;
4588         struct mlx5_flow_cb_ctx ctx;
4589         uint32_t mark_id;
4590
4591         /* Check whether extensive metadata feature is engaged. */
4592         if (!priv->sh->config.dv_flow_en ||
4593             priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4594             !mlx5_flow_ext_mreg_supported(dev) ||
4595             !priv->sh->dv_regc0_mask)
4596                 return 0;
4597         /*
4598          * Adding the default mreg copy flow may be called multiple times,
4599          * while the removal happens only once at stop. Avoid registering it twice.
4600          */
4601         mark_id = MLX5_DEFAULT_COPY_ID;
4602         ctx.data = &mark_id;
4603         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4604                 return 0;
4605         mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4606         if (!mcp_res)
4607                 return -rte_errno;
4608         return 0;
4609 }
4610
4611 /**
4612  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4613  *
4614  * All the flows having a Q/RSS action should be split by
4615  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in the RX_CP_TBL
4616  * performs the following,
4617  *   - CQE->flow_tag := reg_c[1] (MARK)
4618  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4619  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4620  * but there should be one flow per MARK ID set by the MARK action.
4621  *
4622  * For the aforementioned reason, if there's a MARK action in flow's action
4623  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4624  * the MARK ID to CQE's flow_tag like,
4625  *   - If reg_c[1] is mark_id,
4626  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4627  *
4628  * For SET_META action which stores value in reg_c[0], as the destination is
4629  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4630  * MARK ID means the default flow. The default flow looks like,
4631  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4632  *
4633  * @param dev
4634  *   Pointer to Ethernet device.
4635  * @param flow
4636  *   Pointer to flow structure.
4637  * @param[in] actions
4638  *   Pointer to the list of actions.
4639  * @param[out] error
4640  *   Perform verbose error reporting if not NULL.
4641  *
4642  * @return
4643  *   0 on success, negative value otherwise and rte_errno is set.
4644  */
4645 static int
4646 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4647                             struct rte_flow *flow,
4648                             const struct rte_flow_action *actions,
4649                             struct rte_flow_error *error)
4650 {
4651         struct mlx5_priv *priv = dev->data->dev_private;
4652         struct mlx5_sh_config *config = &priv->sh->config;
4653         struct mlx5_flow_mreg_copy_resource *mcp_res;
4654         const struct rte_flow_action_mark *mark;
4655
4656         /* Check whether extensive metadata feature is engaged. */
4657         if (!config->dv_flow_en ||
4658             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4659             !mlx5_flow_ext_mreg_supported(dev) ||
4660             !priv->sh->dv_regc0_mask)
4661                 return 0;
4662         /* Find MARK action. */
4663         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4664                 switch (actions->type) {
4665                 case RTE_FLOW_ACTION_TYPE_FLAG:
4666                         mcp_res = flow_mreg_add_copy_action
4667                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4668                         if (!mcp_res)
4669                                 return -rte_errno;
4670                         flow->rix_mreg_copy = mcp_res->idx;
4671                         return 0;
4672                 case RTE_FLOW_ACTION_TYPE_MARK:
4673                         mark = (const struct rte_flow_action_mark *)
4674                                 actions->conf;
4675                         mcp_res =
4676                                 flow_mreg_add_copy_action(dev, mark->id, error);
4677                         if (!mcp_res)
4678                                 return -rte_errno;
4679                         flow->rix_mreg_copy = mcp_res->idx;
4680                         return 0;
4681                 default:
4682                         break;
4683                 }
4684         }
4685         return 0;
4686 }
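
/*
 * Note: only the first FLAG or MARK action in the list is handled here; the
 * loop returns as soon as a copy resource has been referenced, FLAG mapping
 * to the MLX5_FLOW_MARK_DEFAULT ID and MARK to its own ID.
 */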
4687
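/* Upper bounds for the action/item lists handled by the flow split helpers. */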
4688 #define MLX5_MAX_SPLIT_ACTIONS 24
4689 #define MLX5_MAX_SPLIT_ITEMS 24
4690
4691 /**
4692  * Split the hairpin flow.
4693  * Since HW can't support encap and push-vlan on Rx, we move these
4694  * actions to Tx.
4695  * If the count action comes after the encap then we also
4696  * move the count action. In this case the count will also measure
4697  * the outer bytes.
4698  *
4699  * @param dev
4700  *   Pointer to Ethernet device.
4701  * @param[in] actions
4702  *   Associated actions (list terminated by the END action).
4703  * @param[out] actions_rx
4704  *   Rx flow actions.
4705  * @param[out] actions_tx
4706  *   Tx flow actions.
4707  * @param[out] pattern_tx
4708  *   The pattern items for the Tx flow.
4709  * @param[in] flow_id
4710  *   The flow ID connected to this flow.
4711  *
4712  * @return
4713  *   0 on success.
4714  */
4715 static int
4716 flow_hairpin_split(struct rte_eth_dev *dev,
4717                    const struct rte_flow_action actions[],
4718                    struct rte_flow_action actions_rx[],
4719                    struct rte_flow_action actions_tx[],
4720                    struct rte_flow_item pattern_tx[],
4721                    uint32_t flow_id)
4722 {
4723         const struct rte_flow_action_raw_encap *raw_encap;
4724         const struct rte_flow_action_raw_decap *raw_decap;
4725         struct mlx5_rte_flow_action_set_tag *set_tag;
4726         struct rte_flow_action *tag_action;
4727         struct mlx5_rte_flow_item_tag *tag_item;
4728         struct rte_flow_item *item;
4729         char *addr;
4730         int encap = 0;
4731
4732         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4733                 switch (actions->type) {
4734                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4735                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4736                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4737                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4738                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4739                         rte_memcpy(actions_tx, actions,
4740                                sizeof(struct rte_flow_action));
4741                         actions_tx++;
4742                         break;
4743                 case RTE_FLOW_ACTION_TYPE_COUNT:
4744                         if (encap) {
4745                                 rte_memcpy(actions_tx, actions,
4746                                            sizeof(struct rte_flow_action));
4747                                 actions_tx++;
4748                         } else {
4749                                 rte_memcpy(actions_rx, actions,
4750                                            sizeof(struct rte_flow_action));
4751                                 actions_rx++;
4752                         }
4753                         break;
4754                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4755                         raw_encap = actions->conf;
4756                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4757                                 memcpy(actions_tx, actions,
4758                                        sizeof(struct rte_flow_action));
4759                                 actions_tx++;
4760                                 encap = 1;
4761                         } else {
4762                                 rte_memcpy(actions_rx, actions,
4763                                            sizeof(struct rte_flow_action));
4764                                 actions_rx++;
4765                         }
4766                         break;
4767                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4768                         raw_decap = actions->conf;
4769                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4770                                 memcpy(actions_tx, actions,
4771                                        sizeof(struct rte_flow_action));
4772                                 actions_tx++;
4773                         } else {
4774                                 rte_memcpy(actions_rx, actions,
4775                                            sizeof(struct rte_flow_action));
4776                                 actions_rx++;
4777                         }
4778                         break;
4779                 default:
4780                         rte_memcpy(actions_rx, actions,
4781                                    sizeof(struct rte_flow_action));
4782                         actions_rx++;
4783                         break;
4784                 }
4785         }
4786         /* Add set meta action and end action for the Rx flow. */
4787         tag_action = actions_rx;
4788         tag_action->type = (enum rte_flow_action_type)
4789                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4790         actions_rx++;
4791         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4792         actions_rx++;
4793         set_tag = (void *)actions_rx;
4794         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4795                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4796                 .data = flow_id,
4797         };
4798         MLX5_ASSERT(set_tag->id > REG_NON);
4799         tag_action->conf = set_tag;
4800         /* Create Tx item list. */
4801         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4802         addr = (void *)&pattern_tx[2];
4803         item = pattern_tx;
4804         item->type = (enum rte_flow_item_type)
4805                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4806         tag_item = (void *)addr;
4807         tag_item->data = flow_id;
4808         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4809         MLX5_ASSERT(set_tag->id > REG_NON);
4810         item->spec = tag_item;
4811         addr += sizeof(struct mlx5_rte_flow_item_tag);
4812         tag_item = (void *)addr;
4813         tag_item->data = UINT32_MAX;
4814         tag_item->id = UINT16_MAX;
4815         item->mask = tag_item;
4816         item->last = NULL;
4817         item++;
4818         item->type = RTE_FLOW_ITEM_TYPE_END;
4819         return 0;
4820 }
4821
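/*
 * Resulting split (sketch): the Rx action list keeps everything Rx hardware
 * can handle and gains an implicit tag-set action carrying flow_id before its
 * END, while the Tx action list receives the encap/push-VLAN (and possibly
 * COUNT) actions; the generated Tx pattern matches the corresponding tag
 * register against flow_id, tying the two halves of the hairpin flow together.
 */
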
4822 /**
4823  * The last stage of splitting chain, just creates the subflow
4824  * without any modification.
4825  *
4826  * @param[in] dev
4827  *   Pointer to Ethernet device.
4828  * @param[in] flow
4829  *   Parent flow structure pointer.
4830  * @param[in, out] sub_flow
4831  *   Pointer to return the created subflow, may be NULL.
4832  * @param[in] attr
4833  *   Flow rule attributes.
4834  * @param[in] items
4835  *   Pattern specification (list terminated by the END pattern item).
4836  * @param[in] actions
4837  *   Associated actions (list terminated by the END action).
4838  * @param[in] flow_split_info
4839  *   Pointer to flow split info structure.
4840  * @param[out] error
4841  *   Perform verbose error reporting if not NULL.
4842  * @return
4843  *   0 on success, negative value otherwise
4844  */
4845 static int
4846 flow_create_split_inner(struct rte_eth_dev *dev,
4847                         struct rte_flow *flow,
4848                         struct mlx5_flow **sub_flow,
4849                         const struct rte_flow_attr *attr,
4850                         const struct rte_flow_item items[],
4851                         const struct rte_flow_action actions[],
4852                         struct mlx5_flow_split_info *flow_split_info,
4853                         struct rte_flow_error *error)
4854 {
4855         struct mlx5_flow *dev_flow;
4856         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
4857
4858         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4859                                     flow_split_info->flow_idx, error);
4860         if (!dev_flow)
4861                 return -rte_errno;
4862         dev_flow->flow = flow;
4863         dev_flow->external = flow_split_info->external;
4864         dev_flow->skip_scale = flow_split_info->skip_scale;
4865         /* Subflow object was created, we must include it in the list. */
4866         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4867                       dev_flow->handle, next);
4868         /*
4869          * If dev_flow is one of the suffix flows, some actions in the suffix
4870          * flow may need user-defined item layer flags; also pass the metadata
4871          * rxq mark flag to the suffix flow.
4872          */
4873         if (flow_split_info->prefix_layers)
4874                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4875         if (flow_split_info->prefix_mark) {
4876                 MLX5_ASSERT(wks);
4877                 wks->mark = 1;
4878         }
4879         if (sub_flow)
4880                 *sub_flow = dev_flow;
4881 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4882         dev_flow->dv.table_id = flow_split_info->table_id;
4883 #endif
4884         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4885 }
4886
4887 /**
4888  * Get the sub policy of a meter.
4889  *
4890  * @param[in] dev
4891  *   Pointer to Ethernet device.
4892  * @param[in] flow
4893  *   Parent flow structure pointer.
4894  * @param wks
4895  *   Pointer to thread flow work space.
4896  * @param[in] attr
4897  *   Flow rule attributes.
4898  * @param[in] items
4899  *   Pattern specification (list terminated by the END pattern item).
4900  * @param[out] error
4901  *   Perform verbose error reporting if not NULL.
4902  *
4903  * @return
4904  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
4905  */
4906 static struct mlx5_flow_meter_sub_policy *
4907 get_meter_sub_policy(struct rte_eth_dev *dev,
4908                      struct rte_flow *flow,
4909                      struct mlx5_flow_workspace *wks,
4910                      const struct rte_flow_attr *attr,
4911                      const struct rte_flow_item items[],
4912                      struct rte_flow_error *error)
4913 {
4914         struct mlx5_flow_meter_policy *policy;
4915         struct mlx5_flow_meter_policy *final_policy;
4916         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
4917
4918         policy = wks->policy;
4919         final_policy = policy->is_hierarchy ? wks->final_policy : policy;
4920         if (final_policy->is_rss || final_policy->is_queue) {
4921                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
4922                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
4923                 uint32_t i;
4924
4925                 /*
4926                  * This is a tmp dev_flow,
4927                  * no need to register any matcher for it in translate.
4928                  */
4929                 wks->skip_matcher_reg = 1;
4930                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
4931                         struct mlx5_flow dev_flow = {0};
4932                         struct mlx5_flow_handle dev_handle = { {0} };
4933                         uint8_t fate = final_policy->act_cnt[i].fate_action;
4934
4935                         if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
4936                                 const struct rte_flow_action_rss *rss_act =
4937                                         final_policy->act_cnt[i].rss->conf;
4938                                 struct rte_flow_action rss_actions[2] = {
4939                                         [0] = {
4940                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4941                                         .conf = rss_act,
4942                                         },
4943                                         [1] = {
4944                                         .type = RTE_FLOW_ACTION_TYPE_END,
4945                                         .conf = NULL,
4946                                         }
4947                                 };
4948
4949                                 dev_flow.handle = &dev_handle;
4950                                 dev_flow.ingress = attr->ingress;
4951                                 dev_flow.flow = flow;
4952                                 dev_flow.external = 0;
4953 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4954                                 dev_flow.dv.transfer = attr->transfer;
4955 #endif
4956                                 /**
4957                                  * Translate RSS action to get rss hash fields.
4958                                  */
4959                                 if (flow_drv_translate(dev, &dev_flow, attr,
4960                                                 items, rss_actions, error))
4961                                         goto exit;
4962                                 rss_desc_v[i] = wks->rss_desc;
4963                                 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
4964                                 rss_desc_v[i].hash_fields =
4965                                                 dev_flow.hash_fields;
4966                                 rss_desc_v[i].queue_num =
4967                                                 rss_desc_v[i].hash_fields ?
4968                                                 rss_desc_v[i].queue_num : 1;
4969                                 rss_desc_v[i].tunnel =
4970                                                 !!(dev_flow.handle->layers &
4971                                                    MLX5_FLOW_LAYER_TUNNEL);
4972                                 /* Use the RSS queues in the containers. */
4973                                 rss_desc_v[i].queue =
4974                                         (uint16_t *)(uintptr_t)rss_act->queue;
4975                                 rss_desc[i] = &rss_desc_v[i];
4976                         } else if (fate == MLX5_FLOW_FATE_QUEUE) {
4977                                 /* This is queue action. */
4978                                 rss_desc_v[i] = wks->rss_desc;
4979                                 rss_desc_v[i].key_len = 0;
4980                                 rss_desc_v[i].hash_fields = 0;
4981                                 rss_desc_v[i].queue =
4982                                         &final_policy->act_cnt[i].queue;
4983                                 rss_desc_v[i].queue_num = 1;
4984                                 rss_desc[i] = &rss_desc_v[i];
4985                         } else {
4986                                 rss_desc[i] = NULL;
4987                         }
4988                 }
4989                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
4990                                                 flow, policy, rss_desc);
4991         } else {
4992                 enum mlx5_meter_domain mtr_domain =
4993                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4994                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4995                                                 MLX5_MTR_DOMAIN_INGRESS);
4996                 sub_policy = policy->sub_policys[mtr_domain][0];
4997         }
4998         if (!sub_policy)
4999                 rte_flow_error_set(error, EINVAL,
5000                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5001                                    "Failed to get meter sub-policy.");
5002 exit:
5003         return sub_policy;
5004 }
5005
5006 /**
5007  * Split the meter flow.
5008  *
5009  * As the meter flow will be split into three sub flows, the actions
5010  * other than the meter action only make sense if the meter accepts
5011  * the packet. If the packet is to be dropped, no additional
5012  * actions should be taken.
5013  *
5014  * One special kind of action, which decapsulates the L3 tunnel
5015  * header, is kept in the prefix sub flow so that the suffix sub flow
5016  * does not need to take the L3 tunnel header into account.
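 *
 * For illustration only (a rough sketch, not normative), with a
 * default-policy meter the split looks like:
 *   original actions: METER / SET_IPV4_SRC / QUEUE / END
 *   prefix actions:   SET_TAG(meter id, flow id) / METER / [JUMP] / END
 *   suffix items:     original items + TAG(meter id, flow id)
 *   suffix actions:   SET_IPV4_SRC / QUEUE / END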
5017  *
5018  * @param[in] dev
5019  *   Pointer to Ethernet device.
5020  * @param[in] flow
5021  *   Parent flow structure pointer.
5022  * @param wks
5023  *   Pointer to thread flow work space.
5024  * @param[in] attr
5025  *   Flow rule attributes.
5026  * @param[in] items
5027  *   Pattern specification (list terminated by the END pattern item).
5028  * @param[out] sfx_items
5029  *   Suffix flow match items (list terminated by the END pattern item).
5030  * @param[in] actions
5031  *   Associated actions (list terminated by the END action).
5032  * @param[out] actions_sfx
5033  *   Suffix flow actions.
5034  * @param[out] actions_pre
5035  *   Prefix flow actions.
5036  * @param[out] mtr_flow_id
5037  *   Pointer to meter flow id.
5038  * @param[out] error
5039  *   Perform verbose error reporting if not NULL.
5040  *
5041  * @return
5042  *   0 on success, a negative errno value otherwise and rte_errno is set.
5043  */
5044 static int
5045 flow_meter_split_prep(struct rte_eth_dev *dev,
5046                       struct rte_flow *flow,
5047                       struct mlx5_flow_workspace *wks,
5048                       const struct rte_flow_attr *attr,
5049                       const struct rte_flow_item items[],
5050                       struct rte_flow_item sfx_items[],
5051                       const struct rte_flow_action actions[],
5052                       struct rte_flow_action actions_sfx[],
5053                       struct rte_flow_action actions_pre[],
5054                       uint32_t *mtr_flow_id,
5055                       struct rte_flow_error *error)
5056 {
5057         struct mlx5_priv *priv = dev->data->dev_private;
5058         struct mlx5_flow_meter_info *fm = wks->fm;
5059         struct rte_flow_action *tag_action = NULL;
5060         struct rte_flow_item *tag_item;
5061         struct mlx5_rte_flow_action_set_tag *set_tag;
5062         const struct rte_flow_action_raw_encap *raw_encap;
5063         const struct rte_flow_action_raw_decap *raw_decap;
5064         struct mlx5_rte_flow_item_tag *tag_item_spec;
5065         struct mlx5_rte_flow_item_tag *tag_item_mask;
5066         uint32_t tag_id = 0;
5067         struct rte_flow_item *vlan_item_dst = NULL;
5068         const struct rte_flow_item *vlan_item_src = NULL;
5069         const struct rte_flow_item *orig_items = items;
5070         struct rte_flow_action *hw_mtr_action;
5071         struct rte_flow_action *action_pre_head = NULL;
5072         int32_t flow_src_port = priv->representor_id;
5073         bool mtr_first;
5074         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5075         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5076                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5077         uint32_t flow_id = 0;
5078         uint32_t flow_id_reversed = 0;
5079         uint8_t flow_id_bits = 0;
5080         int shift;
5081
5082         /* Prepare the suffix subflow items. */
5083         tag_item = sfx_items++;
5084         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5085                 struct mlx5_priv *port_priv;
5086                 const struct rte_flow_item_port_id *pid_v;
5087                 int item_type = items->type;
5088
5089                 switch (item_type) {
5090                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
5091                         pid_v = items->spec;
5092                         MLX5_ASSERT(pid_v);
5093                         port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
5094                         if (!port_priv)
5095                                 return rte_flow_error_set(error,
5096                                                 rte_errno,
5097                                                 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
5098                                                 pid_v,
5099                                                 "Failed to get port info.");
5100                         flow_src_port = port_priv->representor_id;
5101                         if (!fm->def_policy && wks->policy->is_hierarchy &&
5102                             flow_src_port != priv->representor_id) {
5103                                 if (flow_drv_mtr_hierarchy_rule_create(dev,
5104                                                                 flow, fm,
5105                                                                 flow_src_port,
5106                                                                 items,
5107                                                                 error))
5108                                         return -rte_errno;
5109                         }
5110                         memcpy(sfx_items, items, sizeof(*sfx_items));
5111                         sfx_items++;
5112                         break;
5113                 case RTE_FLOW_ITEM_TYPE_VLAN:
5114                         /* Remember the VLAN item; it is copied below only if needed. */
5115                         vlan_item_src = items;
5116                         vlan_item_dst = sfx_items++;
5117                         vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5118                         break;
5119                 default:
5120                         break;
5121                 }
5122         }
5123         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5124         sfx_items++;
5125         mtr_first = priv->sh->meter_aso_en &&
5126                 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5127         /* For ASO meter, meter must be before tag in TX direction. */
5128         if (mtr_first) {
5129                 action_pre_head = actions_pre++;
5130                 /* Leave space for tag action. */
5131                 tag_action = actions_pre++;
5132         }
5133         /* Prepare the actions for prefix and suffix flow. */
5134         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5135                 struct rte_flow_action *action_cur = NULL;
5136
5137                 switch (actions->type) {
5138                 case RTE_FLOW_ACTION_TYPE_METER:
5139                         if (mtr_first) {
5140                                 action_cur = action_pre_head;
5141                         } else {
5142                                 /* Leave space for tag action. */
5143                                 tag_action = actions_pre++;
5144                                 action_cur = actions_pre++;
5145                         }
5146                         break;
5147                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5148                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5149                         action_cur = actions_pre++;
5150                         break;
5151                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5152                         raw_encap = actions->conf;
5153                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5154                                 action_cur = actions_pre++;
5155                         break;
5156                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5157                         raw_decap = actions->conf;
5158                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5159                                 action_cur = actions_pre++;
5160                         break;
5161                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5162                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5163                         if (vlan_item_dst && vlan_item_src) {
5164                                 memcpy(vlan_item_dst, vlan_item_src,
5165                                         sizeof(*vlan_item_dst));
5166                                 /*
5167                                  * Convert to an internal match item; it is
5168                                  * used for VLAN push and set VID actions.
5169                                  */
5170                                 vlan_item_dst->type = (enum rte_flow_item_type)
5171                                                 MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5172                         }
5173                         break;
5174                 default:
5175                         break;
5176                 }
5177                 if (!action_cur)
5178                         action_cur = (fm->def_policy) ?
5179                                         actions_sfx++ : actions_pre++;
5180                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5181         }
5182         /* Add end action to the actions. */
5183         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5184         if (priv->sh->meter_aso_en) {
5185                 /*
5186                  * For the ASO meter, an extra jump action must be added
5187                  * explicitly to jump from the meter table to the policer table.
5188                  */
5189                 struct mlx5_flow_meter_sub_policy *sub_policy;
5190                 struct mlx5_flow_tbl_data_entry *tbl_data;
5191
5192                 if (!fm->def_policy) {
5193                         sub_policy = get_meter_sub_policy(dev, flow, wks,
5194                                                           attr, orig_items,
5195                                                           error);
5196                         if (!sub_policy)
5197                                 return -rte_errno;
5198                 } else {
5199                         enum mlx5_meter_domain mtr_domain =
5200                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5201                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5202                                                 MLX5_MTR_DOMAIN_INGRESS);
5203
5204                         sub_policy =
5205                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5206                 }
5207                 tbl_data = container_of(sub_policy->tbl_rsc,
5208                                         struct mlx5_flow_tbl_data_entry, tbl);
5209                 hw_mtr_action = actions_pre++;
5210                 hw_mtr_action->type = (enum rte_flow_action_type)
5211                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5212                 hw_mtr_action->conf = tbl_data->jump.action;
5213         }
5214         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5215         actions_pre++;
5216         if (!tag_action)
5217                 return rte_flow_error_set(error, ENOMEM,
5218                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5219                                           NULL, "No tag action space.");
5220         if (!mtr_flow_id) {
5221                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5222                 goto exit;
5223         }
5224         /* Only default-policy Meter creates mtr flow id. */
5225         if (fm->def_policy) {
5226                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5227                 if (!tag_id)
5228                         return rte_flow_error_set(error, ENOMEM,
5229                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5230                                         "Failed to allocate meter flow id.");
5231                 flow_id = tag_id - 1;
5232                 flow_id_bits = (!flow_id) ? 1 :
5233                                 (MLX5_REG_BITS - __builtin_clz(flow_id));
5234                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5235                     mtr_reg_bits) {
5236                         mlx5_ipool_free(fm->flow_ipool, tag_id);
5237                         return rte_flow_error_set(error, EINVAL,
5238                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5239                                         "Meter flow id exceeds max limit.");
5240                 }
5241                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5242                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5243         }
5244         /* Build tag actions and items for meter_id/meter flow_id. */
5245         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5246         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5247         tag_item_mask = tag_item_spec + 1;
5248         /* Both flow_id and meter_id share the same register. */
5249         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5250                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5251                                                             0, error),
5252                 .offset = mtr_id_offset,
5253                 .length = mtr_reg_bits,
5254                 .data = flow->meter,
5255         };
5256         /*
5257          * The color register bits used by flow_id grow from MSB to LSB,
5258          * so the flow_id value must be bit-reversed before writing it to the register.
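         * For example (illustrative only): flow_id = 0x6 (0b110, 3 bits)
         * is reversed to 0b011 and then shifted left by
         * (mtr_reg_bits - flow_id_bits) before being OR-ed into the tag data.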
5259          */
5260         for (shift = 0; shift < flow_id_bits; shift++)
5261                 flow_id_reversed = (flow_id_reversed << 1) |
5262                                 ((flow_id >> shift) & 0x1);
5263         set_tag->data |=
5264                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5265         tag_item_spec->id = set_tag->id;
5266         tag_item_spec->data = set_tag->data << mtr_id_offset;
5267         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5268         tag_action->type = (enum rte_flow_action_type)
5269                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5270         tag_action->conf = set_tag;
5271         tag_item->type = (enum rte_flow_item_type)
5272                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5273         tag_item->spec = tag_item_spec;
5274         tag_item->last = NULL;
5275         tag_item->mask = tag_item_mask;
5276 exit:
5277         if (mtr_flow_id)
5278                 *mtr_flow_id = tag_id;
5279         return 0;
5280 }
5281
5282 /**
5283  * Split action list having QUEUE/RSS for metadata register copy.
5284  *
5285  * Once a Q/RSS action is detected in the user's action list, the flow actions
5286  * should be split in order to copy the metadata registers, which happens in
5287  * RX_CP_TBL as follows:
5288  *   - CQE->flow_tag := reg_c[1] (MARK)
5289  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5290  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5291  * This is because the last action of each flow must be a terminal action
5292  * (QUEUE, RSS or DROP).
5293  *
5294  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
5295  * stored and kept in the mlx5_flow structure per each sub_flow.
5296  *
5297  * The Q/RSS action is replaced with,
5298  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5299  * And the following JUMP action is added at the end,
5300  *   - JUMP, to RX_CP_TBL.
5301  *
5302  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
5303  * flow_create_split_metadata() routine. The flow will look like,
5304  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
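 *
 * For illustration only (a rough sketch, not normative), a NIC Rx action list
 *   MARK / RSS / END
 * is rewritten here as
 *   MARK / SET_TAG(flow_id -> reg_c[2]) / JUMP(RX_CP_TBL) / END
 * and the original RSS is re-created later in RX_ACT_TBL.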
5305  *
5306  * @param dev
5307  *   Pointer to Ethernet device.
5308  * @param[out] split_actions
5309  *   Pointer to store split actions to jump to CP_TBL.
5310  * @param[in] actions
5311  *   Pointer to the list of original flow actions.
5312  * @param[in] qrss
5313  *   Pointer to the Q/RSS action.
5314  * @param[in] actions_n
5315  *   Number of original actions.
5316  * @param[in] mtr_sfx
5317  *   Check if it is in meter suffix table.
5318  * @param[out] error
5319  *   Perform verbose error reporting if not NULL.
5320  *
5321  * @return
5322  *   non-zero unique flow_id on success, otherwise 0 and
5323  *   error/rte_errno are set.
5324  */
5325 static uint32_t
5326 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5327                           struct rte_flow_action *split_actions,
5328                           const struct rte_flow_action *actions,
5329                           const struct rte_flow_action *qrss,
5330                           int actions_n, int mtr_sfx,
5331                           struct rte_flow_error *error)
5332 {
5333         struct mlx5_priv *priv = dev->data->dev_private;
5334         struct mlx5_rte_flow_action_set_tag *set_tag;
5335         struct rte_flow_action_jump *jump;
5336         const int qrss_idx = qrss - actions;
5337         uint32_t flow_id = 0;
5338         int ret = 0;
5339
5340         /*
5341          * Given actions will be split
5342          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5343          * - Add jump to mreg CP_TBL.
5344          * As a result, there will be one more action.
5345          */
5346         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5347         /* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5348         ++actions_n;
5349         set_tag = (void *)(split_actions + actions_n);
5350         /*
5351          * If this is not the meter suffix flow, add the tag action;
5352          * the meter suffix flow already has the tag added.
5353          */
5354         if (!mtr_sfx) {
5355                 /*
5356                  * Allocate the new subflow ID. This one is unique within
5357                  * we would have to resolve a multi-thread access
5358                  * synchronization issue. Each flow on the shared device is appended
5359                  * issue. Each flow on the shared device is appended
5360                  * with source vport identifier, so the resulting
5361                  * flows will be unique in the shared (by master and
5362                  * representors) domain even if they have coinciding
5363                  * IDs.
5364                  */
5365                 mlx5_ipool_malloc(priv->sh->ipool
5366                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5367                 if (!flow_id)
5368                         return rte_flow_error_set(error, ENOMEM,
5369                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5370                                                   NULL, "can't allocate id "
5371                                                   "for split Q/RSS subflow");
5372                 /* Internal SET_TAG action to set flow ID. */
5373                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
5374                         .data = flow_id,
5375                 };
5376                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5377                 if (ret < 0)
5378                         return ret;
5379                 set_tag->id = ret;
5380                 /* Construct new actions array. */
5381                 /* Replace QUEUE/RSS action. */
5382                 split_actions[qrss_idx] = (struct rte_flow_action){
5383                         .type = (enum rte_flow_action_type)
5384                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5385                         .conf = set_tag,
5386                 };
5387         } else {
5388                 /*
5389                  * If this is the meter suffix flow, the tag already exists.
5390                  * Set the QUEUE/RSS action to void.
5391                  */
5392                 split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5393         }
5394         /* JUMP action to jump to mreg copy table (CP_TBL). */
5395         jump = (void *)(set_tag + 1);
5396         *jump = (struct rte_flow_action_jump){
5397                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5398         };
5399         split_actions[actions_n - 2] = (struct rte_flow_action){
5400                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5401                 .conf = jump,
5402         };
5403         split_actions[actions_n - 1] = (struct rte_flow_action){
5404                 .type = RTE_FLOW_ACTION_TYPE_END,
5405         };
5406         return flow_id;
5407 }
5408
5409 /**
5410  * Extend the given action list for Tx metadata copy.
5411  *
5412  * Copy the given action list to the ext_actions and add flow metadata register
5413  * copy action in order to copy reg_a set by WQE to reg_c[0].
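 *
 * For illustration only (a rough sketch, not normative), an egress action list
 *   MODIFY_FIELD / END
 * is extended here to roughly
 *   MODIFY_FIELD / COPY_MREG(reg_a -> reg_c[0]) / END
 * with the copy action placed before an encapsulation action if one is present.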
5414  *
5415  * @param[out] ext_actions
5416  *   Pointer to the extended action list.
5417  * @param[in] actions
5418  *   Pointer to the list of actions.
5419  * @param[in] actions_n
5420  *   Number of actions in the list.
5421  * @param[out] error
5422  *   Perform verbose error reporting if not NULL.
5423  * @param[in] encap_idx
5424  *   The encap action index.
5425  *
5426  * @return
5427  *   0 on success, negative value otherwise
5428  */
5429 static int
5430 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5431                        struct rte_flow_action *ext_actions,
5432                        const struct rte_flow_action *actions,
5433                        int actions_n, struct rte_flow_error *error,
5434                        int encap_idx)
5435 {
5436         struct mlx5_flow_action_copy_mreg *cp_mreg =
5437                 (struct mlx5_flow_action_copy_mreg *)
5438                         (ext_actions + actions_n + 1);
5439         int ret;
5440
5441         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5442         if (ret < 0)
5443                 return ret;
5444         cp_mreg->dst = ret;
5445         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5446         if (ret < 0)
5447                 return ret;
5448         cp_mreg->src = ret;
5449         if (encap_idx != 0)
5450                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5451         if (encap_idx == actions_n - 1) {
5452                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5453                         .type = (enum rte_flow_action_type)
5454                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5455                         .conf = cp_mreg,
5456                 };
5457                 ext_actions[actions_n] = (struct rte_flow_action){
5458                         .type = RTE_FLOW_ACTION_TYPE_END,
5459                 };
5460         } else {
5461                 ext_actions[encap_idx] = (struct rte_flow_action){
5462                         .type = (enum rte_flow_action_type)
5463                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5464                         .conf = cp_mreg,
5465                 };
5466                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5467                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5468         }
5469         return 0;
5470 }
5471
5472 /**
5473  * Check the match action from the action list.
5474  *
5475  * @param[in] actions
5476  *   Pointer to the list of actions.
5477  * @param[in] attr
5478  *   Flow rule attributes.
5479  * @param[in] action
5480  *   The action to check for in the list.
5481  * @param[out] match_action_pos
5482  *   Pointer to the position of the matched action, or -1 if not found.
5483  * @param[out] qrss_action_pos
5484  *   Pointer to the position of the Queue/RSS action, or -1 if not found.
5485  * @param[out] modify_after_mirror
5486  *   Pointer to the flag of modify action after FDB mirroring.
5487  *
5488  * @return
5489  *   > 0 the total number of actions.
5490  *   0 if the match action is not found in the action list.
5491  */
5492 static int
5493 flow_check_match_action(const struct rte_flow_action actions[],
5494                         const struct rte_flow_attr *attr,
5495                         enum rte_flow_action_type action,
5496                         int *match_action_pos, int *qrss_action_pos,
5497                         int *modify_after_mirror)
5498 {
5499         const struct rte_flow_action_sample *sample;
5500         const struct rte_flow_action_raw_decap *decap;
5501         int actions_n = 0;
5502         uint32_t ratio = 0;
5503         int sub_type = 0;
5504         int flag = 0;
5505         int fdb_mirror = 0;
5506
5507         *match_action_pos = -1;
5508         *qrss_action_pos = -1;
5509         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5510                 if (actions->type == action) {
5511                         flag = 1;
5512                         *match_action_pos = actions_n;
5513                 }
5514                 switch (actions->type) {
5515                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5516                 case RTE_FLOW_ACTION_TYPE_RSS:
5517                         *qrss_action_pos = actions_n;
5518                         break;
5519                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5520                         sample = actions->conf;
5521                         ratio = sample->ratio;
5522                         sub_type = ((const struct rte_flow_action *)
5523                                         (sample->actions))->type;
5524                         if (ratio == 1 && attr->transfer)
5525                                 fdb_mirror = 1;
5526                         break;
5527                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5528                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5529                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5530                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5531                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5532                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5533                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5534                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5535                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5536                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5537                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5538                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5539                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5540                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5541                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5542                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5543                 case RTE_FLOW_ACTION_TYPE_FLAG:
5544                 case RTE_FLOW_ACTION_TYPE_MARK:
5545                 case RTE_FLOW_ACTION_TYPE_SET_META:
5546                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5547                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5548                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5549                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5550                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5551                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5552                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5553                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5554                 case RTE_FLOW_ACTION_TYPE_METER:
5555                         if (fdb_mirror)
5556                                 *modify_after_mirror = 1;
5557                         break;
5558                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5559                         decap = actions->conf;
5560                         while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5561                                 ;
5562                         actions_n++;
5563                         if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5564                                 const struct rte_flow_action_raw_encap *encap =
5565                                                                 actions->conf;
5566                                 if (decap->size <=
5567                                         MLX5_ENCAPSULATION_DECISION_SIZE &&
5568                                     encap->size >
5569                                         MLX5_ENCAPSULATION_DECISION_SIZE)
5570                                         /* L3 encap. */
5571                                         break;
5572                         }
5573                         if (fdb_mirror)
5574                                 *modify_after_mirror = 1;
5575                         break;
5576                 default:
5577                         break;
5578                 }
5579                 actions_n++;
5580         }
5581         if (flag && fdb_mirror && !*modify_after_mirror) {
5582                 /* FDB mirroring is implemented with the destination array
5583                  * instead of the FLOW_SAMPLER object.
5584                  */
5585                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5586                         flag = 0;
5587         }
5588         /* Count RTE_FLOW_ACTION_TYPE_END. */
5589         return flag ? actions_n + 1 : 0;
5590 }
5591
5592 #define SAMPLE_SUFFIX_ITEM 2
5593
5594 /**
5595  * Split the sample flow.
5596  *
5597  * As the sample flow will be split into two sub flows, the sample flow
5598  * keeps the sample action while the other actions move to a new suffix flow.
5599  *
5600  * A unique tag id is also added with a tag action in the sample flow;
5601  * the same tag id is then matched in the suffix flow.
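 *
 * For illustration only (a rough sketch, not normative):
 *   original actions: SAMPLE / QUEUE / END
 *   prefix actions:   [SET_TAG(unique id)] / SAMPLE / [JUMP] / END
 *   suffix items:     TAG(unique id) / END
 *   suffix actions:   QUEUE / END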
5602  *
5603  * @param dev
5604  *   Pointer to Ethernet device.
5605  * @param[in] add_tag
5606  *   Add extra tag action flag.
5607  * @param[out] sfx_items
5608  *   Suffix flow match items (list terminated by the END pattern item).
5609  * @param[in] actions
5610  *   Associated actions (list terminated by the END action).
5611  * @param[out] actions_sfx
5612  *   Suffix flow actions.
5613  * @param[out] actions_pre
5614  *   Prefix flow actions.
5615  * @param[in] actions_n
5616  *  The total number of actions.
5617  * @param[in] sample_action_pos
5618  *   The sample action position.
5619  * @param[in] qrss_action_pos
5620  *   The Queue/RSS action position.
5621  * @param[in] jump_table
5622  *   Add extra jump action flag.
5623  * @param[out] error
5624  *   Perform verbose error reporting if not NULL.
5625  *
5626  * @return
5627  *   0 or a unique non-zero flow_id on success, a negative errno value
5628  *   otherwise and rte_errno is set.
5629  */
5630 static int
5631 flow_sample_split_prep(struct rte_eth_dev *dev,
5632                        int add_tag,
5633                        struct rte_flow_item sfx_items[],
5634                        const struct rte_flow_action actions[],
5635                        struct rte_flow_action actions_sfx[],
5636                        struct rte_flow_action actions_pre[],
5637                        int actions_n,
5638                        int sample_action_pos,
5639                        int qrss_action_pos,
5640                        int jump_table,
5641                        struct rte_flow_error *error)
5642 {
5643         struct mlx5_priv *priv = dev->data->dev_private;
5644         struct mlx5_rte_flow_action_set_tag *set_tag;
5645         struct mlx5_rte_flow_item_tag *tag_spec;
5646         struct mlx5_rte_flow_item_tag *tag_mask;
5647         struct rte_flow_action_jump *jump_action;
5648         uint32_t tag_id = 0;
5649         int index;
5650         int append_index = 0;
5651         int ret;
5652
5653         if (sample_action_pos < 0)
5654                 return rte_flow_error_set(error, EINVAL,
5655                                           RTE_FLOW_ERROR_TYPE_ACTION,
5656                                           NULL, "invalid position of sample "
5657                                           "action in list");
5658         /* Prepare the actions for prefix and suffix flow. */
5659         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5660                 index = qrss_action_pos;
5661                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
5662                 if (index != 0)
5663                         memcpy(actions_pre, actions,
5664                                sizeof(struct rte_flow_action) * index);
5665                 /* Put the other actions preceding the sample action into the prefix flow. */
5666                 if (sample_action_pos > index + 1)
5667                         memcpy(actions_pre + index, actions + index + 1,
5668                                sizeof(struct rte_flow_action) *
5669                                (sample_action_pos - index - 1));
5670                 index = sample_action_pos - 1;
5671                 /* Put Queue/RSS action into Suffix flow. */
5672                 memcpy(actions_sfx, actions + qrss_action_pos,
5673                        sizeof(struct rte_flow_action));
5674                 actions_sfx++;
5675         } else {
5676                 index = sample_action_pos;
5677                 if (index != 0)
5678                         memcpy(actions_pre, actions,
5679                                sizeof(struct rte_flow_action) * index);
5680         }
5681         /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5682          * For CX6DX and above, metadata registers Cx preserve their value,
5683          * so add an extra tag action for NIC-RX and the E-Switch domain.
5684          */
5685         if (add_tag) {
5686                 /* Prepare the prefix tag action. */
5687                 append_index++;
5688                 set_tag = (void *)(actions_pre + actions_n + append_index);
5689                 ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
5690                 if (ret < 0)
5691                         return ret;
5692                 mlx5_ipool_malloc(priv->sh->ipool
5693                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5694                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5695                         .id = ret,
5696                         .data = tag_id,
5697                 };
5698                 /* Prepare the suffix subflow items. */
5699                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5700                 tag_spec->data = tag_id;
5701                 tag_spec->id = set_tag->id;
5702                 tag_mask = tag_spec + 1;
5703                 tag_mask->data = UINT32_MAX;
5704                 sfx_items[0] = (struct rte_flow_item){
5705                         .type = (enum rte_flow_item_type)
5706                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5707                         .spec = tag_spec,
5708                         .last = NULL,
5709                         .mask = tag_mask,
5710                 };
5711                 sfx_items[1] = (struct rte_flow_item){
5712                         .type = (enum rte_flow_item_type)
5713                                 RTE_FLOW_ITEM_TYPE_END,
5714                 };
5715                 /* Prepare the tag action in prefix subflow. */
5716                 actions_pre[index++] =
5717                         (struct rte_flow_action){
5718                         .type = (enum rte_flow_action_type)
5719                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5720                         .conf = set_tag,
5721                 };
5722         }
5723         memcpy(actions_pre + index, actions + sample_action_pos,
5724                sizeof(struct rte_flow_action));
5725         index += 1;
5726         /* If there is a modify action after the sample action in E-Switch
5727          * mirroring, add an extra jump action in the prefix subflow to jump
5728          * into the next table, then do the modify action in the new table.
5729          */
5730         if (jump_table) {
5731                 /* Prepare the prefix jump action. */
5732                 append_index++;
5733                 jump_action = (void *)(actions_pre + actions_n + append_index);
5734                 jump_action->group = jump_table;
5735                 actions_pre[index++] =
5736                         (struct rte_flow_action){
5737                         .type = (enum rte_flow_action_type)
5738                                 RTE_FLOW_ACTION_TYPE_JUMP,
5739                         .conf = jump_action,
5740                 };
5741         }
5742         actions_pre[index] = (struct rte_flow_action){
5743                 .type = (enum rte_flow_action_type)
5744                         RTE_FLOW_ACTION_TYPE_END,
5745         };
5746         /* Put the actions after sample into Suffix flow. */
5747         memcpy(actions_sfx, actions + sample_action_pos + 1,
5748                sizeof(struct rte_flow_action) *
5749                (actions_n - sample_action_pos - 1));
5750         return tag_id;
5751 }
5752
5753 /**
5754  * The splitting for metadata feature.
5755  *
5756  * - Q/RSS action on NIC Rx should be split in order to pass by
5757  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
5758  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
5759  *
5760  * - All the actions on NIC Tx should have a mreg copy action to
5761  *   copy reg_a from WQE to reg_c[0].
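 *
 * For illustration only (a rough sketch of the resulting NIC Rx chain,
 * not normative):
 *   original group:  items -> prefix actions / SET_TAG / JUMP(RX_CP_TBL)
 *   RX_CP_TBL:       copy reg_c[1]/reg_c[0] to the CQE fields, go to RX_ACT_TBL
 *   RX_ACT_TBL:      match TAG(flow id) -> original Q/RSS action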
5762  *
5763  * @param dev
5764  *   Pointer to Ethernet device.
5765  * @param[in] flow
5766  *   Parent flow structure pointer.
5767  * @param[in] attr
5768  *   Flow rule attributes.
5769  * @param[in] items
5770  *   Pattern specification (list terminated by the END pattern item).
5771  * @param[in] actions
5772  *   Associated actions (list terminated by the END action).
5773  * @param[in] flow_split_info
5774  *   Pointer to flow split info structure.
5775  * @param[out] error
5776  *   Perform verbose error reporting if not NULL.
5777  * @return
5778  *   0 on success, negative value otherwise
5779  */
5780 static int
5781 flow_create_split_metadata(struct rte_eth_dev *dev,
5782                            struct rte_flow *flow,
5783                            const struct rte_flow_attr *attr,
5784                            const struct rte_flow_item items[],
5785                            const struct rte_flow_action actions[],
5786                            struct mlx5_flow_split_info *flow_split_info,
5787                            struct rte_flow_error *error)
5788 {
5789         struct mlx5_priv *priv = dev->data->dev_private;
5790         struct mlx5_sh_config *config = &priv->sh->config;
5791         const struct rte_flow_action *qrss = NULL;
5792         struct rte_flow_action *ext_actions = NULL;
5793         struct mlx5_flow *dev_flow = NULL;
5794         uint32_t qrss_id = 0;
5795         int mtr_sfx = 0;
5796         size_t act_size;
5797         int actions_n;
5798         int encap_idx;
5799         int ret;
5800
5801         /* Check whether extensive metadata feature is engaged. */
5802         if (!config->dv_flow_en ||
5803             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5804             !mlx5_flow_ext_mreg_supported(dev))
5805                 return flow_create_split_inner(dev, flow, NULL, attr, items,
5806                                                actions, flow_split_info, error);
5807         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
5808                                                            &encap_idx);
5809         if (qrss) {
5810                 /* Exclude hairpin flows from splitting. */
5811                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
5812                         const struct rte_flow_action_queue *queue;
5813
5814                         queue = qrss->conf;
5815                         if (mlx5_rxq_get_type(dev, queue->index) ==
5816                             MLX5_RXQ_TYPE_HAIRPIN)
5817                                 qrss = NULL;
5818                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5819                         const struct rte_flow_action_rss *rss;
5820
5821                         rss = qrss->conf;
5822                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5823                             MLX5_RXQ_TYPE_HAIRPIN)
5824                                 qrss = NULL;
5825                 }
5826         }
5827         if (qrss) {
5828                 /* Check if it is in meter suffix table. */
5829                 mtr_sfx = attr->group == (attr->transfer ?
5830                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5831                           MLX5_FLOW_TABLE_LEVEL_METER);
5832                 /*
5833                  * Q/RSS action on NIC Rx should be split in order to pass by
5834                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
5835                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
5836                  */
5837                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5838                            sizeof(struct rte_flow_action_set_tag) +
5839                            sizeof(struct rte_flow_action_jump);
5840                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5841                                           SOCKET_ID_ANY);
5842                 if (!ext_actions)
5843                         return rte_flow_error_set(error, ENOMEM,
5844                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5845                                                   NULL, "no memory to split "
5846                                                   "metadata flow");
5847                 /*
5848                  * Create the new actions list with removed Q/RSS action
5849                  * and appended set tag and jump to register copy table
5850                  * (RX_CP_TBL). We should preallocate unique tag ID here
5851                  * in advance, because it is needed for set tag action.
5852                  */
5853                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5854                                                     qrss, actions_n,
5855                                                     mtr_sfx, error);
5856                 if (!mtr_sfx && !qrss_id) {
5857                         ret = -rte_errno;
5858                         goto exit;
5859                 }
5860         } else if (attr->egress && !attr->transfer) {
5861                 /*
5862                  * All the actions on NIC Tx should have a metadata register
5863                  * copy action to copy reg_a from WQE to reg_c[meta]
5864                  */
5865                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5866                            sizeof(struct mlx5_flow_action_copy_mreg);
5867                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5868                                           SOCKET_ID_ANY);
5869                 if (!ext_actions)
5870                         return rte_flow_error_set(error, ENOMEM,
5871                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5872                                                   NULL, "no memory to split "
5873                                                   "metadata flow");
5874                 /* Create the action list appended with copy register. */
5875                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5876                                              actions_n, error, encap_idx);
5877                 if (ret < 0)
5878                         goto exit;
5879         }
5880         /* Add the unmodified original or prefix subflow. */
5881         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5882                                       items, ext_actions ? ext_actions :
5883                                       actions, flow_split_info, error);
5884         if (ret < 0)
5885                 goto exit;
5886         MLX5_ASSERT(dev_flow);
5887         if (qrss) {
5888                 const struct rte_flow_attr q_attr = {
5889                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5890                         .ingress = 1,
5891                 };
5892                 /* Internal PMD action to set register. */
5893                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5894                         .data = qrss_id,
5895                         .id = REG_NON,
5896                 };
5897                 struct rte_flow_item q_items[] = {
5898                         {
5899                                 .type = (enum rte_flow_item_type)
5900                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5901                                 .spec = &q_tag_spec,
5902                                 .last = NULL,
5903                                 .mask = NULL,
5904                         },
5905                         {
5906                                 .type = RTE_FLOW_ITEM_TYPE_END,
5907                         },
5908                 };
5909                 struct rte_flow_action q_actions[] = {
5910                         {
5911                                 .type = qrss->type,
5912                                 .conf = qrss->conf,
5913                         },
5914                         {
5915                                 .type = RTE_FLOW_ACTION_TYPE_END,
5916                         },
5917                 };
5918                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5919
5920                 /*
5921                  * Configure the tag item only if there is no meter subflow.
5922                  * Since the tag is already set in the meter suffix subflow,
5923                  * we can just use the meter suffix items as they are.
5924                  */
5925                 if (qrss_id) {
5926                         /* Not meter subflow. */
5927                         MLX5_ASSERT(!mtr_sfx);
5928                         /*
5929                          * Put the unique id in the prefix flow because it is
5930                          * destroyed after the suffix flow; the id is freed only
5931                          * when no actual flow uses it anymore, and only then
5932                          * does identifier reallocation become possible (for
5933                          * example, for other flows in other threads).
5934                          */
5935                         dev_flow->handle->split_flow_id = qrss_id;
5936                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5937                                                    error);
5938                         if (ret < 0)
5939                                 goto exit;
5940                         q_tag_spec.id = ret;
5941                 }
5942                 dev_flow = NULL;
5943                 /* Add suffix subflow to execute Q/RSS. */
5944                 flow_split_info->prefix_layers = layers;
5945                 flow_split_info->prefix_mark = 0;
5946                 flow_split_info->table_id = 0;
5947                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5948                                               &q_attr, mtr_sfx ? items :
5949                                               q_items, q_actions,
5950                                               flow_split_info, error);
5951                 if (ret < 0)
5952                         goto exit;
5953                 /* The qrss id is freed at exit only on failure; clear it on success. */
5954                 qrss_id = 0;
5955                 MLX5_ASSERT(dev_flow);
5956         }
5957
5958 exit:
5959         /*
5960          * We do not destroy the partially created sub_flows in case of error.
5961          * These ones are included into parent flow list and will be destroyed
5962          * by flow_drv_destroy.
5963          */
5964         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5965                         qrss_id);
5966         mlx5_free(ext_actions);
5967         return ret;
5968 }
5969
5970 /**
5971  * Create meter internal drop flow with the original pattern.
5972  *
5973  * @param dev
5974  *   Pointer to Ethernet device.
5975  * @param[in] flow
5976  *   Parent flow structure pointer.
5977  * @param[in] attr
5978  *   Flow rule attributes.
5979  * @param[in] items
5980  *   Pattern specification (list terminated by the END pattern item).
5981  * @param[in] flow_split_info
5982  *   Pointer to flow split info structure.
5983  * @param[in] fm
5984  *   Pointer to flow meter structure.
5985  * @param[out] error
5986  *   Perform verbose error reporting if not NULL.
5987  * @return
5988  *   0 on success, negative value otherwise
5989  */
5990 static uint32_t
5991 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5992                         struct rte_flow *flow,
5993                         const struct rte_flow_attr *attr,
5994                         const struct rte_flow_item items[],
5995                         struct mlx5_flow_split_info *flow_split_info,
5996                         struct mlx5_flow_meter_info *fm,
5997                         struct rte_flow_error *error)
5998 {
5999         struct mlx5_flow *dev_flow = NULL;
6000         struct rte_flow_attr drop_attr = *attr;
6001         struct rte_flow_action drop_actions[3];
6002         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6003
6004         MLX5_ASSERT(fm->drop_cnt);
6005         drop_actions[0].type =
6006                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6007         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6008         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6009         drop_actions[1].conf = NULL;
6010         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6011         drop_actions[2].conf = NULL;
6012         drop_split_info.external = false;
6013         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6014         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6015         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6016         return flow_create_split_inner(dev, flow, &dev_flow,
6017                                 &drop_attr, items, drop_actions,
6018                                 &drop_split_info, error);
6019 }
6020
6021 /**
6022  * The splitting for meter feature.
6023  *
6024  * - The meter flow will be split into two flows: a prefix flow and a
6025  *   suffix flow. The packets only make sense if they pass the prefix
6026  *   meter action.
6027  *
6028  * - Reg_C_5 is used for the packet to match between the prefix and
6029  *   suffix flow.
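 *
 * For illustration only (a rough sketch, not normative):
 *   prefix flow:  original items -> meter action, SET_TAG(meter/flow id)
 *   suffix flow:  meter suffix table, match TAG(meter/flow id) -> remaining actions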
6030  *
6031  * @param dev
6032  *   Pointer to Ethernet device.
6033  * @param[in] flow
6034  *   Parent flow structure pointer.
6035  * @param[in] attr
6036  *   Flow rule attributes.
6037  * @param[in] items
6038  *   Pattern specification (list terminated by the END pattern item).
6039  * @param[in] actions
6040  *   Associated actions (list terminated by the END action).
6041  * @param[in] flow_split_info
6042  *   Pointer to flow split info structure.
6043  * @param[out] error
6044  *   Perform verbose error reporting if not NULL.
6045  * @return
6046  *   0 on success, negative value otherwise
6047  */
6048 static int
6049 flow_create_split_meter(struct rte_eth_dev *dev,
6050                         struct rte_flow *flow,
6051                         const struct rte_flow_attr *attr,
6052                         const struct rte_flow_item items[],
6053                         const struct rte_flow_action actions[],
6054                         struct mlx5_flow_split_info *flow_split_info,
6055                         struct rte_flow_error *error)
6056 {
6057         struct mlx5_priv *priv = dev->data->dev_private;
6058         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6059         struct rte_flow_action *sfx_actions = NULL;
6060         struct rte_flow_action *pre_actions = NULL;
6061         struct rte_flow_item *sfx_items = NULL;
6062         struct mlx5_flow *dev_flow = NULL;
6063         struct rte_flow_attr sfx_attr = *attr;
6064         struct mlx5_flow_meter_info *fm = NULL;
6065         uint8_t skip_scale_restore;
6066         bool has_mtr = false;
6067         bool has_modify = false;
6068         bool set_mtr_reg = true;
6069         bool is_mtr_hierarchy = false;
6070         uint32_t meter_id = 0;
6071         uint32_t mtr_idx = 0;
6072         uint32_t mtr_flow_id = 0;
6073         size_t act_size;
6074         size_t item_size;
6075         int actions_n = 0;
6076         int ret = 0;
6077
6078         if (priv->mtr_en)
6079                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6080                                                     &has_modify, &meter_id);
6081         if (has_mtr) {
6082                 if (flow->meter) {
6083                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6084                         if (!fm)
6085                                 return rte_flow_error_set(error, EINVAL,
6086                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6087                                                 NULL, "Meter not found.");
6088                 } else {
6089                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6090                         if (!fm)
6091                                 return rte_flow_error_set(error, EINVAL,
6092                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6093                                                 NULL, "Meter not found.");
6094                         ret = mlx5_flow_meter_attach(priv, fm,
6095                                                      &sfx_attr, error);
6096                         if (ret)
6097                                 return -rte_errno;
6098                         flow->meter = mtr_idx;
6099                 }
6100                 MLX5_ASSERT(wks);
6101                 wks->fm = fm;
6102                 if (!fm->def_policy) {
6103                         wks->policy = mlx5_flow_meter_policy_find(dev,
6104                                                                   fm->policy_id,
6105                                                                   NULL);
6106                         MLX5_ASSERT(wks->policy);
6107                         if (wks->policy->is_hierarchy) {
6108                                 wks->final_policy =
6109                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
6110                                                                 wks->policy);
6111                                 if (!wks->final_policy)
6112                                         return rte_flow_error_set(error,
6113                                         EINVAL,
6114                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6115                                 "Failed to find terminal policy of hierarchy.");
6116                                 is_mtr_hierarchy = true;
6117                         }
6118                 }
6119                 /*
6120                  * If it is not a default-policy meter, and
6121                  * 1. it is not a meter hierarchy, and
6122                  * 2. either there is no action in the flow that changes
6123                  *    the packet (modify/encap/decap etc.), or no drop
6124                  *    count is needed for this meter,
6125                  * then there is no need to use regC to save the meter id.
6126                  */
6127                 if (!fm->def_policy && !is_mtr_hierarchy &&
6128                     (!has_modify || !fm->drop_cnt))
6129                         set_mtr_reg = false;
6130                 /* Prefix actions: meter, decap, encap, tag, jump, end. */
6131                 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) +
6132                            sizeof(struct mlx5_rte_flow_action_set_tag);
6133                 /* Suffix items: tag, vlan, port id, end. */
6134 #define METER_SUFFIX_ITEM 4
6135                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6136                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6137                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6138                                           0, SOCKET_ID_ANY);
6139                 if (!sfx_actions)
6140                         return rte_flow_error_set(error, ENOMEM,
6141                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6142                                                   NULL, "no memory to split "
6143                                                   "meter flow");
6144                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6145                              act_size);
6146                 /* There is no suffix flow for a meter with a non-default policy. */
6147                 if (!fm->def_policy)
6148                         pre_actions = sfx_actions + 1;
6149                 else
6150                         pre_actions = sfx_actions + actions_n;
6151                 ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6152                                             items, sfx_items, actions,
6153                                             sfx_actions, pre_actions,
6154                                             (set_mtr_reg ? &mtr_flow_id : NULL),
6155                                             error);
6156                 if (ret) {
6157                         ret = -rte_errno;
6158                         goto exit;
6159                 }
6160                 /* Add the prefix subflow. */
6161                 skip_scale_restore = flow_split_info->skip_scale;
6162                 flow_split_info->skip_scale |=
6163                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6164                 ret = flow_create_split_inner(dev, flow, &dev_flow,
6165                                               attr, items, pre_actions,
6166                                               flow_split_info, error);
6167                 flow_split_info->skip_scale = skip_scale_restore;
6168                 if (ret) {
6169                         if (mtr_flow_id)
6170                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6171                         ret = -rte_errno;
6172                         goto exit;
6173                 }
6174                 if (mtr_flow_id) {
6175                         dev_flow->handle->split_flow_id = mtr_flow_id;
6176                         dev_flow->handle->is_meter_flow_id = 1;
6177                 }
6178                 if (!fm->def_policy) {
6179                         if (!set_mtr_reg && fm->drop_cnt)
6180                                 ret =
6181                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6182                                                         &sfx_attr, items,
6183                                                         flow_split_info,
6184                                                         fm, error);
6185                         goto exit;
6186                 }
6187                 /* Set the sfx group attr. */
6188                 sfx_attr.group = sfx_attr.transfer ?
6189                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6190                                  MLX5_FLOW_TABLE_LEVEL_METER;
6191                 flow_split_info->prefix_layers =
6192                                 flow_get_prefix_layer_flags(dev_flow);
6193                 flow_split_info->prefix_mark |= wks->mark;
6194                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6195         }
6196         /* Add the suffix subflow. */
6197         ret = flow_create_split_metadata(dev, flow,
6198                                          &sfx_attr, sfx_items ?
6199                                          sfx_items : items,
6200                                          sfx_actions ? sfx_actions : actions,
6201                                          flow_split_info, error);
6202 exit:
6203         if (sfx_actions)
6204                 mlx5_free(sfx_actions);
6205         return ret;
6206 }
6207
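/*
 * Shape of the meter split, as an illustration only: the exact action and
 * item lists are produced by flow_meter_split_prep() and depend on the meter
 * policy; MODIFY_FIELD and QUEUE below are placeholder application actions.
 *
 *   original actions:  [ METER, MODIFY_FIELD, QUEUE, END ]
 *   prefix subflow:    original items +
 *                      [ METER, SET_TAG(meter flow id), JUMP(meter suffix table), END ]
 *   suffix subflow:    [ TAG(meter flow id), ... ] items +
 *                      [ MODIFY_FIELD, QUEUE, END ]
 *
 * The SET_TAG/TAG pair ties the two subflows together and is only generated
 * when set_mtr_reg is true (see the regC comment in the function above).
 */
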
6208 /**
6209  * The splitting for sample feature.
6210  *
6211  * Once a Sample action is detected in the action list, the flow actions are
6212  * split into a prefix sub flow and a suffix sub flow.
6213  *
6214  * The original items remain in the prefix sub flow. All actions preceding
6215  * the sample action, and the sample action itself, are copied to the prefix
6216  * sub flow; the actions following the sample action are copied to the
6217  * suffix sub flow, and the Queue action is always located in the suffix sub flow.
6218  *
6219  * In order to make the packets from the prefix sub flow match the suffix
6220  * sub flow, an extra tag action is added to the prefix sub flow, and the
6221  * suffix sub flow uses a tag item with the unique flow id.
6222  *
6223  * @param dev
6224  *   Pointer to Ethernet device.
6225  * @param[in] flow
6226  *   Parent flow structure pointer.
6227  * @param[in] attr
6228  *   Flow rule attributes.
6229  * @param[in] items
6230  *   Pattern specification (list terminated by the END pattern item).
6231  * @param[in] actions
6232  *   Associated actions (list terminated by the END action).
6233  * @param[in] flow_split_info
6234  *   Pointer to flow split info structure.
6235  * @param[out] error
6236  *   Perform verbose error reporting if not NULL.
6237  * @return
6238  *   0 on success, negative value otherwise
6239  */
6240 static int
6241 flow_create_split_sample(struct rte_eth_dev *dev,
6242                          struct rte_flow *flow,
6243                          const struct rte_flow_attr *attr,
6244                          const struct rte_flow_item items[],
6245                          const struct rte_flow_action actions[],
6246                          struct mlx5_flow_split_info *flow_split_info,
6247                          struct rte_flow_error *error)
6248 {
6249         struct mlx5_priv *priv = dev->data->dev_private;
6250         struct rte_flow_action *sfx_actions = NULL;
6251         struct rte_flow_action *pre_actions = NULL;
6252         struct rte_flow_item *sfx_items = NULL;
6253         struct mlx5_flow *dev_flow = NULL;
6254         struct rte_flow_attr sfx_attr = *attr;
6255 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6256         struct mlx5_flow_dv_sample_resource *sample_res;
6257         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6258         struct mlx5_flow_tbl_resource *sfx_tbl;
6259         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6260 #endif
6261         size_t act_size;
6262         size_t item_size;
6263         uint32_t fdb_tx = 0;
6264         int32_t tag_id = 0;
6265         int actions_n = 0;
6266         int sample_action_pos;
6267         int qrss_action_pos;
6268         int add_tag = 0;
6269         int modify_after_mirror = 0;
6270         uint16_t jump_table = 0;
6271         const uint32_t next_ft_step = 1;
6272         int ret = 0;
6273
6274         if (priv->sampler_en)
6275                 actions_n = flow_check_match_action(actions, attr,
6276                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
6277                                         &sample_action_pos, &qrss_action_pos,
6278                                         &modify_after_mirror);
6279         if (actions_n) {
6280                 /* The prefix actions must include sample, tag, end. */
6281                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6282                            + sizeof(struct mlx5_rte_flow_action_set_tag);
6283                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6284                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6285                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6286                                           item_size), 0, SOCKET_ID_ANY);
6287                 if (!sfx_actions)
6288                         return rte_flow_error_set(error, ENOMEM,
6289                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6290                                                   NULL, "no memory to split "
6291                                                   "sample flow");
6292                 /* The representor_id is UINT16_MAX for uplink. */
6293                 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6294                 /*
6295                  * When reg_c_preserve is set, metadata registers Cx preserve
6296                  * their value even through packet duplication.
6297                  */
6298                 add_tag = (!fdb_tx ||
6299                            priv->sh->cdev->config.hca_attr.reg_c_preserve);
6300                 if (add_tag)
6301                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6302                                         + act_size);
6303                 if (modify_after_mirror)
6304                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6305                                      next_ft_step;
6306                 pre_actions = sfx_actions + actions_n;
6307                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
6308                                                 actions, sfx_actions,
6309                                                 pre_actions, actions_n,
6310                                                 sample_action_pos,
6311                                                 qrss_action_pos, jump_table,
6312                                                 error);
6313                 if (tag_id < 0 || (add_tag && !tag_id)) {
6314                         ret = -rte_errno;
6315                         goto exit;
6316                 }
6317                 if (modify_after_mirror)
6318                         flow_split_info->skip_scale =
6319                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6320                 /* Add the prefix subflow. */
6321                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6322                                               items, pre_actions,
6323                                               flow_split_info, error);
6324                 if (ret) {
6325                         ret = -rte_errno;
6326                         goto exit;
6327                 }
6328                 dev_flow->handle->split_flow_id = tag_id;
6329 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6330                 if (!modify_after_mirror) {
6331                         /* Set the sfx group attr. */
6332                         sample_res = (struct mlx5_flow_dv_sample_resource *)
6333                                                 dev_flow->dv.sample_res;
6334                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
6335                                                 sample_res->normal_path_tbl;
6336                         sfx_tbl_data = container_of(sfx_tbl,
6337                                                 struct mlx5_flow_tbl_data_entry,
6338                                                 tbl);
6339                         sfx_attr.group = sfx_attr.transfer ?
6340                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6341                 } else {
6342                         MLX5_ASSERT(attr->transfer);
6343                         sfx_attr.group = jump_table;
6344                 }
6345                 flow_split_info->prefix_layers =
6346                                 flow_get_prefix_layer_flags(dev_flow);
6347                 MLX5_ASSERT(wks);
6348                 flow_split_info->prefix_mark |= wks->mark;
6349                 /* The suffix group level has already been scaled by the
6350                  * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6351                  * to avoid scaling again in translation.
6352                  */
6353                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6354 #endif
6355         }
6356         /* Add the suffix subflow. */
6357         ret = flow_create_split_meter(dev, flow, &sfx_attr,
6358                                       sfx_items ? sfx_items : items,
6359                                       sfx_actions ? sfx_actions : actions,
6360                                       flow_split_info, error);
6361 exit:
6362         if (sfx_actions)
6363                 mlx5_free(sfx_actions);
6364         return ret;
6365 }
6366
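/*
 * Shape of the sample split, as an illustration only: the exact lists come
 * from flow_sample_split_prep() and depend on add_tag and modify_after_mirror;
 * COUNT and QUEUE below are placeholder application actions.
 *
 *   original actions:  [ COUNT, SAMPLE, QUEUE, END ]
 *   prefix subflow:    original items +
 *                      [ COUNT, SAMPLE, SET_TAG(flow id), END ]
 *   suffix subflow:    [ TAG(flow id), ... ] items +
 *                      [ QUEUE, END ]
 *
 * The tag pair is skipped when add_tag is not set, i.e. on FDB Tx without
 * the reg_c_preserve capability.
 */
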
6367 /**
6368  * Split the flow into a set of subflows. The splitters might be linked
6369  * in a chain, like this:
6370  * flow_create_split_outer() calls:
6371  *   flow_create_split_sample() calls:
6372  *     flow_create_split_meter() calls:
6373  *       flow_create_split_metadata(meter_subflow_0) calls:
6374  *         flow_create_split_inner(metadata_subflow_0)
6375  *         flow_create_split_inner(metadata_subflow_1)
6376  *       flow_create_split_metadata(meter_subflow_1) calls:
6377  *         flow_create_split_inner(metadata_subflow_0)
6378  *         flow_create_split_inner(metadata_subflow_1)
6379  *         flow_create_split_inner(metadata_subflow_2)
6380  *
6381  * This provides a flexible way to add new levels of flow splitting.
6382  * All successfully created subflows are included in the parent flow
6383  * dev_flow list.
6384  *
6385  * @param dev
6386  *   Pointer to Ethernet device.
6387  * @param[in] flow
6388  *   Parent flow structure pointer.
6389  * @param[in] attr
6390  *   Flow rule attributes.
6391  * @param[in] items
6392  *   Pattern specification (list terminated by the END pattern item).
6393  * @param[in] actions
6394  *   Associated actions (list terminated by the END action).
6395  * @param[in] flow_split_info
6396  *   Pointer to flow split info structure.
6397  * @param[out] error
6398  *   Perform verbose error reporting if not NULL.
6399  * @return
6400  *   0 on success, negative value otherwise
6401  */
6402 static int
6403 flow_create_split_outer(struct rte_eth_dev *dev,
6404                         struct rte_flow *flow,
6405                         const struct rte_flow_attr *attr,
6406                         const struct rte_flow_item items[],
6407                         const struct rte_flow_action actions[],
6408                         struct mlx5_flow_split_info *flow_split_info,
6409                         struct rte_flow_error *error)
6410 {
6411         int ret;
6412
6413         ret = flow_create_split_sample(dev, flow, attr, items,
6414                                        actions, flow_split_info, error);
6415         MLX5_ASSERT(ret <= 0);
6416         return ret;
6417 }
6418
6419 static inline struct mlx5_flow_tunnel *
6420 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6421 {
6422         struct mlx5_flow_tunnel *tunnel;
6423
6424 #pragma GCC diagnostic push
6425 #pragma GCC diagnostic ignored "-Wcast-qual"
6426         tunnel = (typeof(tunnel))flow->tunnel;
6427 #pragma GCC diagnostic pop
6428
6429         return tunnel;
6430 }
6431
6432 /**
6433  * Adjust flow RSS workspace if needed.
6434  *
6435  * @param wks
6436  *   Pointer to thread flow work space.
6437  * @param rss_desc
6438  *   Pointer to RSS descriptor.
6439  * @param[in] nrssq_num
6440  *   New RSS queue number.
6441  *
6442  * @return
6443  *   0 on success, -1 otherwise and rte_errno is set.
6444  */
6445 static int
6446 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6447                           struct mlx5_flow_rss_desc *rss_desc,
6448                           uint32_t nrssq_num)
6449 {
6450         if (likely(nrssq_num <= wks->rssq_num))
6451                 return 0;
6452         rss_desc->queue = realloc(rss_desc->queue,
6453                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6454         if (!rss_desc->queue) {
6455                 rte_errno = ENOMEM;
6456                 return -1;
6457         }
6458         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6459         return 0;
6460 }
6461
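/*
 * Minimal usage sketch, mirroring the call in flow_list_create() below (the
 * queue count is whatever the RSS action carries): the per-thread queue
 * array starts with MLX5_RSSQ_DEFAULT_NUM entries and is only ever grown,
 * with the new size rounded up to an even number of queues.
 *
 *   struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *   struct mlx5_flow_rss_desc *rss_desc = &wks->rss_desc;
 *
 *   if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
 *       return 0;   (rte_errno is set to ENOMEM)
 */
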
6462 /**
6463  * Create a flow and add it to the flow list of the given type.
6464  *
6465  * @param dev
6466  *   Pointer to Ethernet device.
6467  * @param type
6468  *   Flow type (MLX5_FLOW_TYPE_GEN, MLX5_FLOW_TYPE_CTL, ...). It selects
6469  *   the flow pool (priv->flows[type]) the created flow is allocated
6470  *   from, inserted into, and tracked by; the returned flow index is
6471  *   relative to this pool.
6472  * @param[in] attr
6473  *   Flow rule attributes.
6474  * @param[in] items
6475  *   Pattern specification (list terminated by the END pattern item).
6476  * @param[in] actions
6477  *   Associated actions (list terminated by the END action).
6478  * @param[in] external
6479  *   This flow rule is created by request external to PMD.
6480  * @param[out] error
6481  *   Perform verbose error reporting if not NULL.
6482  *
6483  * @return
6484  *   A flow index on success, 0 otherwise and rte_errno is set.
6485  */
6486 static uint32_t
6487 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6488                  const struct rte_flow_attr *attr,
6489                  const struct rte_flow_item items[],
6490                  const struct rte_flow_action original_actions[],
6491                  bool external, struct rte_flow_error *error)
6492 {
6493         struct mlx5_priv *priv = dev->data->dev_private;
6494         struct rte_flow *flow = NULL;
6495         struct mlx5_flow *dev_flow;
6496         const struct rte_flow_action_rss *rss = NULL;
6497         struct mlx5_translated_action_handle
6498                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6499         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6500         union {
6501                 struct mlx5_flow_expand_rss buf;
6502                 uint8_t buffer[4096];
6503         } expand_buffer;
6504         union {
6505                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6506                 uint8_t buffer[2048];
6507         } actions_rx;
6508         union {
6509                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6510                 uint8_t buffer[2048];
6511         } actions_hairpin_tx;
6512         union {
6513                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6514                 uint8_t buffer[2048];
6515         } items_tx;
6516         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6517         struct mlx5_flow_rss_desc *rss_desc;
6518         const struct rte_flow_action *p_actions_rx;
6519         uint32_t i;
6520         uint32_t idx = 0;
6521         int hairpin_flow;
6522         struct rte_flow_attr attr_tx = { .priority = 0 };
6523         const struct rte_flow_action *actions;
6524         struct rte_flow_action *translated_actions = NULL;
6525         struct mlx5_flow_tunnel *tunnel;
6526         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6527         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6528         struct mlx5_flow_split_info flow_split_info = {
6529                 .external = !!external,
6530                 .skip_scale = 0,
6531                 .flow_idx = 0,
6532                 .prefix_mark = 0,
6533                 .prefix_layers = 0,
6534                 .table_id = 0
6535         };
6536         int ret;
6537
6538         MLX5_ASSERT(wks);
6539         rss_desc = &wks->rss_desc;
6540         ret = flow_action_handles_translate(dev, original_actions,
6541                                             indir_actions,
6542                                             &indir_actions_n,
6543                                             &translated_actions, error);
6544         if (ret < 0) {
6545                 MLX5_ASSERT(translated_actions == NULL);
6546                 return 0;
6547         }
6548         actions = translated_actions ? translated_actions : original_actions;
6549         p_actions_rx = actions;
6550         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6551         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6552                                 external, hairpin_flow, error);
6553         if (ret < 0)
6554                 goto error_before_hairpin_split;
6555         flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6556         if (!flow) {
6557                 rte_errno = ENOMEM;
6558                 goto error_before_hairpin_split;
6559         }
6560         if (hairpin_flow > 0) {
6561                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6562                         rte_errno = EINVAL;
6563                         goto error_before_hairpin_split;
6564                 }
6565                 flow_hairpin_split(dev, actions, actions_rx.actions,
6566                                    actions_hairpin_tx.actions, items_tx.items,
6567                                    idx);
6568                 p_actions_rx = actions_rx.actions;
6569         }
6570         flow_split_info.flow_idx = idx;
6571         flow->drv_type = flow_get_drv_type(dev, attr);
6572         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6573                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6574         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6575         /* RSS Action only works on NIC RX domain */
6576         if (attr->ingress && !attr->transfer)
6577                 rss = flow_get_rss_action(dev, p_actions_rx);
6578         if (rss) {
6579                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6580                         return 0;
6581                 /*
6582                  * The following information is required by
6583                  * mlx5_flow_hashfields_adjust() in advance.
6584                  */
6585                 rss_desc->level = rss->level;
6586                 /* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6587                 rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6588         }
6589         flow->dev_handles = 0;
6590         if (rss && rss->types) {
6591                 unsigned int graph_root;
6592
6593                 graph_root = find_graph_root(rss->level);
6594                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6595                                            items, rss->types,
6596                                            mlx5_support_expansion, graph_root);
6597                 MLX5_ASSERT(ret > 0 &&
6598                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6599                 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6600                         for (i = 0; i < buf->entries; ++i)
6601                                 mlx5_dbg__print_pattern(buf->entry[i].pattern);
6602                 }
6603         } else {
6604                 buf->entries = 1;
6605                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6606         }
6607         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6608                                                       indir_actions_n);
6609         for (i = 0; i < buf->entries; ++i) {
6610                 /* Initialize flow split data. */
6611                 flow_split_info.prefix_layers = 0;
6612                 flow_split_info.prefix_mark = 0;
6613                 flow_split_info.skip_scale = 0;
6614                 /*
6615                  * The splitter may create multiple dev_flows,
6616                  * depending on configuration. In the simplest
6617                  * case it just creates the unmodified original flow.
6618                  */
6619                 ret = flow_create_split_outer(dev, flow, attr,
6620                                               buf->entry[i].pattern,
6621                                               p_actions_rx, &flow_split_info,
6622                                               error);
6623                 if (ret < 0)
6624                         goto error;
6625                 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6626                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6627                                                            p_actions_rx,
6628                                                            idx,
6629                                                            wks->flows[0].tunnel,
6630                                                            &default_miss_ctx,
6631                                                            error);
6632                         if (ret < 0) {
6633                                 mlx5_free(default_miss_ctx.queue);
6634                                 goto error;
6635                         }
6636                 }
6637         }
6638         /* Create the tx flow. */
6639         if (hairpin_flow) {
6640                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6641                 attr_tx.ingress = 0;
6642                 attr_tx.egress = 1;
6643                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6644                                          actions_hairpin_tx.actions,
6645                                          idx, error);
6646                 if (!dev_flow)
6647                         goto error;
6648                 dev_flow->flow = flow;
6649                 dev_flow->external = 0;
6650                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6651                               dev_flow->handle, next);
6652                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6653                                          items_tx.items,
6654                                          actions_hairpin_tx.actions, error);
6655                 if (ret < 0)
6656                         goto error;
6657         }
6658         /*
6659          * Update the metadata register copy table. If extensive
6660          * metadata feature is enabled and registers are supported
6661          * we might create the extra rte_flow for each unique
6662          * MARK/FLAG action ID.
6663          *
6664          * The table is updated for ingress Flows only, because
6665          * the egress Flows belong to the different device and
6666          * copy table should be updated in peer NIC Rx domain.
6667          */
6668         if (attr->ingress &&
6669             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6670                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6671                 if (ret)
6672                         goto error;
6673         }
6674         /*
6675          * If the flow is external (from application) OR device is started,
6676          * OR mreg discover, then apply immediately.
6677          */
6678         if (external || dev->data->dev_started ||
6679             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6680              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6681                 ret = flow_drv_apply(dev, flow, error);
6682                 if (ret < 0)
6683                         goto error;
6684         }
6685         flow->type = type;
6686         flow_rxq_flags_set(dev, flow);
6687         rte_free(translated_actions);
6688         tunnel = flow_tunnel_from_rule(wks->flows);
6689         if (tunnel) {
6690                 flow->tunnel = 1;
6691                 flow->tunnel_id = tunnel->tunnel_id;
6692                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6693                 mlx5_free(default_miss_ctx.queue);
6694         }
6695         mlx5_flow_pop_thread_workspace();
6696         return idx;
6697 error:
6698         MLX5_ASSERT(flow);
6699         ret = rte_errno; /* Save rte_errno before cleanup. */
6700         flow_mreg_del_copy_action(dev, flow);
6701         flow_drv_destroy(dev, flow);
6702         if (rss_desc->shared_rss)
6703                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6704                         mlx5_ipool_get
6705                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6706                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6707         mlx5_ipool_free(priv->flows[type], idx);
6708         rte_errno = ret; /* Restore rte_errno. */
6711         mlx5_flow_pop_thread_workspace();
6712 error_before_hairpin_split:
6713         rte_free(translated_actions);
6714         return 0;
6715 }
6716
6717 /**
6718  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
6719  * incoming packets to table 1.
6720  *
6721  * Other flow rules, requested for group n, will be created in
6722  * e-switch table n+1.
6723  * Jump action to e-switch group n will be created to group n+1.
6724  *
6725  * Used when working in switchdev mode, to utilise advantages of table 1
6726  * and above.
6727  *
6728  * @param dev
6729  *   Pointer to Ethernet device.
6730  *
6731  * @return
6732  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
6733  */
6734 struct rte_flow *
6735 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6736 {
6737         const struct rte_flow_attr attr = {
6738                 .group = 0,
6739                 .priority = 0,
6740                 .ingress = 1,
6741                 .egress = 0,
6742                 .transfer = 1,
6743         };
6744         const struct rte_flow_item pattern = {
6745                 .type = RTE_FLOW_ITEM_TYPE_END,
6746         };
6747         struct rte_flow_action_jump jump = {
6748                 .group = 1,
6749         };
6750         const struct rte_flow_action actions[] = {
6751                 {
6752                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6753                         .conf = &jump,
6754                 },
6755                 {
6756                         .type = RTE_FLOW_ACTION_TYPE_END,
6757                 },
6758         };
6759         struct rte_flow_error error;
6760
6761         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
6762                                                    &attr, &pattern,
6763                                                    actions, false, &error);
6764 }
6765
6766 /**
6767  * Create a dedicated flow rule on e-switch table 1 that matches the ESW
6768  * manager and SQ number and directs all packets to the peer vport.
6769  *
6770  * @param dev
6771  *   Pointer to Ethernet device.
6772  * @param txq
6773  *   Txq index.
6774  *
6775  * @return
6776  *   Flow ID on success, 0 otherwise and rte_errno is set.
6777  */
6778 uint32_t
6779 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
6780 {
6781         struct rte_flow_attr attr = {
6782                 .group = 0,
6783                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6784                 .ingress = 1,
6785                 .egress = 0,
6786                 .transfer = 1,
6787         };
6788         struct rte_flow_item_port_id port_spec = {
6789                 .id = MLX5_PORT_ESW_MGR,
6790         };
6791         struct mlx5_rte_flow_item_tx_queue txq_spec = {
6792                 .queue = txq,
6793         };
6794         struct rte_flow_item pattern[] = {
6795                 {
6796                         .type = RTE_FLOW_ITEM_TYPE_PORT_ID,
6797                         .spec = &port_spec,
6798                 },
6799                 {
6800                         .type = (enum rte_flow_item_type)
6801                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6802                         .spec = &txq_spec,
6803                 },
6804                 {
6805                         .type = RTE_FLOW_ITEM_TYPE_END,
6806                 },
6807         };
6808         struct rte_flow_action_jump jump = {
6809                 .group = 1,
6810         };
6811         struct rte_flow_action_port_id port = {
6812                 .id = dev->data->port_id,
6813         };
6814         struct rte_flow_action actions[] = {
6815                 {
6816                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6817                         .conf = &jump,
6818                 },
6819                 {
6820                         .type = RTE_FLOW_ACTION_TYPE_END,
6821                 },
6822         };
6823         struct rte_flow_error error;
6824
6825         /*
6826          * Creates group 0, highest priority jump flow.
6827          * Matches txq to bypass kernel packets.
6828          */
6829         if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
6830                              false, &error) == 0)
6831                 return 0;
6832         /* Create group 1, lowest priority redirect flow for txq. */
6833         attr.group = 1;
6834         actions[0].conf = &port;
6835         actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
6836         return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
6837                                 actions, false, &error);
6838 }
6839
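/*
 * Resulting rule chain, sketched: the first flow_list_create() call installs
 * a root table (group 0) rule matching the ESW manager port and the Tx queue
 * and jumping to group 1; the second call installs a lowest-priority group 1
 * rule with the same match forwarding to this port, i.e. the peer vport.
 * Together with mlx5_flow_create_esw_table_zero_flow() above this keeps the
 * regular flow rules off the limited root table.
 */
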
6840 /**
6841  * Validate a flow supported by the NIC.
6842  *
6843  * @see rte_flow_validate()
6844  * @see rte_flow_ops
6845  */
6846 int
6847 mlx5_flow_validate(struct rte_eth_dev *dev,
6848                    const struct rte_flow_attr *attr,
6849                    const struct rte_flow_item items[],
6850                    const struct rte_flow_action original_actions[],
6851                    struct rte_flow_error *error)
6852 {
6853         int hairpin_flow;
6854         struct mlx5_translated_action_handle
6855                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6856         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6857         const struct rte_flow_action *actions;
6858         struct rte_flow_action *translated_actions = NULL;
6859         int ret = flow_action_handles_translate(dev, original_actions,
6860                                                 indir_actions,
6861                                                 &indir_actions_n,
6862                                                 &translated_actions, error);
6863
6864         if (ret)
6865                 return ret;
6866         actions = translated_actions ? translated_actions : original_actions;
6867         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6868         ret = flow_drv_validate(dev, attr, items, actions,
6869                                 true, hairpin_flow, error);
6870         rte_free(translated_actions);
6871         return ret;
6872 }
6873
6874 /**
6875  * Create a flow.
6876  *
6877  * @see rte_flow_create()
6878  * @see rte_flow_ops
6879  */
6880 struct rte_flow *
6881 mlx5_flow_create(struct rte_eth_dev *dev,
6882                  const struct rte_flow_attr *attr,
6883                  const struct rte_flow_item items[],
6884                  const struct rte_flow_action actions[],
6885                  struct rte_flow_error *error)
6886 {
6887         struct mlx5_priv *priv = dev->data->dev_private;
6888
6889         if (priv->sh->config.dv_flow_en == 2) {
6890                 rte_flow_error_set(error, ENOTSUP,
6891                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6892                           NULL,
6893                           "Flow non-Q creation not supported");
6894                 return NULL;
6895         }
6896         /*
6897          * If the device is not started yet, it is not allowed to create a
6898          * flow from the application. PMD default flows and traffic control flows
6899          * are not affected.
6900          */
6901         if (unlikely(!dev->data->dev_started)) {
6902                 DRV_LOG(DEBUG, "port %u is not started when "
6903                         "inserting a flow", dev->data->port_id);
6904                 rte_flow_error_set(error, ENODEV,
6905                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6906                                    NULL,
6907                                    "port not started");
6908                 return NULL;
6909         }
6910
6911         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
6912                                                    attr, items, actions,
6913                                                    true, error);
6914 }
6915
6916 /**
6917  * Destroy a flow in a list.
6918  *
6919  * @param dev
6920  *   Pointer to Ethernet device.
6921  * @param[in] flow_idx
6922  *   Index of flow to destroy.
6923  */
6924 static void
6925 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6926                   uint32_t flow_idx)
6927 {
6928         struct mlx5_priv *priv = dev->data->dev_private;
6929         struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
6930
6931         if (!flow)
6932                 return;
6933         MLX5_ASSERT(flow->type == type);
6934         /*
6935          * Update RX queue flags only if port is started, otherwise it is
6936          * already clean.
6937          */
6938         if (dev->data->dev_started)
6939                 flow_rxq_flags_trim(dev, flow);
6940         flow_drv_destroy(dev, flow);
6941         if (flow->tunnel) {
6942                 struct mlx5_flow_tunnel *tunnel;
6943
6944                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
6945                 RTE_VERIFY(tunnel);
6946                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
6947                         mlx5_flow_tunnel_free(dev, tunnel);
6948         }
6949         flow_mreg_del_copy_action(dev, flow);
6950         mlx5_ipool_free(priv->flows[type], flow_idx);
6951 }
6952
6953 /**
6954  * Destroy all flows.
6955  *
6956  * @param dev
6957  *   Pointer to Ethernet device.
6958  * @param type
6959  *   Flow type to be flushed.
6960  * @param active
6961  *   If true, flows are flushed on an active request (e.g. before stopping
6961  *   the port) and the number of flushed flows is logged.
6962  */
6963 void
6964 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6965                      bool active)
6966 {
6967         struct mlx5_priv *priv = dev->data->dev_private;
6968         uint32_t num_flushed = 0, fidx = 1;
6969         struct rte_flow *flow;
6970
6971         MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
6972                 flow_list_destroy(dev, type, fidx);
6973                 num_flushed++;
6974         }
6975         if (active) {
6976                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
6977                         dev->data->port_id, num_flushed);
6978         }
6979 }
6980
6981 /**
6982  * Stop all default actions for flows.
6983  *
6984  * @param dev
6985  *   Pointer to Ethernet device.
6986  */
6987 void
6988 mlx5_flow_stop_default(struct rte_eth_dev *dev)
6989 {
6990         flow_mreg_del_default_copy_action(dev);
6991         flow_rxq_flags_clear(dev);
6992 }
6993
6994 /**
6995  * Start all default actions for flows.
6996  *
6997  * @param dev
6998  *   Pointer to Ethernet device.
6999  * @return
7000  *   0 on success, a negative errno value otherwise and rte_errno is set.
7001  */
7002 int
7003 mlx5_flow_start_default(struct rte_eth_dev *dev)
7004 {
7005         struct rte_flow_error error;
7006
7007         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7008         return flow_mreg_add_default_copy_action(dev, &error);
7009 }
7010
7011 /**
7012  * Release key of thread specific flow workspace data.
7013  */
7014 void
7015 flow_release_workspace(void *data)
7016 {
7017         struct mlx5_flow_workspace *wks = data;
7018         struct mlx5_flow_workspace *next;
7019
7020         while (wks) {
7021                 next = wks->next;
7022                 free(wks->rss_desc.queue);
7023                 free(wks);
7024                 wks = next;
7025         }
7026 }
7027
7028 /**
7029  * Get thread specific current flow workspace.
7030  *
7031  * @return pointer to thread specific flow workspace data, NULL on error.
7032  */
7033 struct mlx5_flow_workspace*
7034 mlx5_flow_get_thread_workspace(void)
7035 {
7036         struct mlx5_flow_workspace *data;
7037
7038         data = mlx5_flow_os_get_specific_workspace();
7039         MLX5_ASSERT(data && data->inuse);
7040         if (!data || !data->inuse)
7041                 DRV_LOG(ERR, "flow workspace not initialized.");
7042         return data;
7043 }
7044
7045 /**
7046  * Allocate and init new flow workspace.
7047  *
7048  * @return pointer to flow workspace data, NULL on error.
7049  */
7050 static struct mlx5_flow_workspace*
7051 flow_alloc_thread_workspace(void)
7052 {
7053         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7054
7055         if (!data) {
7056                 DRV_LOG(ERR, "Failed to allocate flow workspace "
7057                         "memory.");
7058                 return NULL;
7059         }
7060         data->rss_desc.queue = calloc(1,
7061                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7062         if (!data->rss_desc.queue)
7063                 goto err;
7064         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7065         return data;
7066 err:
7067         free(data->rss_desc.queue);
7068         free(data);
7069         return NULL;
7070 }
7071
7072 /**
7073  * Get new thread specific flow workspace.
7074  *
7075  * If the current workspace is in use, create a new one and set it as current.
7076  *
7077  * @return pointer to thread specific flow workspace data, NULL on error.
7078  */
7079 static struct mlx5_flow_workspace*
7080 mlx5_flow_push_thread_workspace(void)
7081 {
7082         struct mlx5_flow_workspace *curr;
7083         struct mlx5_flow_workspace *data;
7084
7085         curr = mlx5_flow_os_get_specific_workspace();
7086         if (!curr) {
7087                 data = flow_alloc_thread_workspace();
7088                 if (!data)
7089                         return NULL;
7090         } else if (!curr->inuse) {
7091                 data = curr;
7092         } else if (curr->next) {
7093                 data = curr->next;
7094         } else {
7095                 data = flow_alloc_thread_workspace();
7096                 if (!data)
7097                         return NULL;
7098                 curr->next = data;
7099                 data->prev = curr;
7100         }
7101         data->inuse = 1;
7102         data->flow_idx = 0;
7103         /* Set as current workspace */
7104         if (mlx5_flow_os_set_specific_workspace(data))
7105                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7106         return data;
7107 }
7108
7109 /**
7110  * Close current thread specific flow workspace.
7111  *
7112  * If a previous workspace is available, set it as current.
7115  */
7116 static void
7117 mlx5_flow_pop_thread_workspace(void)
7118 {
7119         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7120
7121         if (!data)
7122                 return;
7123         if (!data->inuse) {
7124                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
7125                 return;
7126         }
7127         data->inuse = 0;
7128         if (!data->prev)
7129                 return;
7130         if (mlx5_flow_os_set_specific_workspace(data->prev))
7131                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7132 }
7133
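/*
 * Typical per-thread usage, sketched from flow_list_create() above:
 *
 *   struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 *   MLX5_ASSERT(wks);
 *   ... fill wks->rss_desc / wks->flows while building the flow ...
 *   mlx5_flow_pop_thread_workspace();
 *
 * Workspaces form a small per-thread stack linked through prev/next, so a
 * nested flow creation (for instance the metadata register copy flows) gets
 * the next node instead of clobbering the caller's state.
 */
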
7134 /**
7135  * Verify the flow list is empty
7136  *
7137  * @param dev
7138  *  Pointer to Ethernet device.
7139  *
7140  * @return the number of flows not released.
7141  */
7142 int
7143 mlx5_flow_verify(struct rte_eth_dev *dev)
7144 {
7145         struct mlx5_priv *priv = dev->data->dev_private;
7146         struct rte_flow *flow;
7147         uint32_t idx = 0;
7148         int ret = 0, i;
7149
7150         for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7151                 MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7152                         DRV_LOG(DEBUG, "port %u flow %p still referenced",
7153                                 dev->data->port_id, (void *)flow);
7154                         ret++;
7155                 }
7156         }
7157         return ret;
7158 }
7159
7160 /**
7161  * Enable default hairpin egress flow.
7162  *
7163  * @param dev
7164  *   Pointer to Ethernet device.
7165  * @param queue
7166  *   The queue index.
7167  *
7168  * @return
7169  *   0 on success, a negative errno value otherwise and rte_errno is set.
7170  */
7171 int
7172 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7173                             uint32_t queue)
7174 {
7175         const struct rte_flow_attr attr = {
7176                 .egress = 1,
7177                 .priority = 0,
7178         };
7179         struct mlx5_rte_flow_item_tx_queue queue_spec = {
7180                 .queue = queue,
7181         };
7182         struct mlx5_rte_flow_item_tx_queue queue_mask = {
7183                 .queue = UINT32_MAX,
7184         };
7185         struct rte_flow_item items[] = {
7186                 {
7187                         .type = (enum rte_flow_item_type)
7188                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7189                         .spec = &queue_spec,
7190                         .last = NULL,
7191                         .mask = &queue_mask,
7192                 },
7193                 {
7194                         .type = RTE_FLOW_ITEM_TYPE_END,
7195                 },
7196         };
7197         struct rte_flow_action_jump jump = {
7198                 .group = MLX5_HAIRPIN_TX_TABLE,
7199         };
7200         struct rte_flow_action actions[2];
7201         uint32_t flow_idx;
7202         struct rte_flow_error error;
7203
7204         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7205         actions[0].conf = &jump;
7206         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7207         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7208                                     &attr, items, actions, false, &error);
7209         if (!flow_idx) {
7210                 DRV_LOG(DEBUG,
7211                         "Failed to create ctrl flow: rte_errno(%d),"
7212                         " type(%d), message(%s)",
7213                         rte_errno, error.type,
7214                         error.message ? error.message : " (no stated reason)");
7215                 return -rte_errno;
7216         }
7217         return 0;
7218 }
7219
7220 /**
7221  * Enable a control flow configured from the control plane.
7222  *
7223  * @param dev
7224  *   Pointer to Ethernet device.
7225  * @param eth_spec
7226  *   An Ethernet flow spec to apply.
7227  * @param eth_mask
7228  *   An Ethernet flow mask to apply.
7229  * @param vlan_spec
7230  *   A VLAN flow spec to apply.
7231  * @param vlan_mask
7232  *   A VLAN flow mask to apply.
7233  *
7234  * @return
7235  *   0 on success, a negative errno value otherwise and rte_errno is set.
7236  */
7237 int
7238 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7239                     struct rte_flow_item_eth *eth_spec,
7240                     struct rte_flow_item_eth *eth_mask,
7241                     struct rte_flow_item_vlan *vlan_spec,
7242                     struct rte_flow_item_vlan *vlan_mask)
7243 {
7244         struct mlx5_priv *priv = dev->data->dev_private;
7245         const struct rte_flow_attr attr = {
7246                 .ingress = 1,
7247                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7248         };
7249         struct rte_flow_item items[] = {
7250                 {
7251                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7252                         .spec = eth_spec,
7253                         .last = NULL,
7254                         .mask = eth_mask,
7255                 },
7256                 {
7257                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7258                                               RTE_FLOW_ITEM_TYPE_END,
7259                         .spec = vlan_spec,
7260                         .last = NULL,
7261                         .mask = vlan_mask,
7262                 },
7263                 {
7264                         .type = RTE_FLOW_ITEM_TYPE_END,
7265                 },
7266         };
7267         uint16_t queue[priv->reta_idx_n];
7268         struct rte_flow_action_rss action_rss = {
7269                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7270                 .level = 0,
7271                 .types = priv->rss_conf.rss_hf,
7272                 .key_len = priv->rss_conf.rss_key_len,
7273                 .queue_num = priv->reta_idx_n,
7274                 .key = priv->rss_conf.rss_key,
7275                 .queue = queue,
7276         };
7277         struct rte_flow_action actions[] = {
7278                 {
7279                         .type = RTE_FLOW_ACTION_TYPE_RSS,
7280                         .conf = &action_rss,
7281                 },
7282                 {
7283                         .type = RTE_FLOW_ACTION_TYPE_END,
7284                 },
7285         };
7286         uint32_t flow_idx;
7287         struct rte_flow_error error;
7288         unsigned int i;
7289
7290         if (!priv->reta_idx_n || !priv->rxqs_n) {
7291                 return 0;
7292         }
7293         if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7294                 action_rss.types = 0;
7295         for (i = 0; i != priv->reta_idx_n; ++i)
7296                 queue[i] = (*priv->reta_idx)[i];
7297         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7298                                     &attr, items, actions, false, &error);
7299         if (!flow_idx)
7300                 return -rte_errno;
7301         return 0;
7302 }
7303
7304 /**
7305  * Enable a control flow configured from the control plane.
7306  *
7307  * @param dev
7308  *   Pointer to Ethernet device.
7309  * @param eth_spec
7310  *   An Ethernet flow spec to apply.
7311  * @param eth_mask
7312  *   An Ethernet flow mask to apply.
7313  *
7314  * @return
7315  *   0 on success, a negative errno value otherwise and rte_errno is set.
7316  */
7317 int
7318 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7319                struct rte_flow_item_eth *eth_spec,
7320                struct rte_flow_item_eth *eth_mask)
7321 {
7322         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7323 }
7324
7325 /**
7326  * Create a default miss flow rule matching LACP traffic.
7327  *
7328  * @param dev
7329  *   Pointer to Ethernet device.
7332  *
7333  * @return
7334  *   0 on success, a negative errno value otherwise and rte_errno is set.
7335  */
7336 int
7337 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7338 {
7339         /*
7340          * The LACP matching is done by using only the ether type, since using
7341          * a multicast dst mac causes the kernel to give this flow a low priority.
7342          */
7343         static const struct rte_flow_item_eth lacp_spec = {
7344                 .type = RTE_BE16(0x8809),
7345         };
7346         static const struct rte_flow_item_eth lacp_mask = {
7347                 .type = 0xffff,
7348         };
7349         const struct rte_flow_attr attr = {
7350                 .ingress = 1,
7351         };
7352         struct rte_flow_item items[] = {
7353                 {
7354                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7355                         .spec = &lacp_spec,
7356                         .mask = &lacp_mask,
7357                 },
7358                 {
7359                         .type = RTE_FLOW_ITEM_TYPE_END,
7360                 },
7361         };
7362         struct rte_flow_action actions[] = {
7363                 {
7364                         .type = (enum rte_flow_action_type)
7365                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7366                 },
7367                 {
7368                         .type = RTE_FLOW_ACTION_TYPE_END,
7369                 },
7370         };
7371         struct rte_flow_error error;
7372         uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7373                                         &attr, items, actions,
7374                                         false, &error);
7375
7376         if (!flow_idx)
7377                 return -rte_errno;
7378         return 0;
7379 }
7380
7381 /**
7382  * Destroy a flow.
7383  *
7384  * @see rte_flow_destroy()
7385  * @see rte_flow_ops
7386  */
7387 int
7388 mlx5_flow_destroy(struct rte_eth_dev *dev,
7389                   struct rte_flow *flow,
7390                   struct rte_flow_error *error)
7391 {
7392         struct mlx5_priv *priv = dev->data->dev_private;
7393
7394         if (priv->sh->config.dv_flow_en == 2)
7395                 return rte_flow_error_set(error, ENOTSUP,
7396                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7397                           NULL,
7398                           "Flow non-Q destruction not supported");
7399         flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7400                                 (uintptr_t)(void *)flow);
7401         return 0;
7402 }
7403
7404 /**
7405  * Destroy all flows.
7406  *
7407  * @see rte_flow_flush()
7408  * @see rte_flow_ops
7409  */
7410 int
7411 mlx5_flow_flush(struct rte_eth_dev *dev,
7412                 struct rte_flow_error *error __rte_unused)
7413 {
7414         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7415         return 0;
7416 }
7417
7418 /**
7419  * Isolated mode.
7420  *
7421  * @see rte_flow_isolate()
7422  * @see rte_flow_ops
7423  */
7424 int
7425 mlx5_flow_isolate(struct rte_eth_dev *dev,
7426                   int enable,
7427                   struct rte_flow_error *error)
7428 {
7429         struct mlx5_priv *priv = dev->data->dev_private;
7430
7431         if (dev->data->dev_started) {
7432                 rte_flow_error_set(error, EBUSY,
7433                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7434                                    NULL,
7435                                    "port must be stopped first");
7436                 return -rte_errno;
7437         }
7438         priv->isolated = !!enable;
7439         if (enable)
7440                 dev->dev_ops = &mlx5_dev_ops_isolate;
7441         else
7442                 dev->dev_ops = &mlx5_dev_ops;
7443
7444         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7445         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7446
7447         return 0;
7448 }
7449
7450 /**
7451  * Query a flow.
7452  *
7453  * @see rte_flow_query()
7454  * @see rte_flow_ops
7455  */
7456 static int
7457 flow_drv_query(struct rte_eth_dev *dev,
7458                uint32_t flow_idx,
7459                const struct rte_flow_action *actions,
7460                void *data,
7461                struct rte_flow_error *error)
7462 {
7463         struct mlx5_priv *priv = dev->data->dev_private;
7464         const struct mlx5_flow_driver_ops *fops;
7465         struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7466                                                flow_idx);
7467         enum mlx5_flow_drv_type ftype;
7468
7469         if (!flow) {
7470                 return rte_flow_error_set(error, ENOENT,
7471                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7472                           NULL,
7473                           "invalid flow handle");
7474         }
7475         ftype = flow->drv_type;
7476         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7477         fops = flow_get_drv_ops(ftype);
7478
7479         return fops->query(dev, flow, actions, data, error);
7480 }
7481
7482 /**
7483  * Query a flow.
7484  *
7485  * @see rte_flow_query()
7486  * @see rte_flow_ops
7487  */
7488 int
7489 mlx5_flow_query(struct rte_eth_dev *dev,
7490                 struct rte_flow *flow,
7491                 const struct rte_flow_action *actions,
7492                 void *data,
7493                 struct rte_flow_error *error)
7494 {
7495         int ret;
7496         struct mlx5_priv *priv = dev->data->dev_private;
7497
7498         if (priv->sh->config.dv_flow_en == 2)
7499                 return rte_flow_error_set(error, ENOTSUP,
7500                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7501                           NULL,
7502                           "Flow non-Q query not supported");
7503         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7504                              error);
7505         if (ret < 0)
7506                 return ret;
7507         return 0;
7508 }
7509
7510 /**
7511  * Get rte_flow callbacks.
7512  *
7513  * @param dev
7514  *   Pointer to Ethernet device structure.
7515  * @param ops
7516  *   Pointer to operation-specific structure.
7517  *
7518  * @return 0
7519  */
7520 int
7521 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7522                   const struct rte_flow_ops **ops)
7523 {
7524         *ops = &mlx5_flow_ops;
7525         return 0;
7526 }
7527
7528 /**
7529  * Validate meter policy actions.
7530  * Dispatcher for action type specific validation.
7531  *
7532  * @param[in] dev
7533  *   Pointer to the Ethernet device structure.
7534  * @param[in] actions
7535  *   Array of meter policy actions (one action list per color) to validate.
7536  * @param[in] attr
7537  *   Attributes of flow to determine steering domain.
7538  * @param[out] is_rss
7539  *   Is RSS or not.
7540  * @param[out] domain_bitmap
7541  *   Domain bitmap.
7542  * @param[out] policy_mode
7543  *   Meter policy mode.
7544  * @param[out] error
7545  *   Perform verbose error reporting if not NULL. Initialized in case of
7546  *   error only.
7547  *
7548  * @return
7549  *   0 on success, otherwise negative errno value.
7550  */
7551 int
7552 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7553                         const struct rte_flow_action *actions[RTE_COLORS],
7554                         struct rte_flow_attr *attr,
7555                         bool *is_rss,
7556                         uint8_t *domain_bitmap,
7557                         uint8_t *policy_mode,
7558                         struct rte_mtr_error *error)
7559 {
7560         const struct mlx5_flow_driver_ops *fops;
7561
7562         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7563         return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7564                                        domain_bitmap, policy_mode, error);
7565 }
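
/*
 * Illustration (hypothetical, application side): the actions[RTE_COLORS]
 * array validated above normally originates from the rte_mtr policy API,
 * one action list per color. A minimal green-pass/red-drop policy might
 * look as follows; "port_id" and "policy_id" are assumptions.
 *
 *   struct rte_mtr_error mtr_err;
 *   struct rte_mtr_meter_policy_params policy = {
 *           .actions = {
 *                   [RTE_COLOR_GREEN] = (const struct rte_flow_action[]){
 *                           { .type = RTE_FLOW_ACTION_TYPE_END },
 *                   },
 *                   [RTE_COLOR_RED] = (const struct rte_flow_action[]){
 *                           { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *                           { .type = RTE_FLOW_ACTION_TYPE_END },
 *                   },
 *           },
 *   };
 *
 *   rte_mtr_meter_policy_add(port_id, policy_id, &policy, &mtr_err);
 */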
7566
7567 /**
7568  * Destroy the meter policy actions.
7569  *
7570  * @param[in] dev
7571  *   Pointer to Ethernet device.
7572  * @param[in] mtr_policy
7573  *   Meter policy struct.
7574  */
7575 void
7576 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7577                       struct mlx5_flow_meter_policy *mtr_policy)
7578 {
7579         const struct mlx5_flow_driver_ops *fops;
7580
7581         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7582         fops->destroy_mtr_acts(dev, mtr_policy);
7583 }
7584
7585 /**
7586  * Create policy action, lock free,
7587  * (mutex should be acquired by caller).
7588  * Dispatcher for action type specific call.
7589  *
7590  * @param[in] dev
7591  *   Pointer to the Ethernet device structure.
7592  * @param[in] mtr_policy
7593  *   Meter policy struct.
7594  * @param[in] action
7595  *   Action specification used to create meter actions.
7596  * @param[out] error
7597  *   Perform verbose error reporting if not NULL. Initialized in case of
7598  *   error only.
7599  *
7600  * @return
7601  *   0 on success, otherwise negative errno value.
7602  */
7603 int
7604 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7605                       struct mlx5_flow_meter_policy *mtr_policy,
7606                       const struct rte_flow_action *actions[RTE_COLORS],
7607                       struct rte_mtr_error *error)
7608 {
7609         const struct mlx5_flow_driver_ops *fops;
7610
7611         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7612         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7613 }
7614
7615 /**
7616  * Create policy rules, lock free,
7617  * (mutex should be acquired by caller).
7618  * Dispatcher for action type specific call.
7619  *
7620  * @param[in] dev
7621  *   Pointer to the Ethernet device structure.
7622  * @param[in] mtr_policy
7623  *   Meter policy struct.
7624  *
7625  * @return
7626  *   0 on success, -1 otherwise.
7627  */
7628 int
7629 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7630                              struct mlx5_flow_meter_policy *mtr_policy)
7631 {
7632         const struct mlx5_flow_driver_ops *fops;
7633
7634         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7635         return fops->create_policy_rules(dev, mtr_policy);
7636 }
7637
7638 /**
7639  * Destroy policy rules, lock free,
7640  * (mutex should be acquired by caller).
7641  * Dispatcher for action type specific call.
7642  *
7643  * @param[in] dev
7644  *   Pointer to the Ethernet device structure.
7645  * @param[in] mtr_policy
7646  *   Meter policy struct.
7647  */
7648 void
7649 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7650                              struct mlx5_flow_meter_policy *mtr_policy)
7651 {
7652         const struct mlx5_flow_driver_ops *fops;
7653
7654         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7655         fops->destroy_policy_rules(dev, mtr_policy);
7656 }
7657
7658 /**
7659  * Destroy the default policy table set.
7660  *
7661  * @param[in] dev
7662  *   Pointer to Ethernet device.
7663  */
7664 void
7665 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7666 {
7667         const struct mlx5_flow_driver_ops *fops;
7668
7669         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7670         fops->destroy_def_policy(dev);
7671 }
7672
7673 /**
7674  * Create the default policy table set.
7675  *
7676  * @param[in] dev
7677  *   Pointer to Ethernet device.
7678  *
7679  * @return
7680  *   0 on success, -1 otherwise.
7681  */
7682 int
7683 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7684 {
7685         const struct mlx5_flow_driver_ops *fops;
7686
7687         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7688         return fops->create_def_policy(dev);
7689 }
7690
7691 /**
7692  * Create the needed meter and suffix tables.
7693  *
7694  * @param[in] dev
7695  *   Pointer to Ethernet device.
7696  *
7697  * @return
7698  *   0 on success, -1 otherwise.
7699  */
7700 int
7701 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7702                         struct mlx5_flow_meter_info *fm,
7703                         uint32_t mtr_idx,
7704                         uint8_t domain_bitmap)
7705 {
7706         const struct mlx5_flow_driver_ops *fops;
7707
7708         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7709         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7710 }
7711
7712 /**
7713  * Destroy the meter table set.
7714  *
7715  * @param[in] dev
7716  *   Pointer to Ethernet device.
7717  * @param[in] fm
7718  *   Pointer to the flow meter info.
7719  */
7720 void
7721 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
7722                            struct mlx5_flow_meter_info *fm)
7723 {
7724         const struct mlx5_flow_driver_ops *fops;
7725
7726         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7727         fops->destroy_mtr_tbls(dev, fm);
7728 }
7729
7730 /**
7731  * Destroy the global meter drop table.
7732  *
7733  * @param[in] dev
7734  *   Pointer to Ethernet device.
7735  */
7736 void
7737 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
7738 {
7739         const struct mlx5_flow_driver_ops *fops;
7740
7741         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7742         fops->destroy_mtr_drop_tbls(dev);
7743 }
7744
7745 /**
7746  * Destroy the sub policy table with RX queue.
7747  *
7748  * @param[in] dev
7749  *   Pointer to Ethernet device.
7750  * @param[in] mtr_policy
7751  *   Pointer to meter policy table.
7752  */
7753 void
7754 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
7755                 struct mlx5_flow_meter_policy *mtr_policy)
7756 {
7757         const struct mlx5_flow_driver_ops *fops;
7758
7759         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7760         fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
7761 }
7762
7763 /**
7764  * Allocate the needed aso flow meter id.
7765  *
7766  * @param[in] dev
7767  *   Pointer to Ethernet device.
7768  *
7769  * @return
7770  *   Index to ASO flow meter on success, 0 otherwise.
7771  */
7772 uint32_t
7773 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
7774 {
7775         const struct mlx5_flow_driver_ops *fops;
7776
7777         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7778         return fops->create_meter(dev);
7779 }
7780
7781 /**
7782  * Free the aso flow meter id.
7783  *
7784  * @param[in] dev
7785  *   Pointer to Ethernet device.
7786  * @param[in] mtr_idx
7787  *   Index to ASO flow meter to be freed.
7791  */
7792 void
7793 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
7794 {
7795         const struct mlx5_flow_driver_ops *fops;
7796
7797         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7798         fops->free_meter(dev, mtr_idx);
7799 }
7800
7801 /**
7802  * Allocate a counter.
7803  *
7804  * @param[in] dev
7805  *   Pointer to Ethernet device structure.
7806  *
7807  * @return
7808  *   Index to allocated counter on success, 0 otherwise.
7809  */
7810 uint32_t
7811 mlx5_counter_alloc(struct rte_eth_dev *dev)
7812 {
7813         const struct mlx5_flow_driver_ops *fops;
7814         struct rte_flow_attr attr = { .transfer = 0 };
7815
7816         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7817                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7818                 return fops->counter_alloc(dev);
7819         }
7820         DRV_LOG(ERR,
7821                 "port %u counter allocate is not supported.",
7822                  dev->data->port_id);
7823         return 0;
7824 }
7825
7826 /**
7827  * Free a counter.
7828  *
7829  * @param[in] dev
7830  *   Pointer to Ethernet device structure.
7831  * @param[in] cnt
7832  *   Index to counter to be freed.
7833  */
7834 void
7835 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
7836 {
7837         const struct mlx5_flow_driver_ops *fops;
7838         struct rte_flow_attr attr = { .transfer = 0 };
7839
7840         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7841                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7842                 fops->counter_free(dev, cnt);
7843                 return;
7844         }
7845         DRV_LOG(ERR,
7846                 "port %u counter free is not supported.",
7847                  dev->data->port_id);
7848 }
7849
7850 /**
7851  * Query counter statistics.
7852  *
7853  * @param[in] dev
7854  *   Pointer to Ethernet device structure.
7855  * @param[in] cnt
7856  *   Index to counter to query.
7857  * @param[in] clear
7858  *   Set to clear counter statistics.
7859  * @param[out] pkts
7860  *   The number of packets that hit the counter.
7861  * @param[out] bytes
7862  *   The number of bytes that hit the counter.
7863  *
7864  * @return
7865  *   0 on success, a negative errno value otherwise.
7866  */
7867 int
7868 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
7869                    bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
7870 {
7871         const struct mlx5_flow_driver_ops *fops;
7872         struct rte_flow_attr attr = { .transfer = 0 };
7873
7874         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7875                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7876                 return fops->counter_query(dev, cnt, clear, pkts,
7877                                         bytes, action);
7878         }
7879         DRV_LOG(ERR,
7880                 "port %u counter query is not supported.",
7881                  dev->data->port_id);
7882         return -ENOTSUP;
7883 }
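
/*
 * Lifecycle sketch (hypothetical): the three counter wrappers above are
 * typically used together by other PMD code roughly as follows; error
 * handling is omitted and "dev" is assumed to be a valid port.
 *
 *   uint64_t pkts, bytes;
 *   void *action = NULL;
 *   uint32_t cnt = mlx5_counter_alloc(dev);
 *
 *   if (cnt) {
 *           mlx5_counter_query(dev, cnt, false, &pkts, &bytes, &action);
 *           mlx5_counter_free(dev, cnt);
 *   }
 */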
7884
7885 /**
7886  * Get information about HWS pre-configurable resources.
7887  *
7888  * @param[in] dev
7889  *   Pointer to the rte_eth_dev structure.
7890  * @param[out] port_info
7891  *   Pointer to port information.
7892  * @param[out] queue_info
7893  *   Pointer to queue information.
7894  * @param[out] error
7895  *   Pointer to error structure.
7896  *
7897  * @return
7898  *   0 on success, a negative errno value otherwise and rte_errno is set.
7899  */
7900 static int
7901 mlx5_flow_info_get(struct rte_eth_dev *dev,
7902                    struct rte_flow_port_info *port_info,
7903                    struct rte_flow_queue_info *queue_info,
7904                    struct rte_flow_error *error)
7905 {
7906         const struct mlx5_flow_driver_ops *fops;
7907
7908         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
7909                 return rte_flow_error_set(error, ENOTSUP,
7910                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7911                                 NULL,
7912                                 "info get with incorrect steering mode");
7913         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
7914         return fops->info_get(dev, port_info, queue_info, error);
7915 }
7916
7917 /**
7918  * Configure port HWS resources.
7919  *
7920  * @param[in] dev
7921  *   Pointer to the rte_eth_dev structure.
7922  * @param[in] port_attr
7923  *   Port configuration attributes.
7924  * @param[in] nb_queue
7925  *   Number of queues.
7926  * @param[in] queue_attr
7927  *   Array that holds attributes for each flow queue.
7928  * @param[out] error
7929  *   Pointer to error structure.
7930  *
7931  * @return
7932  *   0 on success, a negative errno value otherwise and rte_errno is set.
7933  */
7934 static int
7935 mlx5_flow_port_configure(struct rte_eth_dev *dev,
7936                          const struct rte_flow_port_attr *port_attr,
7937                          uint16_t nb_queue,
7938                          const struct rte_flow_queue_attr *queue_attr[],
7939                          struct rte_flow_error *error)
7940 {
7941         const struct mlx5_flow_driver_ops *fops;
7942
7943         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
7944                 return rte_flow_error_set(error, ENOTSUP,
7945                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7946                                 NULL,
7947                                 "port configure with incorrect steering mode");
7948         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
7949         return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
7950 }
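
/*
 * Usage sketch (hypothetical, application side): HWS ports are configured
 * once before any templates are created. rte_flow_info_get() and
 * rte_flow_configure() dispatch to the two wrappers above; the sizes below
 * are arbitrary example values.
 *
 *   struct rte_flow_port_info port_info;
 *   struct rte_flow_queue_info queue_info;
 *   struct rte_flow_port_attr port_attr = { .nb_counters = 1024 };
 *   struct rte_flow_queue_attr queue_attr = { .size = 64 };
 *   const struct rte_flow_queue_attr *queue_attrs[] = { &queue_attr };
 *   struct rte_flow_error err;
 *
 *   rte_flow_info_get(port_id, &port_info, &queue_info, &err);
 *   rte_flow_configure(port_id, &port_attr, 1, queue_attrs, &err);
 */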
7951
7952 /**
7953  * Create flow item template.
7954  *
7955  * @param[in] dev
7956  *   Pointer to the rte_eth_dev structure.
7957  * @param[in] attr
7958  *   Pointer to the item template attributes.
7959  * @param[in] items
7960  *   The template item pattern.
7961  * @param[out] error
7962  *   Pointer to error structure.
7963  *
7964  * @return
7965  *   A valid template handle on success, NULL otherwise and rte_errno is set.
7966  */
7967 static struct rte_flow_pattern_template *
7968 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
7969                 const struct rte_flow_pattern_template_attr *attr,
7970                 const struct rte_flow_item items[],
7971                 struct rte_flow_error *error)
7972 {
7973         const struct mlx5_flow_driver_ops *fops;
7974
7975         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
7976                 rte_flow_error_set(error, ENOTSUP,
7977                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7978                                 NULL,
7979                                 "pattern create with incorrect steering mode");
7980                 return NULL;
7981         }
7982         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
7983         return fops->pattern_template_create(dev, attr, items, error);
7984 }
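
/*
 * Usage sketch (hypothetical, application side): a pattern template fixes
 * the matching structure once so that later template rules only supply the
 * values. The item masks tell the PMD which fields will be matched.
 *
 *   struct rte_flow_pattern_template_attr pt_attr = {
 *           .relaxed_matching = 1,
 *           .ingress = 1,
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4,
 *             .mask = &(struct rte_flow_item_ipv4){
 *                     .hdr.dst_addr = RTE_BE32(0xffffffff) } },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow_pattern_template *pt =
 *           rte_flow_pattern_template_create(port_id, &pt_attr, pattern, &err);
 */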
7985
7986 /**
7987  * Destroy flow item template.
7988  *
7989  * @param[in] dev
7990  *   Pointer to the rte_eth_dev structure.
7991  * @param[in] template
7992  *   Pointer to the item template to be destroyed.
7993  * @param[out] error
7994  *   Pointer to error structure.
7995  *
7996  * @return
7997  *   0 on success, a negative errno value otherwise and rte_errno is set.
7998  */
7999 static int
8000 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8001                                    struct rte_flow_pattern_template *template,
8002                                    struct rte_flow_error *error)
8003 {
8004         const struct mlx5_flow_driver_ops *fops;
8005
8006         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8007                 return rte_flow_error_set(error, ENOTSUP,
8008                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8009                                 NULL,
8010                                 "pattern destroy with incorrect steering mode");
8011         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8012         return fops->pattern_template_destroy(dev, template, error);
8013 }
8014
8015 /**
8016  * Create flow actions template.
8017  *
8018  * @param[in] dev
8019  *   Pointer to the rte_eth_dev structure.
8020  * @param[in] attr
8021  *   Pointer to the action template attributes.
8022  * @param[in] actions
8023  *   Associated actions (list terminated by the END action).
8024  * @param[in] masks
8025  *   List of actions that mark which of the action's members are constant.
8026  * @param[out] error
8027  *   Pointer to error structure.
8028  *
8029  * @return
8030  *   A valid template handle on success, NULL otherwise and rte_errno is set.
8031  */
8032 static struct rte_flow_actions_template *
8033 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8034                         const struct rte_flow_actions_template_attr *attr,
8035                         const struct rte_flow_action actions[],
8036                         const struct rte_flow_action masks[],
8037                         struct rte_flow_error *error)
8038 {
8039         const struct mlx5_flow_driver_ops *fops;
8040
8041         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8042                 rte_flow_error_set(error, ENOTSUP,
8043                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8044                                 NULL,
8045                                 "action create with incorrect steering mode");
8046                 return NULL;
8047         }
8048         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8049         return fops->actions_template_create(dev, attr, actions, masks, error);
8050 }
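
/*
 * Usage sketch (hypothetical, application side): the "masks" array mirrors
 * "actions" and marks which configuration fields are constant for every
 * rule created from the template. Here the queue index mask is left zero,
 * so each rule supplies its own queue at enqueue time.
 *
 *   struct rte_flow_actions_template_attr at_attr = { .ingress = 1 };
 *   struct rte_flow_action tmpl_actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_action tmpl_masks[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow_actions_template *at =
 *           rte_flow_actions_template_create(port_id, &at_attr, tmpl_actions,
 *                                            tmpl_masks, &err);
 */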
8051
8052 /**
8053  * Destroy flow action template.
8054  *
8055  * @param[in] dev
8056  *   Pointer to the rte_eth_dev structure.
8057  * @param[in] template
8058  *   Pointer to the action template to be destroyed.
8059  * @param[out] error
8060  *   Pointer to error structure.
8061  *
8062  * @return
8063  *   0 on success, a negative errno value otherwise and rte_errno is set.
8064  */
8065 static int
8066 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8067                                    struct rte_flow_actions_template *template,
8068                                    struct rte_flow_error *error)
8069 {
8070         const struct mlx5_flow_driver_ops *fops;
8071
8072         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8073                 return rte_flow_error_set(error, ENOTSUP,
8074                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8075                                 NULL,
8076                                 "action destroy with incorrect steering mode");
8077         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8078         return fops->actions_template_destroy(dev, template, error);
8079 }
8080
8081 /**
8082  * Allocate new memory for the counter values, wrapped by all the needed
8083  * management structures.
8084  *
8085  * @param[in] sh
8086  *   Pointer to mlx5_dev_ctx_shared object.
8087  *
8088  * @return
8089  *   0 on success, a negative errno value otherwise.
8090  */
8091 static int
8092 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8093 {
8094         struct mlx5_counter_stats_mem_mng *mem_mng;
8095         volatile struct flow_counter_stats *raw_data;
8096         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8097         int size = (sizeof(struct flow_counter_stats) *
8098                         MLX5_COUNTERS_PER_POOL +
8099                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8100                         sizeof(struct mlx5_counter_stats_mem_mng);
8101         size_t pgsize = rte_mem_page_size();
8102         uint8_t *mem;
8103         int ret;
8104         int i;
8105
8106         if (pgsize == (size_t)-1) {
8107                 DRV_LOG(ERR, "Failed to get mem page size");
8108                 rte_errno = ENOMEM;
8109                 return -ENOMEM;
8110         }
8111         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8112         if (!mem) {
8113                 rte_errno = ENOMEM;
8114                 return -ENOMEM;
8115         }
8116         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8117         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8118         ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8119                                           sh->cdev->pdn, mem, size,
8120                                           &mem_mng->wm);
8121         if (ret) {
8122                 rte_errno = errno;
8123                 mlx5_free(mem);
8124                 return -rte_errno;
8125         }
8126         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8127         raw_data = (volatile struct flow_counter_stats *)mem;
8128         for (i = 0; i < raws_n; ++i) {
8129                 mem_mng->raws[i].mem_mng = mem_mng;
8130                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8131         }
8132         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8133                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8134                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8135                                  next);
8136         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8137         sh->cmng.mem_mng = mem_mng;
8138         return 0;
8139 }
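
/*
 * Layout note (informal): the single allocation made above is carved up as
 *
 *   mem -> [ raw_data: raws_n * MLX5_COUNTERS_PER_POOL statistic records ]
 *          [ raws:     raws_n struct mlx5_counter_stats_raw descriptors  ]
 *          [ mem_mng:  struct mlx5_counter_stats_mem_mng, at the end     ]
 *
 * so a pool reaches both its statistics and the registered memory key
 * through the raw descriptor it is assigned.
 */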
8140
8141 /**
8142  * Set the statistic memory to the new counter pool.
8143  *
8144  * @param[in] sh
8145  *   Pointer to mlx5_dev_ctx_shared object.
8146  * @param[in] pool
8147  *   Pointer to the pool to set the statistic memory.
8148  *
8149  * @return
8150  *   0 on success, a negative errno value otherwise.
8151  */
8152 static int
8153 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8154                                struct mlx5_flow_counter_pool *pool)
8155 {
8156         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8157         /* Resize statistic memory once used out. */
8158         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8159             mlx5_flow_create_counter_stat_mem_mng(sh)) {
8160                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
8161                 return -1;
8162         }
8163         rte_spinlock_lock(&pool->sl);
8164         pool->raw = cmng->mem_mng->raws + pool->index %
8165                     MLX5_CNT_CONTAINER_RESIZE;
8166         rte_spinlock_unlock(&pool->sl);
8167         pool->raw_hw = NULL;
8168         return 0;
8169 }
8170
8171 #define MLX5_POOL_QUERY_FREQ_US 1000000
8172
8173 /**
8174  * Set the periodic procedure for triggering asynchronous batch queries for all
8175  * the counter pools.
8176  *
8177  * @param[in] sh
8178  *   Pointer to mlx5_dev_ctx_shared object.
8179  */
8180 void
8181 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8182 {
8183         uint32_t pools_n, us;
8184
8185         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8186         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8187         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8188         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8189                 sh->cmng.query_thread_on = 0;
8190                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
8191         } else {
8192                 sh->cmng.query_thread_on = 1;
8193         }
8194 }
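
/*
 * Worked example (informal): MLX5_POOL_QUERY_FREQ_US is divided by the
 * number of valid pools, so each pool is queried roughly once per second
 * regardless of how many pools exist:
 *
 *   1 pool  -> alarm every 1000000 us
 *   4 pools -> alarm every  250000 us
 *   8 pools -> alarm every  125000 us
 */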
8195
8196 /**
8197  * The periodic procedure for triggering asynchronous batch queries for all the
8198  * counter pools. This function is probably called by the host thread.
8199  *
8200  * @param[in] arg
8201  *   The parameter for the alarm process.
8202  */
8203 void
8204 mlx5_flow_query_alarm(void *arg)
8205 {
8206         struct mlx5_dev_ctx_shared *sh = arg;
8207         int ret;
8208         uint16_t pool_index = sh->cmng.pool_index;
8209         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8210         struct mlx5_flow_counter_pool *pool;
8211         uint16_t n_valid;
8212
8213         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8214                 goto set_alarm;
8215         rte_spinlock_lock(&cmng->pool_update_sl);
8216         pool = cmng->pools[pool_index];
8217         n_valid = cmng->n_valid;
8218         rte_spinlock_unlock(&cmng->pool_update_sl);
8219         /* Set the statistic memory to the newly created pool. */
8220         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
8221                 goto set_alarm;
8222         if (pool->raw_hw)
8223                 /* There is a pool query in progress. */
8224                 goto set_alarm;
8225         pool->raw_hw =
8226                 LIST_FIRST(&sh->cmng.free_stat_raws);
8227         if (!pool->raw_hw)
8228                 /* No free counter statistics raw memory. */
8229                 goto set_alarm;
8230         /*
8231          * Identify the counters released between query trigger and query
8232          * handle more efficiently. The counter released in this gap period
8233          * should wait for a new round of query as the new arrived packets
8234          * will not be taken into account.
8235          */
8236         pool->query_gen++;
8237         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8238                                                MLX5_COUNTERS_PER_POOL,
8239                                                NULL, NULL,
8240                                                pool->raw_hw->mem_mng->wm.lkey,
8241                                                (void *)(uintptr_t)
8242                                                pool->raw_hw->data,
8243                                                sh->devx_comp,
8244                                                (uint64_t)(uintptr_t)pool);
8245         if (ret) {
8246                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8247                         " %d", pool->min_dcs->id);
8248                 pool->raw_hw = NULL;
8249                 goto set_alarm;
8250         }
8251         LIST_REMOVE(pool->raw_hw, next);
8252         sh->cmng.pending_queries++;
8253         pool_index++;
8254         if (pool_index >= n_valid)
8255                 pool_index = 0;
8256 set_alarm:
8257         sh->cmng.pool_index = pool_index;
8258         mlx5_set_query_alarm(sh);
8259 }
8260
8261 /**
8262  * Check the counter pool for newly aged flows and prepare the aging event.
8263  *
8264  * @param[in] sh
8265  *   Pointer to mlx5_dev_ctx_shared object.
8266  * @param[in] pool
8267  *   Pointer to Current counter pool.
8268  */
8269 static void
8270 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8271                    struct mlx5_flow_counter_pool *pool)
8272 {
8273         struct mlx5_priv *priv;
8274         struct mlx5_flow_counter *cnt;
8275         struct mlx5_age_info *age_info;
8276         struct mlx5_age_param *age_param;
8277         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8278         struct mlx5_counter_stats_raw *prev = pool->raw;
8279         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8280         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8281         uint16_t expected = AGE_CANDIDATE;
8282         uint32_t i;
8283
8284         pool->time_of_last_age_check = curr_time;
8285         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8286                 cnt = MLX5_POOL_GET_CNT(pool, i);
8287                 age_param = MLX5_CNT_TO_AGE(cnt);
8288                 if (__atomic_load_n(&age_param->state,
8289                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
8290                         continue;
8291                 if (cur->data[i].hits != prev->data[i].hits) {
8292                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
8293                                          __ATOMIC_RELAXED);
8294                         continue;
8295                 }
8296                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
8297                                        time_delta,
8298                                        __ATOMIC_RELAXED) <= age_param->timeout)
8299                         continue;
8300                 /*
8301                  * Hold the lock first; otherwise, if the release
8302                  * happens between setting the AGE_TMOUT state and
8303                  * the tailq operation, the release procedure may
8304                  * delete a non-existent tailq node.
8305                  */
8306                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
8307                 age_info = GET_PORT_AGE_INFO(priv);
8308                 rte_spinlock_lock(&age_info->aged_sl);
8309                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
8310                                                 AGE_TMOUT, false,
8311                                                 __ATOMIC_RELAXED,
8312                                                 __ATOMIC_RELAXED)) {
8313                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
8314                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
8315                 }
8316                 rte_spinlock_unlock(&age_info->aged_sl);
8317         }
8318         mlx5_age_event_prepare(sh);
8319 }
8320
8321 /**
8322  * Handler for the HW response about ready values from an asynchronous batch
8323  * query. This function is expected to be called from the host thread.
8324  *
8325  * @param[in] sh
8326  *   The pointer to the shared device context.
8327  * @param[in] async_id
8328  *   The Devx async ID.
8329  * @param[in] status
8330  *   The status of the completion.
8331  */
8332 void
8333 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
8334                                   uint64_t async_id, int status)
8335 {
8336         struct mlx5_flow_counter_pool *pool =
8337                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
8338         struct mlx5_counter_stats_raw *raw_to_free;
8339         uint8_t query_gen = pool->query_gen ^ 1;
8340         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8341         enum mlx5_counter_type cnt_type =
8342                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
8343                                 MLX5_COUNTER_TYPE_ORIGIN;
8344
8345         if (unlikely(status)) {
8346                 raw_to_free = pool->raw_hw;
8347         } else {
8348                 raw_to_free = pool->raw;
8349                 if (pool->is_aged)
8350                         mlx5_flow_aging_check(sh, pool);
8351                 rte_spinlock_lock(&pool->sl);
8352                 pool->raw = pool->raw_hw;
8353                 rte_spinlock_unlock(&pool->sl);
8354                 /* Be sure the new raw counters data is updated in memory. */
8355                 rte_io_wmb();
8356                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
8357                         rte_spinlock_lock(&cmng->csl[cnt_type]);
8358                         TAILQ_CONCAT(&cmng->counters[cnt_type],
8359                                      &pool->counters[query_gen], next);
8360                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
8361                 }
8362         }
8363         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
8364         pool->raw_hw = NULL;
8365         sh->cmng.pending_queries--;
8366 }
8367
8368 static int
8369 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
8370                     const struct flow_grp_info *grp_info,
8371                     struct rte_flow_error *error)
8372 {
8373         if (grp_info->transfer && grp_info->external &&
8374             grp_info->fdb_def_rule) {
8375                 if (group == UINT32_MAX)
8376                         return rte_flow_error_set
8377                                                 (error, EINVAL,
8378                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
8379                                                  NULL,
8380                                                  "group index not supported");
8381                 *table = group + 1;
8382         } else {
8383                 *table = group;
8384         }
8385         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
8386         return 0;
8387 }
8388
8389 /**
8390  * Translate the rte_flow group index to HW table value.
8391  *
8392  * If tunnel offload is disabled, all group ids are converted to flow
8393  * table ids using the standard method.
8394  * If tunnel offload is enabled, group id can be converted using the
8395  * standard or tunnel conversion method. Group conversion method
8396  * selection depends on flags in `grp_info` parameter:
8397  * - Internal (grp_info.external == 0) groups conversion uses the
8398  *   standard method.
8399  * - Group ids in JUMP action converted with the tunnel conversion.
8400  * - Group id in rule attribute conversion depends on a rule type and
8401  *   group id value:
8402  *   ** non zero group attributes converted with the tunnel method
8403  *   ** zero group attribute in non-tunnel rule is converted using the
8404  *      standard method - there's only one root table
8405  *   ** zero group attribute in steer tunnel rule is converted with the
8406  *      standard method - single root table
8407  *   ** zero group attribute in match tunnel rule is a special OvS
8408  *      case: that value is used for portability reasons. That group
8409  *      id is converted with the tunnel conversion method.
8410  *
8411  * @param[in] dev
8412  *   Port device
8413  * @param[in] tunnel
8414  *   PMD tunnel offload object
8415  * @param[in] group
8416  *   rte_flow group index value.
8417  * @param[out] table
8418  *   HW table value.
8419  * @param[in] grp_info
8420  *   flags used for conversion
8421  * @param[out] error
8422  *   Pointer to error structure.
8423  *
8424  * @return
8425  *   0 on success, a negative errno value otherwise and rte_errno is set.
8426  */
8427 int
8428 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
8429                          const struct mlx5_flow_tunnel *tunnel,
8430                          uint32_t group, uint32_t *table,
8431                          const struct flow_grp_info *grp_info,
8432                          struct rte_flow_error *error)
8433 {
8434         int ret;
8435         bool standard_translation;
8436
8437         if (!grp_info->skip_scale && grp_info->external &&
8438             group < MLX5_MAX_TABLES_EXTERNAL)
8439                 group *= MLX5_FLOW_TABLE_FACTOR;
8440         if (is_tunnel_offload_active(dev)) {
8441                 standard_translation = !grp_info->external ||
8442                                         grp_info->std_tbl_fix;
8443         } else {
8444                 standard_translation = true;
8445         }
8446         DRV_LOG(DEBUG,
8447                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
8448                 dev->data->port_id, group, grp_info->transfer,
8449                 grp_info->external, grp_info->fdb_def_rule,
8450                 standard_translation ? "STANDARD" : "TUNNEL");
8451         if (standard_translation)
8452                 ret = flow_group_to_table(dev->data->port_id, group, table,
8453                                           grp_info, error);
8454         else
8455                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
8456                                                       table, error);
8457
8458         return ret;
8459 }
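
/*
 * Worked example (informal): for an external transfer rule with the FDB
 * default rule enabled and tunnel offload disabled, the standard
 * translation above yields:
 *
 *   group 0          -> table 1 (root table reserved for the default rule)
 *   group 3          -> scaled by MLX5_FLOW_TABLE_FACTOR, then +1
 *   group UINT32_MAX -> rejected with "group index not supported"
 */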
8460
8461 /**
8462  * Discover availability of metadata reg_c's.
8463  *
8464  * Iteratively use test flows to check availability.
8465  *
8466  * @param[in] dev
8467  *   Pointer to the Ethernet device structure.
8468  *
8469  * @return
8470  *   0 on success, a negative errno value otherwise and rte_errno is set.
8471  */
8472 int
8473 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
8474 {
8475         struct mlx5_priv *priv = dev->data->dev_private;
8476         enum modify_reg idx;
8477         int n = 0;
8478
8479         /* reg_c[0] and reg_c[1] are reserved. */
8480         priv->sh->flow_mreg_c[n++] = REG_C_0;
8481         priv->sh->flow_mreg_c[n++] = REG_C_1;
8482         /* Discover availability of other reg_c's. */
8483         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
8484                 struct rte_flow_attr attr = {
8485                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
8486                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
8487                         .ingress = 1,
8488                 };
8489                 struct rte_flow_item items[] = {
8490                         [0] = {
8491                                 .type = RTE_FLOW_ITEM_TYPE_END,
8492                         },
8493                 };
8494                 struct rte_flow_action actions[] = {
8495                         [0] = {
8496                                 .type = (enum rte_flow_action_type)
8497                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
8498                                 .conf = &(struct mlx5_flow_action_copy_mreg){
8499                                         .src = REG_C_1,
8500                                         .dst = idx,
8501                                 },
8502                         },
8503                         [1] = {
8504                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
8505                                 .conf = &(struct rte_flow_action_jump){
8506                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
8507                                 },
8508                         },
8509                         [2] = {
8510                                 .type = RTE_FLOW_ACTION_TYPE_END,
8511                         },
8512                 };
8513                 uint32_t flow_idx;
8514                 struct rte_flow *flow;
8515                 struct rte_flow_error error;
8516
8517                 if (!priv->sh->config.dv_flow_en)
8518                         break;
8519                 /* Create internal flow, validation skips copy action. */
8520                 flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
8521                                         items, actions, false, &error);
8522                 flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8523                                       flow_idx);
8524                 if (!flow)
8525                         continue;
8526                 priv->sh->flow_mreg_c[n++] = idx;
8527                 flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
8528         }
8529         for (; n < MLX5_MREG_C_NUM; ++n)
8530                 priv->sh->flow_mreg_c[n] = REG_NON;
8531         priv->sh->metadata_regc_check_flag = 1;
8532         return 0;
8533 }
8534
8535 int
8536 save_dump_file(const uint8_t *data, uint32_t size,
8537         uint32_t type, uint64_t id, void *arg, FILE *file)
8538 {
8539         char line[BUF_SIZE];
8540         uint32_t out = 0;
8541         uint32_t k;
8542         uint32_t actions_num;
8543         struct rte_flow_query_count *count;
8544
8545         memset(line, 0, BUF_SIZE);
8546         switch (type) {
8547         case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
8548                 actions_num = *(uint32_t *)(arg);
8549                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
8550                                 type, id, actions_num);
8551                 break;
8552         case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
8553                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
8554                                 type, id);
8555                 break;
8556         case DR_DUMP_REC_TYPE_PMD_COUNTER:
8557                 count = (struct rte_flow_query_count *)arg;
8558                 fprintf(file,
8559                         "%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
8560                         type, id, count->hits, count->bytes);
8561                 return 0;
8562         default:
8563                 return -1;
8564         }
8565
8566         for (k = 0; k < size; k++) {
8567                 /* Make sure we do not overrun the line buffer length. */
8568                 if (out >= BUF_SIZE - 4) {
8569                         line[out] = '\0';
8570                         break;
8571                 }
8572                 out += snprintf(line + out, BUF_SIZE - out, "%02x",
8573                                 (data[k]) & 0xff);
8574         }
8575         fprintf(file, "%s\n", line);
8576         return 0;
8577 }
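
/*
 * Output format illustration: save_dump_file() emits one CSV-like record
 * per object, where the leading fields are the record type and the object
 * id, e.g.
 *
 *   modify_hdr: "<type>,0x<id>,<actions_num>,<hex action data>"
 *   reformat:   "<type>,0x<id>,<hex raw buffer>"
 *   counter:    "<type>,0x<id>,<hits>,<bytes>"
 */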
8578
8579 int
8580 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
8581         struct rte_flow_query_count *count, struct rte_flow_error *error)
8582 {
8583         struct rte_flow_action action[2];
8584         enum mlx5_flow_drv_type ftype;
8585         const struct mlx5_flow_driver_ops *fops;
8586
8587         if (!flow) {
8588                 return rte_flow_error_set(error, ENOENT,
8589                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8590                                 NULL,
8591                                 "invalid flow handle");
8592         }
8593         action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
8594         action[1].type = RTE_FLOW_ACTION_TYPE_END;
8595         if (flow->counter) {
8596                 memset(count, 0, sizeof(struct rte_flow_query_count));
8597                 ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
8598                 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
8599                                                 ftype < MLX5_FLOW_TYPE_MAX);
8600                 fops = flow_get_drv_ops(ftype);
8601                 return fops->query(dev, flow, action, count, error);
8602         }
8603         return -1;
8604 }
8605
8606 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8607 /**
8608  * Dump flow ipool data to file
8609  *
8610  * @param[in] dev
8611  *   The pointer to Ethernet device.
8612  * @param[in] file
8613  *   A pointer to a file for output.
8614  * @param[out] error
8615  *   Perform verbose error reporting if not NULL. PMDs initialize this
8616  *   structure in case of error only.
8617  * @return
8618  *   0 on success, a negative value otherwise.
8619  */
8620 int
8621 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
8622         struct rte_flow *flow, FILE *file,
8623         struct rte_flow_error *error)
8624 {
8625         struct mlx5_priv *priv = dev->data->dev_private;
8626         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8627         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8628         uint32_t handle_idx;
8629         struct mlx5_flow_handle *dh;
8630         struct rte_flow_query_count count;
8631         uint32_t actions_num;
8632         const uint8_t *data;
8633         size_t size;
8634         uint64_t id;
8635         uint32_t type;
8636         void *action = NULL;
8637
8638         if (!flow) {
8639                 return rte_flow_error_set(error, ENOENT,
8640                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8641                                 NULL,
8642                                 "invalid flow handle");
8643         }
8644         handle_idx = flow->dev_handles;
8645         /* query counter */
8646         if (flow->counter &&
8647         (!mlx5_counter_query(dev, flow->counter, false,
8648         &count.hits, &count.bytes, &action)) && action) {
8649                 id = (uint64_t)(uintptr_t)action;
8650                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8651                 save_dump_file(NULL, 0, type,
8652                         id, (void *)&count, file);
8653         }
8654
8655         while (handle_idx) {
8656                 dh = mlx5_ipool_get(priv->sh->ipool
8657                                 [MLX5_IPOOL_MLX5_FLOW], handle_idx);
8658                 if (!dh)
8659                         break;
8660                 handle_idx = dh->next.next;
8661
8662                 /* Get modify_hdr and encap_decap buf from ipools. */
8663                 encap_decap = NULL;
8664                 modify_hdr = dh->dvh.modify_hdr;
8665
8666                 if (dh->dvh.rix_encap_decap) {
8667                         encap_decap = mlx5_ipool_get(priv->sh->ipool
8668                                                 [MLX5_IPOOL_DECAP_ENCAP],
8669                                                 dh->dvh.rix_encap_decap);
8670                 }
8671                 if (modify_hdr) {
8672                         data = (const uint8_t *)modify_hdr->actions;
8673                         size = (size_t)(modify_hdr->actions_num) * 8;
8674                         id = (uint64_t)(uintptr_t)modify_hdr->action;
8675                         actions_num = modify_hdr->actions_num;
8676                         type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8677                         save_dump_file(data, size, type, id,
8678                                                 (void *)(&actions_num), file);
8679                 }
8680                 if (encap_decap) {
8681                         data = encap_decap->buf;
8682                         size = encap_decap->size;
8683                         id = (uint64_t)(uintptr_t)encap_decap->action;
8684                         type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8685                         save_dump_file(data, size, type,
8686                                                 id, NULL, file);
8687                 }
8688         }
8689         return 0;
8690 }
8691
8692 /**
8693  * Dump all flow's encap_decap/modify_hdr/counter data to file
8694  *
8695  * @param[in] dev
8696  *   The pointer to Ethernet device.
8697  * @param[in] file
8698  *   A pointer to a file for output.
8699  * @param[out] error
8700  *   Perform verbose error reporting if not NULL. PMDs initialize this
8701  *   structure in case of error only.
8702  * @return
8703  *   0 on success, a negative value otherwise.
8704  */
8705 static int
8706 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
8707         FILE *file, struct rte_flow_error *error __rte_unused)
8708 {
8709         struct mlx5_priv *priv = dev->data->dev_private;
8710         struct mlx5_dev_ctx_shared *sh = priv->sh;
8711         struct mlx5_hlist *h;
8712         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
8713         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
8714         struct rte_flow_query_count count;
8715         uint32_t actions_num;
8716         const uint8_t *data;
8717         size_t size;
8718         uint64_t id;
8719         uint32_t type;
8720         uint32_t i;
8721         uint32_t j;
8722         struct mlx5_list_inconst *l_inconst;
8723         struct mlx5_list_entry *e;
8724         int lcore_index;
8725         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
8726         uint32_t max;
8727         void *action;
8728
8729         /* encap_decap hlist is lcore_share, get global core cache. */
8730         i = MLX5_LIST_GLOBAL;
8731         h = sh->encaps_decaps;
8732         if (h) {
8733                 for (j = 0; j <= h->mask; j++) {
8734                         l_inconst = &h->buckets[j].l;
8735                         if (!l_inconst || !l_inconst->cache[i])
8736                                 continue;
8737
8738                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8739                         while (e) {
8740                                 encap_decap =
8741                                 (struct mlx5_flow_dv_encap_decap_resource *)e;
8742                                 data = encap_decap->buf;
8743                                 size = encap_decap->size;
8744                                 id = (uint64_t)(uintptr_t)encap_decap->action;
8745                                 type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
8746                                 save_dump_file(data, size, type,
8747                                         id, NULL, file);
8748                                 e = LIST_NEXT(e, next);
8749                         }
8750                 }
8751         }
8752
8753         /* get modify_hdr */
8754         h = sh->modify_cmds;
8755         if (h) {
8756                 lcore_index = rte_lcore_index(rte_lcore_id());
8757                 if (unlikely(lcore_index == -1)) {
8758                         lcore_index = MLX5_LIST_NLCORE;
8759                         rte_spinlock_lock(&h->l_const.lcore_lock);
8760                 }
8761                 i = lcore_index;
8762
8763                 for (j = 0; j <= h->mask; j++) {
8764                         l_inconst = &h->buckets[j].l;
8765                         if (!l_inconst || !l_inconst->cache[i])
8766                                 continue;
8767
8768                         e = LIST_FIRST(&l_inconst->cache[i]->h);
8769                         while (e) {
8770                                 modify_hdr =
8771                                 (struct mlx5_flow_dv_modify_hdr_resource *)e;
8772                                 data = (const uint8_t *)modify_hdr->actions;
8773                                 size = (size_t)(modify_hdr->actions_num) * 8;
8774                                 actions_num = modify_hdr->actions_num;
8775                                 id = (uint64_t)(uintptr_t)modify_hdr->action;
8776                                 type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
8777                                 save_dump_file(data, size, type, id,
8778                                                 (void *)(&actions_num), file);
8779                                 e = LIST_NEXT(e, next);
8780                         }
8781                 }
8782
8783                 if (unlikely(lcore_index == MLX5_LIST_NLCORE))
8784                         rte_spinlock_unlock(&h->l_const.lcore_lock);
8785         }
8786
8787         /* get counter */
8788         MLX5_ASSERT(cmng->n_valid <= cmng->n);
8789         max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
8790         for (j = 1; j <= max; j++) {
8791                 action = NULL;
8792                 if ((!mlx5_counter_query(dev, j, false, &count.hits,
8793                 &count.bytes, &action)) && action) {
8794                         id = (uint64_t)(uintptr_t)action;
8795                         type = DR_DUMP_REC_TYPE_PMD_COUNTER;
8796                         save_dump_file(NULL, 0, type,
8797                                         id, (void *)&count, file);
8798                 }
8799         }
8800         return 0;
8801 }
8802 #endif
8803
8804 /**
8805  * Dump flow raw hw data to file
8806  *
8807  * @param[in] dev
8808  *    The pointer to Ethernet device.
8809  * @param[in] file
8810  *   A pointer to a file for output.
8811  * @param[out] error
8812  *   Perform verbose error reporting if not NULL. PMDs initialize this
8813  *   structure in case of error only.
8814  * @return
8815  *   0 on success, a negative value otherwise.
8816  */
8817 int
8818 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
8819                    FILE *file,
8820                    struct rte_flow_error *error __rte_unused)
8821 {
8822         struct mlx5_priv *priv = dev->data->dev_private;
8823         struct mlx5_dev_ctx_shared *sh = priv->sh;
8824         uint32_t handle_idx;
8825         int ret;
8826         struct mlx5_flow_handle *dh;
8827         struct rte_flow *flow;
8828
8829         if (!sh->config.dv_flow_en) {
8830                 if (fputs("device dv flow disabled\n", file) <= 0)
8831                         return -errno;
8832                 return -ENOTSUP;
8833         }
8834
8835         /* dump all */
8836         if (!flow_idx) {
8837 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8838                 if (mlx5_flow_dev_dump_sh_all(dev, file, error))
8839                         return -EINVAL;
8840 #endif
8841                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
8842                                         sh->rx_domain,
8843                                         sh->tx_domain, file);
8844         }
8845         /* dump one */
8846         flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
8847                         (uintptr_t)(void *)flow_idx);
8848         if (!flow)
8849                 return -EINVAL;
8850
8851 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8852         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
8853 #endif
8854         handle_idx = flow->dev_handles;
8855         while (handle_idx) {
8856                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
8857                                 handle_idx);
8858                 if (!dh)
8859                         return -ENOENT;
8860                 if (dh->drv_flow) {
8861                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
8862                                         file);
8863                         if (ret)
8864                                 return -ENOENT;
8865                 }
8866                 handle_idx = dh->next.next;
8867         }
8868         return 0;
8869 }
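
/*
 * Usage sketch (hypothetical, application side): the dump above is reached
 * through rte_flow_dev_dump(). Passing a NULL flow dumps the shared
 * resources and all steering domains, passing a flow handle dumps only
 * that rule.
 *
 *   struct rte_flow_error err;
 *
 *   rte_flow_dev_dump(port_id, NULL, stdout, &err);
 *   rte_flow_dev_dump(port_id, flow, stdout, &err);
 */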
8870
8871 /**
8872  * Get aged-out flows.
8873  *
8874  * @param[in] dev
8875  *   Pointer to the Ethernet device structure.
8876  * @param[in] contexts
8877  *   The address of an array of pointers to the aged-out flow contexts.
8878  * @param[in] nb_contexts
8879  *   The length of the contexts array.
8880  * @param[out] error
8881  *   Perform verbose error reporting if not NULL. Initialized in case of
8882  *   error only.
8883  *
8884  * @return
8885  *   The number of contexts retrieved on success, otherwise a negative
8886  *   errno value. If nb_contexts is 0, return the total number of aged
8887  *   contexts. If nb_contexts is not 0, return the number of aged flows
8888  *   reported in the contexts array.
8889  */
8890 int
8891 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
8892                         uint32_t nb_contexts, struct rte_flow_error *error)
8893 {
8894         const struct mlx5_flow_driver_ops *fops;
8895         struct rte_flow_attr attr = { .transfer = 0 };
8896
8897         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8898                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8899                 return fops->get_aged_flows(dev, contexts, nb_contexts,
8900                                                     error);
8901         }
8902         DRV_LOG(ERR,
8903                 "port %u get aged flows is not supported.",
8904                  dev->data->port_id);
8905         return -ENOTSUP;
8906 }
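
/*
 * Usage sketch (hypothetical, application side): a common pattern is to ask
 * for the number of aged-out flows first and then fetch the contexts that
 * were attached to the AGE actions. "port_id" is an assumption.
 *
 *   struct rte_flow_error err;
 *   int n = rte_flow_get_aged_flows(port_id, NULL, 0, &err);
 *
 *   if (n > 0) {
 *           void **ctx = calloc(n, sizeof(*ctx));
 *
 *           if (ctx) {
 *                   n = rte_flow_get_aged_flows(port_id, ctx, n, &err);
 *                   ... handle the n aged contexts and destroy their rules
 *                   free(ctx);
 *           }
 *   }
 */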
8907
8908 /* Wrapper for driver action_validate op callback */
8909 static int
8910 flow_drv_action_validate(struct rte_eth_dev *dev,
8911                          const struct rte_flow_indir_action_conf *conf,
8912                          const struct rte_flow_action *action,
8913                          const struct mlx5_flow_driver_ops *fops,
8914                          struct rte_flow_error *error)
8915 {
8916         static const char err_msg[] = "indirect action validation unsupported";
8917
8918         if (!fops->action_validate) {
8919                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8920                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8921                                    NULL, err_msg);
8922                 return -rte_errno;
8923         }
8924         return fops->action_validate(dev, conf, action, error);
8925 }
8926
8927 /**
8928  * Destroys the shared action by handle.
8929  *
8930  * @param dev
8931  *   Pointer to Ethernet device structure.
8932  * @param[in] handle
8933  *   Handle for the indirect action object to be destroyed.
8934  * @param[out] error
8935  *   Perform verbose error reporting if not NULL. PMDs initialize this
8936  *   structure in case of error only.
8937  *
8938  * @return
8939  *   0 on success, a negative errno value otherwise and rte_errno is set.
8940  *
8941  * @note: wrapper for driver action_destroy op callback.
8942  */
8943 static int
8944 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
8945                            struct rte_flow_action_handle *handle,
8946                            struct rte_flow_error *error)
8947 {
8948         static const char err_msg[] = "indirect action destruction unsupported";
8949         struct rte_flow_attr attr = { .transfer = 0 };
8950         const struct mlx5_flow_driver_ops *fops =
8951                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8952
8953         if (!fops->action_destroy) {
8954                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8955                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8956                                    NULL, err_msg);
8957                 return -rte_errno;
8958         }
8959         return fops->action_destroy(dev, handle, error);
8960 }
8961
8962 /* Wrapper for driver action_update op callback */
8963 static int
8964 flow_drv_action_update(struct rte_eth_dev *dev,
8965                        struct rte_flow_action_handle *handle,
8966                        const void *update,
8967                        const struct mlx5_flow_driver_ops *fops,
8968                        struct rte_flow_error *error)
8969 {
8970         static const char err_msg[] = "indirect action update unsupported";
8971
8972         if (!fops->action_update) {
8973                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8974                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8975                                    NULL, err_msg);
8976                 return -rte_errno;
8977         }
8978         return fops->action_update(dev, handle, update, error);
8979 }
8980
8981 /* Wrapper for driver action_query op callback */
8982 static int
8983 flow_drv_action_query(struct rte_eth_dev *dev,
8984                       const struct rte_flow_action_handle *handle,
8985                       void *data,
8986                       const struct mlx5_flow_driver_ops *fops,
8987                       struct rte_flow_error *error)
8988 {
8989         static const char err_msg[] = "indirect action query unsupported";
8990
8991         if (!fops->action_query) {
8992                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8993                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8994                                    NULL, err_msg);
8995                 return -rte_errno;
8996         }
8997         return fops->action_query(dev, handle, data, error);
8998 }
8999
9000 /**
9001  * Create indirect action for reuse in multiple flow rules.
9002  *
9003  * @param dev
9004  *   Pointer to Ethernet device structure.
9005  * @param conf
9006  *   Pointer to indirect action object configuration.
9007  * @param[in] action
9008  *   Action configuration for indirect action object creation.
9009  * @param[out] error
9010  *   Perform verbose error reporting if not NULL. PMDs initialize this
9011  *   structure in case of error only.
9012  * @return
9013  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9014  */
9015 static struct rte_flow_action_handle *
9016 mlx5_action_handle_create(struct rte_eth_dev *dev,
9017                           const struct rte_flow_indir_action_conf *conf,
9018                           const struct rte_flow_action *action,
9019                           struct rte_flow_error *error)
9020 {
9021         static const char err_msg[] = "indirect action creation unsupported";
9022         struct rte_flow_attr attr = { .transfer = 0 };
9023         const struct mlx5_flow_driver_ops *fops =
9024                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9025
9026         if (flow_drv_action_validate(dev, conf, action, fops, error))
9027                 return NULL;
9028         if (!fops->action_create) {
9029                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9030                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9031                                    NULL, err_msg);
9032                 return NULL;
9033         }
9034         return fops->action_create(dev, conf, action, error);
9035 }
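/*
 * Illustrative only, not part of the driver: a minimal sketch of how an
 * application could reach the action_create/action_destroy wrappers above
 * through the public rte_flow API. The port id, queue set and RSS types are
 * hypothetical values chosen for the example, and MLX5_FLOW_DOC_EXAMPLE is a
 * hypothetical guard so the sketch is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLE
static int
example_indirect_rss(uint16_t port_id)
{
	static const uint16_t queues[] = { 0, 1, 2, 3 };
	struct rte_flow_error error;
	const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
	const struct rte_flow_action_rss rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.types = RTE_ETH_RSS_IP,
		.queue_num = RTE_DIM(queues),
		.queue = queues,
	};
	const struct rte_flow_action action = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &rss,
	};
	struct rte_flow_action_handle *handle;

	/* Reaches mlx5_action_handle_create() via the PMD ops table. */
	handle = rte_flow_action_handle_create(port_id, &conf, &action, &error);
	if (handle == NULL)
		return -rte_errno;
	/* ... reference the handle from one or more flow rules ... */
	return rte_flow_action_handle_destroy(port_id, handle, &error);
}
#endif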
9036
9037 /**
9038  * Updates in place the indirect action configuration pointed to by *handle*
9039  * with the configuration provided as the *update* argument.
9040  * Updating the indirect action configuration affects all flow rules
9041  * reusing the action via the handle.
9042  *
9043  * @param dev
9044  *   Pointer to Ethernet device structure.
9045  * @param[in] handle
9046  *   Handle for the indirect action to be updated.
9047  * @param[in] update
9048  *   Action specification used to modify the action pointed to by *handle*.
9049  *   *update* may be of the same type as the action pointed to by the
9050  *   *handle* argument, or some other structure such as a wrapper, depending
9051  *   on the indirect action type.
9052  * @param[out] error
9053  *   Perform verbose error reporting if not NULL. PMDs initialize this
9054  *   structure in case of error only.
9055  *
9056  * @return
9057  *   0 on success, a negative errno value otherwise and rte_errno is set.
9058  */
9059 static int
9060 mlx5_action_handle_update(struct rte_eth_dev *dev,
9061                 struct rte_flow_action_handle *handle,
9062                 const void *update,
9063                 struct rte_flow_error *error)
9064 {
9065         struct rte_flow_attr attr = { .transfer = 0 };
9066         const struct mlx5_flow_driver_ops *fops =
9067                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9068         int ret;
9069
9070         ret = flow_drv_action_validate(dev, NULL,
9071                         (const struct rte_flow_action *)update, fops, error);
9072         if (ret)
9073                 return ret;
9074         return flow_drv_action_update(dev, handle, update, fops,
9075                                       error);
9076 }
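/*
 * Illustrative only: a sketch of updating a shared RSS action through the
 * public API. As the flow_drv_action_validate() call above suggests, the
 * mlx5 PMD expects *update* to be a complete struct rte_flow_action whose
 * conf carries the new RSS configuration. The new queue set is hypothetical
 * and MLX5_FLOW_DOC_EXAMPLE is a hypothetical, never-defined guard.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLE
static int
example_indirect_rss_update(uint16_t port_id,
			    struct rte_flow_action_handle *handle)
{
	static const uint16_t new_queues[] = { 4, 5 };
	struct rte_flow_error error;
	const struct rte_flow_action_rss new_rss = {
		.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
		.types = RTE_ETH_RSS_IP,
		.queue_num = RTE_DIM(new_queues),
		.queue = new_queues,
	};
	const struct rte_flow_action update = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &new_rss,
	};

	/* All flow rules referencing the handle switch to the new queues. */
	return rte_flow_action_handle_update(port_id, handle, &update, &error);
}
#endif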
9077
9078 /**
9079  * Query the indirect action by handle.
9080  *
9081  * This function allows retrieving action-specific data such as counters.
9082  * Data is gathered by a special action which may be present/referenced in
9083  * more than one flow rule definition.
9084  *
9085  * @see RTE_FLOW_ACTION_TYPE_COUNT
9086  *
9087  * @param dev
9088  *   Pointer to Ethernet device structure.
9089  * @param[in] handle
9090  *   Handle for the indirect action to query.
9091  * @param[in, out] data
9092  *   Pointer to storage for the associated query data type.
9093  * @param[out] error
9094  *   Perform verbose error reporting if not NULL. PMDs initialize this
9095  *   structure in case of error only.
9096  *
9097  * @return
9098  *   0 on success, a negative errno value otherwise and rte_errno is set.
9099  */
9100 static int
9101 mlx5_action_handle_query(struct rte_eth_dev *dev,
9102                          const struct rte_flow_action_handle *handle,
9103                          void *data,
9104                          struct rte_flow_error *error)
9105 {
9106         struct rte_flow_attr attr = { .transfer = 0 };
9107         const struct mlx5_flow_driver_ops *fops =
9108                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9109
9110         return flow_drv_action_query(dev, handle, data, fops, error);
9111 }
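/*
 * Illustrative only: a sketch of querying an indirect COUNT action, the case
 * referenced by @see RTE_FLOW_ACTION_TYPE_COUNT above. The handle is assumed
 * to have been created for RTE_FLOW_ACTION_TYPE_COUNT; MLX5_FLOW_DOC_EXAMPLE
 * is a hypothetical guard so the sketch is never compiled.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLE
static int
example_indirect_count_query(uint16_t port_id,
			     const struct rte_flow_action_handle *handle,
			     uint64_t *hits, uint64_t *bytes)
{
	struct rte_flow_error error;
	struct rte_flow_query_count count = { .reset = 0 };
	int ret;

	/* Reaches mlx5_action_handle_query() via the PMD ops table. */
	ret = rte_flow_action_handle_query(port_id, handle, &count, &error);
	if (ret)
		return ret;
	*hits = count.hits;
	*bytes = count.bytes;
	return 0;
}
#endif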
9112
9113 /**
9114  * Destroy all indirect actions (shared RSS).
9115  *
9116  * @param dev
9117  *   Pointer to Ethernet device.
9118  *
9119  * @return
9120  *   0 on success, a negative errno value otherwise and rte_errno is set.
9121  */
9122 int
9123 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9124 {
9125         struct rte_flow_error error;
9126         struct mlx5_priv *priv = dev->data->dev_private;
9127         struct mlx5_shared_action_rss *shared_rss;
9128         int ret = 0;
9129         uint32_t idx;
9130
9131         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9132                       priv->rss_shared_actions, idx, shared_rss, next) {
9133                 ret |= mlx5_action_handle_destroy(dev,
9134                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9135         }
9136         return ret;
9137 }
9138
9139 /**
9140  * Validate existing indirect actions against current device configuration
9141  * and attach them to device resources.
9142  *
9143  * @param dev
9144  *   Pointer to Ethernet device.
9145  *
9146  * @return
9147  *   0 on success, a negative errno value otherwise and rte_errno is set.
9148  */
9149 int
9150 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9151 {
9152         struct mlx5_priv *priv = dev->data->dev_private;
9153         struct mlx5_indexed_pool *ipool =
9154                         priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
9155         struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
9156         int ret = 0;
9157         uint32_t idx;
9158
9159         ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
9160                 struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
9161                 const char *message;
9162                 uint32_t queue_idx;
9163
9164                 ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9165                                                ind_tbl->queues_n,
9166                                                &message, &queue_idx);
9167                 if (ret != 0) {
9168                         DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9169                                 dev->data->port_id, ind_tbl->queues[queue_idx],
9170                                 message);
9171                         break;
9172                 }
9173         }
9174         if (ret != 0)
9175                 return ret;
9176         ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
9177                 struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
9178
9179                 ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9180                 if (ret != 0) {
9181                         DRV_LOG(ERR, "Port %u could not attach "
9182                                 "indirection table obj %p",
9183                                 dev->data->port_id, (void *)ind_tbl);
9184                         goto error;
9185                 }
9186         }
9187         return 0;
9188 error:
9189         shared_rss_last = shared_rss;
9190         ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
9191                 struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
9192
9193                 if (shared_rss == shared_rss_last)
9194                         break;
9195                 if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9196                         DRV_LOG(CRIT, "Port %u could not detach "
9197                                 "indirection table obj %p on rollback",
9198                                 dev->data->port_id, (void *)ind_tbl);
9199         }
9200         return ret;
9201 }
9202
9203 /**
9204  * Detach indirect actions of the device from its resources.
9205  *
9206  * @param dev
9207  *   Pointer to Ethernet device.
9208  *
9209  * @return
9210  *   0 on success, a negative errno value otherwise and rte_errno is set.
9211  */
9212 int
9213 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9214 {
9215         struct mlx5_priv *priv = dev->data->dev_private;
9216         struct mlx5_indexed_pool *ipool =
9217                         priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS];
9218         struct mlx5_shared_action_rss *shared_rss, *shared_rss_last;
9219         int ret = 0;
9220         uint32_t idx;
9221
9222         ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
9223                 struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
9224
9225                 ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9226                 if (ret != 0) {
9227                         DRV_LOG(ERR, "Port %u could not detach "
9228                                 "indirection table obj %p",
9229                                 dev->data->port_id, (void *)ind_tbl);
9230                         goto error;
9231                 }
9232         }
9233         return 0;
9234 error:
9235         shared_rss_last = shared_rss;
9236         ILIST_FOREACH(ipool, priv->rss_shared_actions, idx, shared_rss, next) {
9237                 struct mlx5_ind_table_obj *ind_tbl = shared_rss->ind_tbl;
9238
9239                 if (shared_rss == shared_rss_last)
9240                         break;
9241                 if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9242                         DRV_LOG(CRIT, "Port %u could not attach "
9243                                 "indirection table obj %p on rollback",
9244                                 dev->data->port_id, (void *)ind_tbl);
9245         }
9246         return ret;
9247 }
9248
9249 #ifndef HAVE_MLX5DV_DR
9250 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9251 #else
9252 #define MLX5_DOMAIN_SYNC_FLOW \
9253         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9254 #endif
9255
9256 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9257 {
9258         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9259         const struct mlx5_flow_driver_ops *fops;
9260         int ret;
9261         struct rte_flow_attr attr = { .transfer = 0 };
9262
9263         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9264         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9265         if (ret > 0)
9266                 ret = -ret;
9267         return ret;
9268 }
9269
9270 const struct mlx5_flow_tunnel *
9271 mlx5_get_tof(const struct rte_flow_item *item,
9272              const struct rte_flow_action *action,
9273              enum mlx5_tof_rule_type *rule_type)
9274 {
9275         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9276                 if (item->type == (typeof(item->type))
9277                                   MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9278                         *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9279                         return flow_items_to_tunnel(item);
9280                 }
9281         }
9282         for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
9283                 if (action->type == (typeof(action->type))
9284                                     MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9285                         *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9286                         return flow_actions_to_tunnel(action);
9287                 }
9288         }
9289         return NULL;
9290 }
9291
9292 /**
9293  * Tunnel offload functionality is defined for the DV environment only.
9294  */
9295 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9296 __extension__
9297 union tunnel_offload_mark {
9298         uint32_t val;
9299         struct {
9300                 uint32_t app_reserve:8;
9301                 uint32_t table_id:15;
9302                 uint32_t transfer:1;
9303                 uint32_t _unused_:8;
9304         };
9305 };
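/*
 * Illustrative only: how the 32-bit MARK value used by tunnel offload miss
 * rules is composed from the union above, mirroring the assignments in
 * flow_tunnel_add_default_miss() below. The flow table value is hypothetical
 * and MLX5_FLOW_DOC_EXAMPLE is a hypothetical, never-defined guard.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLE
static uint32_t
example_tunnel_miss_mark(uint32_t flow_table, bool transfer)
{
	union tunnel_offload_mark mark;

	mark.app_reserve = 0;
	mark.table_id = tunnel_flow_tbl_to_id(flow_table);
	mark.transfer = !!transfer;
	mark._unused_ = 0;
	/* The packed value is programmed as rte_flow_action_mark.id. */
	return mark.val;
}
#endif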
9306
9307 static bool
9308 mlx5_access_tunnel_offload_db
9309         (struct rte_eth_dev *dev,
9310          bool (*match)(struct rte_eth_dev *,
9311                        struct mlx5_flow_tunnel *, const void *),
9312          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9313          void (*miss)(struct rte_eth_dev *, void *),
9314          void *ctx, bool lock_op);
9315
9316 static int
9317 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
9318                              struct rte_flow *flow,
9319                              const struct rte_flow_attr *attr,
9320                              const struct rte_flow_action *app_actions,
9321                              uint32_t flow_idx,
9322                              const struct mlx5_flow_tunnel *tunnel,
9323                              struct tunnel_default_miss_ctx *ctx,
9324                              struct rte_flow_error *error)
9325 {
9326         struct mlx5_priv *priv = dev->data->dev_private;
9327         struct mlx5_flow *dev_flow;
9328         struct rte_flow_attr miss_attr = *attr;
9329         const struct rte_flow_item miss_items[2] = {
9330                 {
9331                         .type = RTE_FLOW_ITEM_TYPE_ETH,
9332                         .spec = NULL,
9333                         .last = NULL,
9334                         .mask = NULL
9335                 },
9336                 {
9337                         .type = RTE_FLOW_ITEM_TYPE_END,
9338                         .spec = NULL,
9339                         .last = NULL,
9340                         .mask = NULL
9341                 }
9342         };
9343         union tunnel_offload_mark mark_id;
9344         struct rte_flow_action_mark miss_mark;
9345         struct rte_flow_action miss_actions[3] = {
9346                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
9347                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
9348         };
9349         const struct rte_flow_action_jump *jump_data;
9350         uint32_t i, flow_table = 0; /* prevent compilation warning */
9351         struct flow_grp_info grp_info = {
9352                 .external = 1,
9353                 .transfer = attr->transfer,
9354                 .fdb_def_rule = !!priv->fdb_def_rule,
9355                 .std_tbl_fix = 0,
9356         };
9357         int ret;
9358
9359         if (!attr->transfer) {
9360                 uint32_t q_size;
9361
9362                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
9363                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
9364                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
9365                                          0, SOCKET_ID_ANY);
9366                 if (!ctx->queue)
9367                         return rte_flow_error_set
9368                                 (error, ENOMEM,
9369                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9370                                 NULL, "invalid default miss RSS");
9371                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
9372                 ctx->action_rss.level = 0;
9373                 ctx->action_rss.types = priv->rss_conf.rss_hf;
9374                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
9375                 ctx->action_rss.queue_num = priv->reta_idx_n;
9376                 ctx->action_rss.key = priv->rss_conf.rss_key;
9377                 ctx->action_rss.queue = ctx->queue;
9378                 if (!priv->reta_idx_n || !priv->rxqs_n)
9379                         return rte_flow_error_set
9380                                 (error, EINVAL,
9381                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9382                                 NULL, "invalid port configuration");
9383                 if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
9384                         ctx->action_rss.types = 0;
9385                 for (i = 0; i != priv->reta_idx_n; ++i)
9386                         ctx->queue[i] = (*priv->reta_idx)[i];
9387         } else {
9388                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
9389                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
9390         }
9391         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
9392         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
9393         jump_data = app_actions->conf;
9394         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
9395         miss_attr.group = jump_data->group;
9396         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
9397                                        &flow_table, &grp_info, error);
9398         if (ret)
9399                 return rte_flow_error_set(error, EINVAL,
9400                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9401                                           NULL, "invalid tunnel id");
9402         mark_id.app_reserve = 0;
9403         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
9404         mark_id.transfer = !!attr->transfer;
9405         mark_id._unused_ = 0;
9406         miss_mark.id = mark_id.val;
9407         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
9408                                     miss_items, miss_actions, flow_idx, error);
9409         if (!dev_flow)
9410                 return -rte_errno;
9411         dev_flow->flow = flow;
9412         dev_flow->external = true;
9413         dev_flow->tunnel = tunnel;
9414         dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
9415         /* Subflow object was created, we must include it in the list. */
9416         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
9417                       dev_flow->handle, next);
9418         DRV_LOG(DEBUG,
9419                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
9420                 dev->data->port_id, tunnel->app_tunnel.type,
9421                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
9422         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
9423                                   miss_actions, error);
9424         if (!ret)
9425                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
9426                                                   error);
9427
9428         return ret;
9429 }
9430
9431 static const struct mlx5_flow_tbl_data_entry  *
9432 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
9433 {
9434         struct mlx5_priv *priv = dev->data->dev_private;
9435         struct mlx5_dev_ctx_shared *sh = priv->sh;
9436         struct mlx5_list_entry *he;
9437         union tunnel_offload_mark mbits = { .val = mark };
9438         union mlx5_flow_tbl_key table_key = {
9439                 {
9440                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
9441                         .id = 0,
9442                         .reserved = 0,
9443                         .dummy = 0,
9444                         .is_fdb = !!mbits.transfer,
9445                         .is_egress = 0,
9446                 }
9447         };
9448         struct mlx5_flow_cb_ctx ctx = {
9449                 .data = &table_key.v64,
9450         };
9451
9452         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
9453         return he ?
9454                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
9455 }
9456
9457 static void
9458 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
9459                                    struct mlx5_list_entry *entry)
9460 {
9461         struct mlx5_dev_ctx_shared *sh = tool_ctx;
9462         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9463
9464         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9465                         tunnel_flow_tbl_to_id(tte->flow_table));
9466         mlx5_free(tte);
9467 }
9468
9469 static int
9470 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
9471                                   struct mlx5_list_entry *entry, void *cb_ctx)
9472 {
9473         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9474         union tunnel_tbl_key tbl = {
9475                 .val = *(uint64_t *)(ctx->data),
9476         };
9477         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9478
9479         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
9480 }
9481
9482 static struct mlx5_list_entry *
9483 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
9484 {
9485         struct mlx5_dev_ctx_shared *sh = tool_ctx;
9486         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
9487         struct tunnel_tbl_entry *tte;
9488         union tunnel_tbl_key tbl = {
9489                 .val = *(uint64_t *)(ctx->data),
9490         };
9491
9492         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
9493                           sizeof(*tte), 0,
9494                           SOCKET_ID_ANY);
9495         if (!tte)
9496                 goto err;
9497         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9498                           &tte->flow_table);
9499         if (tte->flow_table >= MLX5_MAX_TABLES) {
9500                 DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
9501                         tte->flow_table);
9502                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
9503                                 tte->flow_table);
9504                 goto err;
9505         } else if (!tte->flow_table) {
9506                 goto err;
9507         }
9508         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
9509         tte->tunnel_id = tbl.tunnel_id;
9510         tte->group = tbl.group;
9511         return &tte->hash;
9512 err:
9513         if (tte)
9514                 mlx5_free(tte);
9515         return NULL;
9516 }
9517
9518 static struct mlx5_list_entry *
9519 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
9520                                   struct mlx5_list_entry *oentry,
9521                                   void *cb_ctx __rte_unused)
9522 {
9523         struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
9524                                                    0, SOCKET_ID_ANY);
9525
9526         if (!tte)
9527                 return NULL;
9528         memcpy(tte, oentry, sizeof(*tte));
9529         return &tte->hash;
9530 }
9531
9532 static void
9533 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
9534                                        struct mlx5_list_entry *entry)
9535 {
9536         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
9537
9538         mlx5_free(tte);
9539 }
9540
9541 static uint32_t
9542 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
9543                                 const struct mlx5_flow_tunnel *tunnel,
9544                                 uint32_t group, uint32_t *table,
9545                                 struct rte_flow_error *error)
9546 {
9547         struct mlx5_list_entry *he;
9548         struct tunnel_tbl_entry *tte;
9549         union tunnel_tbl_key key = {
9550                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
9551                 .group = group
9552         };
9553         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9554         struct mlx5_hlist *group_hash;
9555         struct mlx5_flow_cb_ctx ctx = {
9556                 .data = &key.val,
9557         };
9558
9559         group_hash = tunnel ? tunnel->groups : thub->groups;
9560         he = mlx5_hlist_register(group_hash, key.val, &ctx);
9561         if (!he)
9562                 return rte_flow_error_set(error, EINVAL,
9563                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9564                                           NULL,
9565                                           "tunnel group index not supported");
9566         tte = container_of(he, typeof(*tte), hash);
9567         *table = tte->flow_table;
9568         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
9569                 dev->data->port_id, key.tunnel_id, group, *table);
9570         return 0;
9571 }
9572
9573 static void
9574 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
9575                       struct mlx5_flow_tunnel *tunnel)
9576 {
9577         struct mlx5_priv *priv = dev->data->dev_private;
9578         struct mlx5_indexed_pool *ipool;
9579
9580         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
9581                 dev->data->port_id, tunnel->tunnel_id);
9582         LIST_REMOVE(tunnel, chain);
9583         mlx5_hlist_destroy(tunnel->groups);
9584         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9585         mlx5_ipool_free(ipool, tunnel->tunnel_id);
9586 }
9587
9588 static bool
9589 mlx5_access_tunnel_offload_db
9590         (struct rte_eth_dev *dev,
9591          bool (*match)(struct rte_eth_dev *,
9592                        struct mlx5_flow_tunnel *, const void *),
9593          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9594          void (*miss)(struct rte_eth_dev *, void *),
9595          void *ctx, bool lock_op)
9596 {
9597         bool verdict = false;
9598         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9599         struct mlx5_flow_tunnel *tunnel;
9600
9601         rte_spinlock_lock(&thub->sl);
9602         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
9603                 verdict = match(dev, tunnel, (const void *)ctx);
9604                 if (verdict)
9605                         break;
9606         }
9607         if (!lock_op)
9608                 rte_spinlock_unlock(&thub->sl);
9609         if (verdict && hit)
9610                 hit(dev, tunnel, ctx);
9611         if (!verdict && miss)
9612                 miss(dev, ctx);
9613         if (lock_op)
9614                 rte_spinlock_unlock(&thub->sl);
9615
9616         return verdict;
9617 }
9618
9619 struct tunnel_db_find_tunnel_id_ctx {
9620         uint32_t tunnel_id;
9621         struct mlx5_flow_tunnel *tunnel;
9622 };
9623
9624 static bool
9625 find_tunnel_id_match(struct rte_eth_dev *dev,
9626                      struct mlx5_flow_tunnel *tunnel, const void *x)
9627 {
9628         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9629
9630         RTE_SET_USED(dev);
9631         return tunnel->tunnel_id == ctx->tunnel_id;
9632 }
9633
9634 static void
9635 find_tunnel_id_hit(struct rte_eth_dev *dev,
9636                    struct mlx5_flow_tunnel *tunnel, void *x)
9637 {
9638         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
9639         RTE_SET_USED(dev);
9640         ctx->tunnel = tunnel;
9641 }
9642
9643 static struct mlx5_flow_tunnel *
9644 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
9645 {
9646         struct tunnel_db_find_tunnel_id_ctx ctx = {
9647                 .tunnel_id = id,
9648         };
9649
9650         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
9651                                       find_tunnel_id_hit, NULL, &ctx, true);
9652
9653         return ctx.tunnel;
9654 }
9655
9656 static struct mlx5_flow_tunnel *
9657 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
9658                           const struct rte_flow_tunnel *app_tunnel)
9659 {
9660         struct mlx5_priv *priv = dev->data->dev_private;
9661         struct mlx5_indexed_pool *ipool;
9662         struct mlx5_flow_tunnel *tunnel;
9663         uint32_t id;
9664
9665         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
9666         tunnel = mlx5_ipool_zmalloc(ipool, &id);
9667         if (!tunnel)
9668                 return NULL;
9669         if (id >= MLX5_MAX_TUNNELS) {
9670                 mlx5_ipool_free(ipool, id);
9671                 DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
9672                 return NULL;
9673         }
9674         tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
9675                                            priv->sh,
9676                                            mlx5_flow_tunnel_grp2tbl_create_cb,
9677                                            mlx5_flow_tunnel_grp2tbl_match_cb,
9678                                            mlx5_flow_tunnel_grp2tbl_remove_cb,
9679                                            mlx5_flow_tunnel_grp2tbl_clone_cb,
9680                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9681         if (!tunnel->groups) {
9682                 mlx5_ipool_free(ipool, id);
9683                 return NULL;
9684         }
9685         /* Initialize the new PMD tunnel. */
9686         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
9687         tunnel->tunnel_id = id;
9688         tunnel->action.type = (typeof(tunnel->action.type))
9689                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
9690         tunnel->action.conf = tunnel;
9691         tunnel->item.type = (typeof(tunnel->item.type))
9692                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
9693         tunnel->item.spec = tunnel;
9694         tunnel->item.last = NULL;
9695         tunnel->item.mask = NULL;
9696
9697         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
9698                 dev->data->port_id, tunnel->tunnel_id);
9699
9700         return tunnel;
9701 }
9702
9703 struct tunnel_db_get_tunnel_ctx {
9704         const struct rte_flow_tunnel *app_tunnel;
9705         struct mlx5_flow_tunnel *tunnel;
9706 };
9707
9708 static bool get_tunnel_match(struct rte_eth_dev *dev,
9709                              struct mlx5_flow_tunnel *tunnel, const void *x)
9710 {
9711         const struct tunnel_db_get_tunnel_ctx *ctx = x;
9712
9713         RTE_SET_USED(dev);
9714         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
9715                        sizeof(*ctx->app_tunnel));
9716 }
9717
9718 static void get_tunnel_hit(struct rte_eth_dev *dev,
9719                            struct mlx5_flow_tunnel *tunnel, void *x)
9720 {
9721         /* called under tunnel spinlock protection */
9722         struct tunnel_db_get_tunnel_ctx *ctx = x;
9723
9724         RTE_SET_USED(dev);
9725         tunnel->refctn++;
9726         ctx->tunnel = tunnel;
9727 }
9728
9729 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
9730 {
9731         /* called under tunnel spinlock protection */
9732         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
9733         struct tunnel_db_get_tunnel_ctx *ctx = x;
9734
9735         rte_spinlock_unlock(&thub->sl);
9736         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
9737         rte_spinlock_lock(&thub->sl);
9738         if (ctx->tunnel) {
9739                 ctx->tunnel->refctn = 1;
9740                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
9741         }
9742 }
9743
9744
9745 static int
9746 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
9747                      const struct rte_flow_tunnel *app_tunnel,
9748                      struct mlx5_flow_tunnel **tunnel)
9749 {
9750         struct tunnel_db_get_tunnel_ctx ctx = {
9751                 .app_tunnel = app_tunnel,
9752         };
9753
9754         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
9755                                       get_tunnel_miss, &ctx, true);
9756         *tunnel = ctx.tunnel;
9757         return ctx.tunnel ? 0 : -ENOMEM;
9758 }
9759
9760 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
9761 {
9762         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
9763
9764         if (!thub)
9765                 return;
9766         if (!LIST_EMPTY(&thub->tunnels))
9767                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
9768         mlx5_hlist_destroy(thub->groups);
9769         mlx5_free(thub);
9770 }
9771
9772 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
9773 {
9774         int err;
9775         struct mlx5_flow_tunnel_hub *thub;
9776
9777         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
9778                            0, SOCKET_ID_ANY);
9779         if (!thub)
9780                 return -ENOMEM;
9781         LIST_INIT(&thub->tunnels);
9782         rte_spinlock_init(&thub->sl);
9783         thub->groups = mlx5_hlist_create("flow groups", 64,
9784                                          false, true, sh,
9785                                          mlx5_flow_tunnel_grp2tbl_create_cb,
9786                                          mlx5_flow_tunnel_grp2tbl_match_cb,
9787                                          mlx5_flow_tunnel_grp2tbl_remove_cb,
9788                                          mlx5_flow_tunnel_grp2tbl_clone_cb,
9789                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
9790         if (!thub->groups) {
9791                 err = -rte_errno;
9792                 goto err;
9793         }
9794         sh->tunnel_hub = thub;
9795
9796         return 0;
9797
9798 err:
9799         if (thub->groups)
9800                 mlx5_hlist_destroy(thub->groups);
9801         if (thub)
9802                 mlx5_free(thub);
9803         return err;
9804 }
9805
9806 static inline int
9807 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
9808                           struct rte_flow_tunnel *tunnel,
9809                           struct rte_flow_error *error)
9810 {
9811         struct mlx5_priv *priv = dev->data->dev_private;
9812
9813         if (!priv->sh->config.dv_flow_en)
9814                 return rte_flow_error_set(error, ENOTSUP,
9815                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9816                                           "flow DV interface is off");
9817         if (!is_tunnel_offload_active(dev))
9818                 return rte_flow_error_set(error, ENOTSUP,
9819                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9820                                           "tunnel offload was not activated");
9821         if (!tunnel)
9822                 return rte_flow_error_set(error, EINVAL,
9823                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9824                                           "no application tunnel");
9825         switch (tunnel->type) {
9826         default:
9827                 return rte_flow_error_set(error, EINVAL,
9828                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9829                                           "unsupported tunnel type");
9830         case RTE_FLOW_ITEM_TYPE_VXLAN:
9831         case RTE_FLOW_ITEM_TYPE_GRE:
9832         case RTE_FLOW_ITEM_TYPE_NVGRE:
9833         case RTE_FLOW_ITEM_TYPE_GENEVE:
9834                 break;
9835         }
9836         return 0;
9837 }
9838
9839 static int
9840 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
9841                     struct rte_flow_tunnel *app_tunnel,
9842                     struct rte_flow_action **actions,
9843                     uint32_t *num_of_actions,
9844                     struct rte_flow_error *error)
9845 {
9846         struct mlx5_flow_tunnel *tunnel;
9847         int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
9848
9849         if (ret)
9850                 return ret;
9851         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9852         if (ret < 0) {
9853                 return rte_flow_error_set(error, -ret,
9854                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
9855                                           "failed to initialize pmd tunnel");
9856         }
9857         *actions = &tunnel->action;
9858         *num_of_actions = 1;
9859         return 0;
9860 }
9861
9862 static int
9863 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
9864                        struct rte_flow_tunnel *app_tunnel,
9865                        struct rte_flow_item **items,
9866                        uint32_t *num_of_items,
9867                        struct rte_flow_error *error)
9868 {
9869         struct mlx5_flow_tunnel *tunnel;
9870         int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
9871
9872         if (ret)
9873                 return ret;
9874         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
9875         if (ret < 0) {
9876                 return rte_flow_error_set(error, -ret,
9877                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9878                                           "failed to initialize pmd tunnel");
9879         }
9880         *items = &tunnel->item;
9881         *num_of_items = 1;
9882         return 0;
9883 }
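/*
 * Illustrative only: how an application obtains the PMD-provided tunnel
 * offload action and item returned by the two helpers above, for a VXLAN
 * tunnel. The PMD action is prepended to the decap-set rule actions and the
 * PMD item to the match rule pattern; both are released once the rules are
 * created. Error handling is trimmed for brevity, the tunnel description is
 * hypothetical, and MLX5_FLOW_DOC_EXAMPLE is a hypothetical guard.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLE
static int
example_tunnel_offload_elements(uint16_t port_id)
{
	struct rte_flow_error error;
	struct rte_flow_tunnel tunnel = {
		.type = RTE_FLOW_ITEM_TYPE_VXLAN, /* hypothetical tunnel */
	};
	struct rte_flow_action *pmd_actions;
	struct rte_flow_item *pmd_items;
	uint32_t num_actions, num_items;
	int ret;

	ret = rte_flow_tunnel_decap_set(port_id, &tunnel, &pmd_actions,
					&num_actions, &error);
	if (ret)
		return ret;
	ret = rte_flow_tunnel_match(port_id, &tunnel, &pmd_items,
				    &num_items, &error);
	if (ret)
		return ret;
	/* ... create the decap-set and match rules using these elements ... */
	rte_flow_tunnel_action_decap_release(port_id, pmd_actions,
					     num_actions, &error);
	rte_flow_tunnel_item_release(port_id, pmd_items, num_items, &error);
	return 0;
}
#endif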
9884
9885 struct tunnel_db_element_release_ctx {
9886         struct rte_flow_item *items;
9887         struct rte_flow_action *actions;
9888         uint32_t num_elements;
9889         struct rte_flow_error *error;
9890         int ret;
9891 };
9892
9893 static bool
9894 tunnel_element_release_match(struct rte_eth_dev *dev,
9895                              struct mlx5_flow_tunnel *tunnel, const void *x)
9896 {
9897         const struct tunnel_db_element_release_ctx *ctx = x;
9898
9899         RTE_SET_USED(dev);
9900         if (ctx->num_elements != 1)
9901                 return false;
9902         else if (ctx->items)
9903                 return ctx->items == &tunnel->item;
9904         else if (ctx->actions)
9905                 return ctx->actions == &tunnel->action;
9906
9907         return false;
9908 }
9909
9910 static void
9911 tunnel_element_release_hit(struct rte_eth_dev *dev,
9912                            struct mlx5_flow_tunnel *tunnel, void *x)
9913 {
9914         struct tunnel_db_element_release_ctx *ctx = x;
9915         ctx->ret = 0;
9916         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
9917                 mlx5_flow_tunnel_free(dev, tunnel);
9918 }
9919
9920 static void
9921 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
9922 {
9923         struct tunnel_db_element_release_ctx *ctx = x;
9924         RTE_SET_USED(dev);
9925         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
9926                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
9927                                       "invalid argument");
9928 }
9929
9930 static int
9931 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
9932                        struct rte_flow_item *pmd_items,
9933                        uint32_t num_items, struct rte_flow_error *err)
9934 {
9935         struct tunnel_db_element_release_ctx ctx = {
9936                 .items = pmd_items,
9937                 .actions = NULL,
9938                 .num_elements = num_items,
9939                 .error = err,
9940         };
9941
9942         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9943                                       tunnel_element_release_hit,
9944                                       tunnel_element_release_miss, &ctx, false);
9945
9946         return ctx.ret;
9947 }
9948
9949 static int
9950 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
9951                          struct rte_flow_action *pmd_actions,
9952                          uint32_t num_actions, struct rte_flow_error *err)
9953 {
9954         struct tunnel_db_element_release_ctx ctx = {
9955                 .items = NULL,
9956                 .actions = pmd_actions,
9957                 .num_elements = num_actions,
9958                 .error = err,
9959         };
9960
9961         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
9962                                       tunnel_element_release_hit,
9963                                       tunnel_element_release_miss, &ctx, false);
9964
9965         return ctx.ret;
9966 }
9967
9968 static int
9969 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
9970                                   struct rte_mbuf *m,
9971                                   struct rte_flow_restore_info *info,
9972                                   struct rte_flow_error *err)
9973 {
9974         uint64_t ol_flags = m->ol_flags;
9975         const struct mlx5_flow_tbl_data_entry *tble;
9976         const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
9977
9978         if (!is_tunnel_offload_active(dev)) {
9979                 info->flags = 0;
9980                 return 0;
9981         }
9982
9983         if ((ol_flags & mask) != mask)
9984                 goto err;
9985         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
9986         if (!tble) {
9987                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
9988                         dev->data->port_id, m->hash.fdir.hi);
9989                 goto err;
9990         }
9991         MLX5_ASSERT(tble->tunnel);
9992         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
9993         info->group_id = tble->group_id;
9994         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
9995                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
9996                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
9997
9998         return 0;
9999
10000 err:
10001         return rte_flow_error_set(err, EINVAL,
10002                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10003                                   "failed to get restore info");
10004 }
10005
10006 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
10007 static int
10008 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10009                            __rte_unused struct rte_flow_tunnel *app_tunnel,
10010                            __rte_unused struct rte_flow_action **actions,
10011                            __rte_unused uint32_t *num_of_actions,
10012                            __rte_unused struct rte_flow_error *error)
10013 {
10014         return -ENOTSUP;
10015 }
10016
10017 static int
10018 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10019                        __rte_unused struct rte_flow_tunnel *app_tunnel,
10020                        __rte_unused struct rte_flow_item **items,
10021                        __rte_unused uint32_t *num_of_items,
10022                        __rte_unused struct rte_flow_error *error)
10023 {
10024         return -ENOTSUP;
10025 }
10026
10027 static int
10028 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10029                               __rte_unused struct rte_flow_item *pmd_items,
10030                               __rte_unused uint32_t num_items,
10031                               __rte_unused struct rte_flow_error *err)
10032 {
10033         return -ENOTSUP;
10034 }
10035
10036 static int
10037 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10038                                 __rte_unused struct rte_flow_action *pmd_action,
10039                                 __rte_unused uint32_t num_actions,
10040                                 __rte_unused struct rte_flow_error *err)
10041 {
10042         return -ENOTSUP;
10043 }
10044
10045 static int
10046 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10047                                   __rte_unused struct rte_mbuf *m,
10048                                   __rte_unused struct rte_flow_restore_info *i,
10049                                   __rte_unused struct rte_flow_error *err)
10050 {
10051         return -ENOTSUP;
10052 }
10053
10054 static int
10055 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10056                              __rte_unused struct rte_flow *flow,
10057                              __rte_unused const struct rte_flow_attr *attr,
10058                              __rte_unused const struct rte_flow_action *actions,
10059                              __rte_unused uint32_t flow_idx,
10060                              __rte_unused const struct mlx5_flow_tunnel *tunnel,
10061                              __rte_unused struct tunnel_default_miss_ctx *ctx,
10062                              __rte_unused struct rte_flow_error *error)
10063 {
10064         return -ENOTSUP;
10065 }
10066
10067 static struct mlx5_flow_tunnel *
10068 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10069                     __rte_unused uint32_t id)
10070 {
10071         return NULL;
10072 }
10073
10074 static void
10075 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10076                       __rte_unused struct mlx5_flow_tunnel *tunnel)
10077 {
10078 }
10079
10080 static uint32_t
10081 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10082                                 __rte_unused const struct mlx5_flow_tunnel *t,
10083                                 __rte_unused uint32_t group,
10084                                 __rte_unused uint32_t *table,
10085                                 struct rte_flow_error *error)
10086 {
10087         return rte_flow_error_set(error, ENOTSUP,
10088                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10089                                   "tunnel offload requires DV support");
10090 }
10091
10092 void
10093 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10094                         __rte_unused  uint16_t port_id)
10095 {
10096 }
10097 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10098
10099 /* Flex flow item API */
10100 static struct rte_flow_item_flex_handle *
10101 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10102                            const struct rte_flow_item_flex_conf *conf,
10103                            struct rte_flow_error *error)
10104 {
10105         static const char err_msg[] = "flex item creation unsupported";
10106         struct rte_flow_attr attr = { .transfer = 0 };
10107         const struct mlx5_flow_driver_ops *fops =
10108                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10109
10110         if (!fops->item_create) {
10111                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10112                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10113                                    NULL, err_msg);
10114                 return NULL;
10115         }
10116         return fops->item_create(dev, conf, error);
10117 }
10118
10119 static int
10120 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10121                             const struct rte_flow_item_flex_handle *handle,
10122                             struct rte_flow_error *error)
10123 {
10124         static const char err_msg[] = "flex item release unsupported";
10125         struct rte_flow_attr attr = { .transfer = 0 };
10126         const struct mlx5_flow_driver_ops *fops =
10127                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10128
10129         if (!fops->item_release) {
10130                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10131                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10132                                    NULL, err_msg);
10133                 return -rte_errno;
10134         }
10135         return fops->item_release(dev, handle, error);
10136 }
10137
10138 static void
10139 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10140 {
10141         int ret;
10142         struct rte_flow_error error;
10143
10144         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10145                 char *item_name;
10146                 ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10147                                     sizeof(item_name),
10148                                     (void *)(uintptr_t)item->type, &error);
10149                 if (ret > 0)
10150                         printf("%s ", item_name);
10151                 else
10152                         printf("%d\n", (int)item->type);
10153         }
10154         printf("END\n");
10155 }
10156
10157 static int
10158 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10159 {
10160         const struct rte_flow_item_udp *spec = udp_item->spec;
10161         const struct rte_flow_item_udp *mask = udp_item->mask;
10162         uint16_t udp_dport = 0;
10163
10164         if (spec != NULL) {
10165                 if (!mask)
10166                         mask = &rte_flow_item_udp_mask;
10167                 udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10168                                 mask->hdr.dst_port);
10169         }
10170         return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10171 }
10172
10173 static const struct mlx5_flow_expand_node *
10174 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10175                 unsigned int item_idx,
10176                 const struct mlx5_flow_expand_node graph[],
10177                 const struct mlx5_flow_expand_node *node)
10178 {
10179         const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10180
10181         if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10182                         node != NULL &&
10183                         node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10184                 /*
10185                  * The expansion node is VXLAN and it is also the last
10186                  * expandable item in the pattern, so expansion of the
10187                  * inner tunnel needs to continue.
10188                  */
10189                 MLX5_ASSERT(item_idx > 0);
10190                 prev_item = pattern + item_idx - 1;
10191                 MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10192                 if (mlx5_flow_is_std_vxlan_port(prev_item))
10193                         return &graph[MLX5_EXPANSION_STD_VXLAN];
10194                 return &graph[MLX5_EXPANSION_L3_VXLAN];
10195         }
10196         return node;
10197 }
10198
10199 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10200 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10201         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10202 };
10203
10204 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10205 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10206         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10207         { 9, 10, 11 }, { 12, 13, 14 },
10208 };
10209
10210 /**
10211  * Discover the number of available flow priorities.
10212  *
10213  * @param dev
10214  *   Ethernet device.
10215  *
10216  * @return
10217  *   On success, number of available flow priorities.
10218  *   On failure, a negative errno-style code and rte_errno is set.
10219  */
10220 int
10221 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10222 {
10223         static const uint16_t vprio[] = {8, 16};
10224         const struct mlx5_priv *priv = dev->data->dev_private;
10225         const struct mlx5_flow_driver_ops *fops;
10226         enum mlx5_flow_drv_type type;
10227         int ret;
10228
10229         type = mlx5_flow_os_get_type();
10230         if (type == MLX5_FLOW_TYPE_MAX) {
10231                 type = MLX5_FLOW_TYPE_VERBS;
10232                 if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10233                         type = MLX5_FLOW_TYPE_DV;
10234         }
10235         fops = flow_get_drv_ops(type);
10236         if (fops->discover_priorities == NULL) {
10237                 DRV_LOG(ERR, "Priority discovery not supported");
10238                 rte_errno = ENOTSUP;
10239                 return -rte_errno;
10240         }
10241         ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10242         if (ret < 0)
10243                 return ret;
10244         switch (ret) {
10245         case 8:
10246                 ret = RTE_DIM(priority_map_3);
10247                 break;
10248         case 16:
10249                 ret = RTE_DIM(priority_map_5);
10250                 break;
10251         default:
10252                 rte_errno = ENOTSUP;
10253                 DRV_LOG(ERR,
10254                         "port %u maximum priority: %d expected 8/16",
10255                         dev->data->port_id, ret);
10256                 return -rte_errno;
10257         }
10258         DRV_LOG(INFO, "port %u supported flow priorities:"
10259                 " 0-%d for ingress or egress root table,"
10260                 " 0-%d for non-root table or transfer root table.",
10261                 dev->data->port_id, ret - 2,
10262                 MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10263         return ret;
10264 }
10265
10266 /**
10267  * Adjust flow priority based on the highest layer and the request priority.
10268  *
10269  * @param[in] dev
10270  *   Pointer to the Ethernet device structure.
10271  * @param[in] priority
10272  *   The rule base priority.
10273  * @param[in] subpriority
10274  *   The priority based on the items.
10275  *
10276  * @return
10277  *   The new priority.
10278  */
10279 uint32_t
10280 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10281                           uint32_t subpriority)
10282 {
10283         uint32_t res = 0;
10284         struct mlx5_priv *priv = dev->data->dev_private;
10285
10286         switch (priv->sh->flow_max_priority) {
10287         case RTE_DIM(priority_map_3):
10288                 res = priority_map_3[priority][subpriority];
10289                 break;
10290         case RTE_DIM(priority_map_5):
10291                 res = priority_map_5[priority][subpriority];
10292                 break;
10293         }
10294         return  res;
10295 }
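/*
 * Worked example of the mapping above: when 16 Verbs priorities were
 * discovered, priv->sh->flow_max_priority == RTE_DIM(priority_map_5) == 5,
 * so a rule with base priority 1 and item subpriority 2 resolves to
 * priority_map_5[1][2] == 5. The input values are illustrative only.
 */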