net/mlx5: fix LRO configuration in drop Rx queue
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              const struct mlx5_flow_tunnel *tunnel,
54                              struct tunnel_default_miss_ctx *ctx,
55                              struct rte_flow_error *error);
56 static struct mlx5_flow_tunnel *
57 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
58 static void
59 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
60 static uint32_t
61 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
62                                 const struct mlx5_flow_tunnel *tunnel,
63                                 uint32_t group, uint32_t *table,
64                                 struct rte_flow_error *error);
65
66 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
67 static void mlx5_flow_pop_thread_workspace(void);
68
69
70 /** Device flow drivers. */
71 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
72
73 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
74
75 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
76         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
77 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
78         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
79         [MLX5_FLOW_TYPE_HW] = &mlx5_flow_hw_drv_ops,
80 #endif
81         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
82         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
83 };
84
85 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
86 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
87         (const int []){ \
88                 __VA_ARGS__, 0, \
89         }
90
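/*
 * Editorial note (illustrative, not part of the driver): for example,
 * MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 * expands to the compound literal
 * (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, },
 * i.e. a zero-terminated list of next node indexes.
 */
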
91 /** Node object of input graph for mlx5_flow_expand_rss(). */
92 struct mlx5_flow_expand_node {
93         const int *const next;
94         /**<
95          * List of next node indexes. A value of 0 terminates the list.
96          */
97         const enum rte_flow_item_type type;
98         /**< Pattern item type of current node. */
99         uint64_t rss_types;
100         /**<
101          * RSS types bit-field associated with this node
102          * (see RTE_ETH_RSS_* definitions).
103          */
104         uint64_t node_flags;
105         /**<
106          *  Bit-fields that define how the node is used in the expansion.
107          * (see MLX5_EXPANSION_NODE_* definitions).
108          */
109 };
110
111 /* Optional expansion node. The expansion algorithm will not go deeper. */
112 #define MLX5_EXPANSION_NODE_OPTIONAL (UINT64_C(1) << 0)
113
114 /* The node is not added implicitly as an expansion to the flow pattern.
115  * If the node type does not match the flow pattern item type, the
116  * expansion algorithm goes deeper to its next nodes.
117  * In the current implementation, the list of next node indexes can
118  * have at most one node with this flag set and it has to be the last
119  * node index (before the list terminator).
120  */
121 #define MLX5_EXPANSION_NODE_EXPLICIT (UINT64_C(1) << 1)
122
123 /** Object returned by mlx5_flow_expand_rss(). */
124 struct mlx5_flow_expand_rss {
125         uint32_t entries;
126         /**< Number of entries in the @p entry array. */
127         struct {
128                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
129                 uint32_t priority; /**< Priority offset for each expansion. */
130         } entry[];
131 };
132
133 static void
134 mlx5_dbg__print_pattern(const struct rte_flow_item *item);
135
136 static const struct mlx5_flow_expand_node *
137 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
138                 unsigned int item_idx,
139                 const struct mlx5_flow_expand_node graph[],
140                 const struct mlx5_flow_expand_node *node);
141
142 static bool
143 mlx5_flow_is_rss_expandable_item(const struct rte_flow_item *item)
144 {
145         switch (item->type) {
146         case RTE_FLOW_ITEM_TYPE_ETH:
147         case RTE_FLOW_ITEM_TYPE_VLAN:
148         case RTE_FLOW_ITEM_TYPE_IPV4:
149         case RTE_FLOW_ITEM_TYPE_IPV6:
150         case RTE_FLOW_ITEM_TYPE_UDP:
151         case RTE_FLOW_ITEM_TYPE_TCP:
152         case RTE_FLOW_ITEM_TYPE_VXLAN:
153         case RTE_FLOW_ITEM_TYPE_NVGRE:
154         case RTE_FLOW_ITEM_TYPE_GRE:
155         case RTE_FLOW_ITEM_TYPE_GENEVE:
156         case RTE_FLOW_ITEM_TYPE_MPLS:
157         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
158         case RTE_FLOW_ITEM_TYPE_GRE_KEY:
159         case RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT:
160         case RTE_FLOW_ITEM_TYPE_GTP:
161                 return true;
162         default:
163                 break;
164         }
165         return false;
166 }
167
168 /**
169  * Network Service Header (NSH) and its next protocol values
170  * are described in RFC-8393.
171  */
172 static enum rte_flow_item_type
173 mlx5_nsh_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
174 {
175         enum rte_flow_item_type type;
176
177         switch (proto_mask & proto_spec) {
178         case 0:
179                 type = RTE_FLOW_ITEM_TYPE_VOID;
180                 break;
181         case RTE_VXLAN_GPE_TYPE_IPV4:
182                 type = RTE_FLOW_ITEM_TYPE_IPV4;
183                 break;
184         case RTE_VXLAN_GPE_TYPE_IPV6:
185                 type = RTE_FLOW_ITEM_TYPE_IPV6;
186                 break;
187         case RTE_VXLAN_GPE_TYPE_ETH:
188                 type = RTE_FLOW_ITEM_TYPE_ETH;
189                 break;
190         default:
191                 type = RTE_FLOW_ITEM_TYPE_END;
192         }
193         return type;
194 }
195
196 static enum rte_flow_item_type
197 mlx5_inet_proto_to_item_type(uint8_t proto_spec, uint8_t proto_mask)
198 {
199         enum rte_flow_item_type type;
200
201         switch (proto_mask & proto_spec) {
202         case 0:
203                 type = RTE_FLOW_ITEM_TYPE_VOID;
204                 break;
205         case IPPROTO_UDP:
206                 type = RTE_FLOW_ITEM_TYPE_UDP;
207                 break;
208         case IPPROTO_TCP:
209                 type = RTE_FLOW_ITEM_TYPE_TCP;
210                 break;
211         case IPPROTO_IPIP:
212                 type = RTE_FLOW_ITEM_TYPE_IPV4;
213                 break;
214         case IPPROTO_IPV6:
215                 type = RTE_FLOW_ITEM_TYPE_IPV6;
216                 break;
217         default:
218                 type = RTE_FLOW_ITEM_TYPE_END;
219         }
220         return type;
221 }
222
223 static enum rte_flow_item_type
224 mlx5_ethertype_to_item_type(rte_be16_t type_spec,
225                             rte_be16_t type_mask, bool is_tunnel)
226 {
227         enum rte_flow_item_type type;
228
229         switch (rte_be_to_cpu_16(type_spec & type_mask)) {
230         case 0:
231                 type = RTE_FLOW_ITEM_TYPE_VOID;
232                 break;
233         case RTE_ETHER_TYPE_TEB:
234                 type = is_tunnel ?
235                        RTE_FLOW_ITEM_TYPE_ETH : RTE_FLOW_ITEM_TYPE_END;
236                 break;
237         case RTE_ETHER_TYPE_VLAN:
238                 type = !is_tunnel ?
239                        RTE_FLOW_ITEM_TYPE_VLAN : RTE_FLOW_ITEM_TYPE_END;
240                 break;
241         case RTE_ETHER_TYPE_IPV4:
242                 type = RTE_FLOW_ITEM_TYPE_IPV4;
243                 break;
244         case RTE_ETHER_TYPE_IPV6:
245                 type = RTE_FLOW_ITEM_TYPE_IPV6;
246                 break;
247         default:
248                 type = RTE_FLOW_ITEM_TYPE_END;
249         }
250         return type;
251 }
252
253 static enum rte_flow_item_type
254 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
255 {
256 #define MLX5_XSET_ITEM_MASK_SPEC(type, fld)                              \
257         do {                                                             \
258                 const void *m = item->mask;                              \
259                 const void *s = item->spec;                              \
260                 mask = m ?                                               \
261                         ((const struct rte_flow_item_##type *)m)->fld :  \
262                         rte_flow_item_##type##_mask.fld;                 \
263                 spec = ((const struct rte_flow_item_##type *)s)->fld;    \
264         } while (0)
265
266         enum rte_flow_item_type ret;
267         uint16_t spec, mask;
268
269         if (item == NULL || item->spec == NULL)
270                 return RTE_FLOW_ITEM_TYPE_VOID;
271         switch (item->type) {
272         case RTE_FLOW_ITEM_TYPE_ETH:
273                 MLX5_XSET_ITEM_MASK_SPEC(eth, type);
274                 if (!mask)
275                         return RTE_FLOW_ITEM_TYPE_VOID;
276                 ret = mlx5_ethertype_to_item_type(spec, mask, false);
277                 break;
278         case RTE_FLOW_ITEM_TYPE_VLAN:
279                 MLX5_XSET_ITEM_MASK_SPEC(vlan, inner_type);
280                 if (!mask)
281                         return RTE_FLOW_ITEM_TYPE_VOID;
282                 ret = mlx5_ethertype_to_item_type(spec, mask, false);
283                 break;
284         case RTE_FLOW_ITEM_TYPE_IPV4:
285                 MLX5_XSET_ITEM_MASK_SPEC(ipv4, hdr.next_proto_id);
286                 if (!mask)
287                         return RTE_FLOW_ITEM_TYPE_VOID;
288                 ret = mlx5_inet_proto_to_item_type(spec, mask);
289                 break;
290         case RTE_FLOW_ITEM_TYPE_IPV6:
291                 MLX5_XSET_ITEM_MASK_SPEC(ipv6, hdr.proto);
292                 if (!mask)
293                         return RTE_FLOW_ITEM_TYPE_VOID;
294                 ret = mlx5_inet_proto_to_item_type(spec, mask);
295                 break;
296         case RTE_FLOW_ITEM_TYPE_GENEVE:
297                 MLX5_XSET_ITEM_MASK_SPEC(geneve, protocol);
298                 ret = mlx5_ethertype_to_item_type(spec, mask, true);
299                 break;
300         case RTE_FLOW_ITEM_TYPE_GRE:
301                 MLX5_XSET_ITEM_MASK_SPEC(gre, protocol);
302                 ret = mlx5_ethertype_to_item_type(spec, mask, true);
303                 break;
304         case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
305                 MLX5_XSET_ITEM_MASK_SPEC(vxlan_gpe, protocol);
306                 ret = mlx5_nsh_proto_to_item_type(spec, mask);
307                 break;
308         default:
309                 ret = RTE_FLOW_ITEM_TYPE_VOID;
310                 break;
311         }
312         return ret;
313 #undef MLX5_XSET_ITEM_MASK_SPEC
314 }
315
316 static const int *
317 mlx5_flow_expand_rss_skip_explicit(const struct mlx5_flow_expand_node graph[],
318                 const int *next_node)
319 {
320         const struct mlx5_flow_expand_node *node = NULL;
321         const int *next = next_node;
322
323         while (next && *next) {
324                 /*
325                  * Skip the nodes with the MLX5_EXPANSION_NODE_EXPLICIT
326                  * flag set, because they were not found in the flow pattern.
327                  */
328                 node = &graph[*next];
329                 if (!(node->node_flags & MLX5_EXPANSION_NODE_EXPLICIT))
330                         break;
331                 next = node->next;
332         }
333         return next;
334 }
335
336 #define MLX5_RSS_EXP_ELT_N 16
337
338 /**
339  * Expand RSS flows into several possible flows according to the RSS hash
340  * fields requested and the driver capabilities.
341  *
342  * @param[out] buf
343  *   Buffer to store the expansion result.
344  * @param[in] size
345  *   Buffer size in bytes. If 0, @p buf can be NULL.
346  * @param[in] pattern
347  *   User flow pattern.
348  * @param[in] types
349  *   RSS types to expand (see RTE_ETH_RSS_* definitions).
350  * @param[in] graph
351  *   Input graph to expand @p pattern according to @p types.
352  * @param[in] graph_root_index
353  *   Index of root node in @p graph, typically 0.
354  *
355  * @return
356  *   A positive value representing the size of @p buf in bytes regardless of
357  *   @p size on success, a negative errno value otherwise and rte_errno is
358  *   set. The following errors are defined:
359  *
360  *   -E2BIG: the expansion depth of @p graph is too big.
361  *   -EINVAL: @p size is not large enough for the expanded pattern.
362  */
363 static int
364 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
365                      const struct rte_flow_item *pattern, uint64_t types,
366                      const struct mlx5_flow_expand_node graph[],
367                      int graph_root_index)
368 {
369         const struct rte_flow_item *item;
370         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
371         const int *next_node;
372         const int *stack[MLX5_RSS_EXP_ELT_N];
373         int stack_pos = 0;
374         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
375         unsigned int i, item_idx, last_expand_item_idx = 0;
376         size_t lsize;
377         size_t user_pattern_size = 0;
378         void *addr = NULL;
379         const struct mlx5_flow_expand_node *next = NULL;
380         struct rte_flow_item missed_item;
381         int missed = 0;
382         int elt = 0;
383         const struct rte_flow_item *last_expand_item = NULL;
384
385         memset(&missed_item, 0, sizeof(missed_item));
386         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
387                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
388         if (lsize > size)
389                 return -EINVAL;
390         buf->entry[0].priority = 0;
391         buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
392         buf->entries = 0;
393         addr = buf->entry[0].pattern;
394         for (item = pattern, item_idx = 0;
395                         item->type != RTE_FLOW_ITEM_TYPE_END;
396                         item++, item_idx++) {
397                 if (!mlx5_flow_is_rss_expandable_item(item)) {
398                         user_pattern_size += sizeof(*item);
399                         continue;
400                 }
401                 last_expand_item = item;
402                 last_expand_item_idx = item_idx;
403                 i = 0;
404                 while (node->next && node->next[i]) {
405                         next = &graph[node->next[i]];
406                         if (next->type == item->type)
407                                 break;
408                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
409                                 node = next;
410                                 i = 0;
411                         } else {
412                                 ++i;
413                         }
414                 }
415                 if (next)
416                         node = next;
417                 user_pattern_size += sizeof(*item);
418         }
419         user_pattern_size += sizeof(*item); /* Handle END item. */
420         lsize += user_pattern_size;
421         if (lsize > size)
422                 return -EINVAL;
423         /* Copy the user pattern in the first entry of the buffer. */
424         rte_memcpy(addr, pattern, user_pattern_size);
425         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
426         buf->entries = 1;
427         /* Start expanding. */
428         memset(flow_items, 0, sizeof(flow_items));
429         user_pattern_size -= sizeof(*item);
430         /*
431          * Check if the last valid item has a spec set; the pattern then needs
432          * to be completed before it can be used for expansion.
433          */
434         missed_item.type = mlx5_flow_expand_rss_item_complete(last_expand_item);
435         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
436                 /* Item type END indicates expansion is not required. */
437                 return lsize;
438         }
439         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
440                 next = NULL;
441                 missed = 1;
442                 i = 0;
443                 while (node->next && node->next[i]) {
444                         next = &graph[node->next[i]];
445                         if (next->type == missed_item.type) {
446                                 flow_items[0].type = missed_item.type;
447                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
448                                 break;
449                         }
450                         if (next->node_flags & MLX5_EXPANSION_NODE_EXPLICIT) {
451                                 node = next;
452                                 i = 0;
453                         } else {
454                                 ++i;
455                         }
456                         next = NULL;
457                 }
458         }
459         if (next && missed) {
460                 elt = 2; /* missed item + item end. */
461                 node = next;
462                 lsize += elt * sizeof(*item) + user_pattern_size;
463                 if (lsize > size)
464                         return -EINVAL;
465                 if (node->rss_types & types) {
466                         buf->entry[buf->entries].priority = 1;
467                         buf->entry[buf->entries].pattern = addr;
468                         buf->entries++;
469                         rte_memcpy(addr, buf->entry[0].pattern,
470                                    user_pattern_size);
471                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
472                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
473                         addr = (void *)(((uintptr_t)addr) +
474                                         elt * sizeof(*item));
475                 }
476         } else if (last_expand_item != NULL) {
477                 node = mlx5_flow_expand_rss_adjust_node(pattern,
478                                 last_expand_item_idx, graph, node);
479         }
480         memset(flow_items, 0, sizeof(flow_items));
481         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
482                         node->next);
483         stack[stack_pos] = next_node;
484         node = next_node ? &graph[*next_node] : NULL;
485         while (node) {
486                 flow_items[stack_pos].type = node->type;
487                 if (node->rss_types & types) {
488                         size_t n;
489                         /*
490                          * Compute the number of items to copy from the
491                          * expansion and copy it.
492                          * When stack_pos is 0, there is one element in it,
493                          * plus the additional END item.
494                          */
495                         elt = stack_pos + 2;
496                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
497                         lsize += elt * sizeof(*item) + user_pattern_size;
498                         if (lsize > size)
499                                 return -EINVAL;
500                         n = elt * sizeof(*item);
501                         buf->entry[buf->entries].priority =
502                                 stack_pos + 1 + missed;
503                         buf->entry[buf->entries].pattern = addr;
504                         buf->entries++;
505                         rte_memcpy(addr, buf->entry[0].pattern,
506                                    user_pattern_size);
507                         addr = (void *)(((uintptr_t)addr) +
508                                         user_pattern_size);
509                         rte_memcpy(addr, &missed_item,
510                                    missed * sizeof(*item));
511                         addr = (void *)(((uintptr_t)addr) +
512                                 missed * sizeof(*item));
513                         rte_memcpy(addr, flow_items, n);
514                         addr = (void *)(((uintptr_t)addr) + n);
515                 }
516                 /* Go deeper. */
517                 if (!(node->node_flags & MLX5_EXPANSION_NODE_OPTIONAL) &&
518                                 node->next) {
519                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
520                                         node->next);
521                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
522                                 rte_errno = E2BIG;
523                                 return -rte_errno;
524                         }
525                         stack[stack_pos] = next_node;
526                 } else if (*(next_node + 1)) {
527                         /* Follow up with the next possibility. */
528                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
529                                         ++next_node);
530                 } else if (!stack_pos) {
531                         /*
532                          * Completing the traverse over the different paths.
533                          * The next_node is advanced to the terminator.
534                          */
535                         ++next_node;
536                 } else {
537                         /* Move to the next path. */
538                         while (stack_pos) {
539                                 next_node = stack[--stack_pos];
540                                 next_node++;
541                                 if (*next_node)
542                                         break;
543                         }
544                         next_node = mlx5_flow_expand_rss_skip_explicit(graph,
545                                         next_node);
546                         stack[stack_pos] = next_node;
547                 }
548                 node = next_node && *next_node ? &graph[*next_node] : NULL;
549         };
550         return lsize;
551 }
552
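/*
 * Illustrative usage sketch (editorial note, not part of the driver): a
 * minimal example of how a caller could drive mlx5_flow_expand_rss(). The
 * buffer size (4096) and the example pattern are assumptions made for the
 * sake of the example; the expansion graph and its root index are the
 * mlx5_support_expansion table and MLX5_EXPANSION_ROOT defined below.
 *
 * @code
 * union {
 *         struct mlx5_flow_expand_rss buf;
 *         uint8_t buffer[4096];
 * } expand;
 * const struct rte_flow_item pattern[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * int ret = mlx5_flow_expand_rss(&expand.buf, sizeof(expand.buffer),
 *                                pattern, RTE_ETH_RSS_NONFRAG_IPV4_TCP,
 *                                mlx5_support_expansion, MLX5_EXPANSION_ROOT);
 *
 * if (ret > 0) {
 *         uint32_t i;
 *
 *         for (i = 0; i != expand.buf.entries; ++i) {
 *                 // expand.buf.entry[i].pattern is one expanded pattern,
 *                 // expand.buf.entry[i].priority its priority offset.
 *         }
 * }
 * @endcode
 */
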
553 enum mlx5_expansion {
554         MLX5_EXPANSION_ROOT,
555         MLX5_EXPANSION_ROOT_OUTER,
556         MLX5_EXPANSION_OUTER_ETH,
557         MLX5_EXPANSION_OUTER_VLAN,
558         MLX5_EXPANSION_OUTER_IPV4,
559         MLX5_EXPANSION_OUTER_IPV4_UDP,
560         MLX5_EXPANSION_OUTER_IPV4_TCP,
561         MLX5_EXPANSION_OUTER_IPV6,
562         MLX5_EXPANSION_OUTER_IPV6_UDP,
563         MLX5_EXPANSION_OUTER_IPV6_TCP,
564         MLX5_EXPANSION_VXLAN,
565         MLX5_EXPANSION_STD_VXLAN,
566         MLX5_EXPANSION_L3_VXLAN,
567         MLX5_EXPANSION_VXLAN_GPE,
568         MLX5_EXPANSION_GRE,
569         MLX5_EXPANSION_NVGRE,
570         MLX5_EXPANSION_GRE_KEY,
571         MLX5_EXPANSION_MPLS,
572         MLX5_EXPANSION_ETH,
573         MLX5_EXPANSION_VLAN,
574         MLX5_EXPANSION_IPV4,
575         MLX5_EXPANSION_IPV4_UDP,
576         MLX5_EXPANSION_IPV4_TCP,
577         MLX5_EXPANSION_IPV6,
578         MLX5_EXPANSION_IPV6_UDP,
579         MLX5_EXPANSION_IPV6_TCP,
580         MLX5_EXPANSION_IPV6_FRAG_EXT,
581         MLX5_EXPANSION_GTP,
582         MLX5_EXPANSION_GENEVE,
583 };
584
585 /** Supported expansion of items. */
586 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
587         [MLX5_EXPANSION_ROOT] = {
588                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
589                                                   MLX5_EXPANSION_IPV4,
590                                                   MLX5_EXPANSION_IPV6),
591                 .type = RTE_FLOW_ITEM_TYPE_END,
592         },
593         [MLX5_EXPANSION_ROOT_OUTER] = {
594                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
595                                                   MLX5_EXPANSION_OUTER_IPV4,
596                                                   MLX5_EXPANSION_OUTER_IPV6),
597                 .type = RTE_FLOW_ITEM_TYPE_END,
598         },
599         [MLX5_EXPANSION_OUTER_ETH] = {
600                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
601                 .type = RTE_FLOW_ITEM_TYPE_ETH,
602                 .rss_types = 0,
603         },
604         [MLX5_EXPANSION_OUTER_VLAN] = {
605                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
606                                                   MLX5_EXPANSION_OUTER_IPV6),
607                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
608                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
609         },
610         [MLX5_EXPANSION_OUTER_IPV4] = {
611                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
612                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
613                          MLX5_EXPANSION_OUTER_IPV4_TCP,
614                          MLX5_EXPANSION_GRE,
615                          MLX5_EXPANSION_NVGRE,
616                          MLX5_EXPANSION_IPV4,
617                          MLX5_EXPANSION_IPV6),
618                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
619                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
620                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
621         },
622         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
623                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
624                                                   MLX5_EXPANSION_VXLAN_GPE,
625                                                   MLX5_EXPANSION_MPLS,
626                                                   MLX5_EXPANSION_GENEVE,
627                                                   MLX5_EXPANSION_GTP),
628                 .type = RTE_FLOW_ITEM_TYPE_UDP,
629                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
630         },
631         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
632                 .type = RTE_FLOW_ITEM_TYPE_TCP,
633                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
634         },
635         [MLX5_EXPANSION_OUTER_IPV6] = {
636                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
637                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
638                          MLX5_EXPANSION_OUTER_IPV6_TCP,
639                          MLX5_EXPANSION_IPV4,
640                          MLX5_EXPANSION_IPV6,
641                          MLX5_EXPANSION_GRE,
642                          MLX5_EXPANSION_NVGRE),
643                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
644                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
645                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
646         },
647         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
648                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
649                                                   MLX5_EXPANSION_VXLAN_GPE,
650                                                   MLX5_EXPANSION_MPLS,
651                                                   MLX5_EXPANSION_GENEVE,
652                                                   MLX5_EXPANSION_GTP),
653                 .type = RTE_FLOW_ITEM_TYPE_UDP,
654                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
655         },
656         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
657                 .type = RTE_FLOW_ITEM_TYPE_TCP,
658                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
659         },
660         [MLX5_EXPANSION_VXLAN] = {
661                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
662                                                   MLX5_EXPANSION_IPV4,
663                                                   MLX5_EXPANSION_IPV6),
664                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
665         },
666         [MLX5_EXPANSION_STD_VXLAN] = {
667                         .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
668                                         .type = RTE_FLOW_ITEM_TYPE_VXLAN,
669         },
670         [MLX5_EXPANSION_L3_VXLAN] = {
671                         .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
672                                         MLX5_EXPANSION_IPV6),
673                                         .type = RTE_FLOW_ITEM_TYPE_VXLAN,
674         },
675         [MLX5_EXPANSION_VXLAN_GPE] = {
676                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
677                                                   MLX5_EXPANSION_IPV4,
678                                                   MLX5_EXPANSION_IPV6),
679                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
680         },
681         [MLX5_EXPANSION_GRE] = {
682                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
683                                                   MLX5_EXPANSION_IPV4,
684                                                   MLX5_EXPANSION_IPV6,
685                                                   MLX5_EXPANSION_GRE_KEY,
686                                                   MLX5_EXPANSION_MPLS),
687                 .type = RTE_FLOW_ITEM_TYPE_GRE,
688         },
689         [MLX5_EXPANSION_GRE_KEY] = {
690                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
691                                                   MLX5_EXPANSION_IPV6,
692                                                   MLX5_EXPANSION_MPLS),
693                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
694                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
695         },
696         [MLX5_EXPANSION_NVGRE] = {
697                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
698                 .type = RTE_FLOW_ITEM_TYPE_NVGRE,
699         },
700         [MLX5_EXPANSION_MPLS] = {
701                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
702                                                   MLX5_EXPANSION_IPV6,
703                                                   MLX5_EXPANSION_ETH),
704                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
705                 .node_flags = MLX5_EXPANSION_NODE_OPTIONAL,
706         },
707         [MLX5_EXPANSION_ETH] = {
708                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
709                 .type = RTE_FLOW_ITEM_TYPE_ETH,
710         },
711         [MLX5_EXPANSION_VLAN] = {
712                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
713                                                   MLX5_EXPANSION_IPV6),
714                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
715                 .node_flags = MLX5_EXPANSION_NODE_EXPLICIT,
716         },
717         [MLX5_EXPANSION_IPV4] = {
718                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
719                                                   MLX5_EXPANSION_IPV4_TCP),
720                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
721                 .rss_types = RTE_ETH_RSS_IPV4 | RTE_ETH_RSS_FRAG_IPV4 |
722                         RTE_ETH_RSS_NONFRAG_IPV4_OTHER,
723         },
724         [MLX5_EXPANSION_IPV4_UDP] = {
725                 .type = RTE_FLOW_ITEM_TYPE_UDP,
726                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_UDP,
727         },
728         [MLX5_EXPANSION_IPV4_TCP] = {
729                 .type = RTE_FLOW_ITEM_TYPE_TCP,
730                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV4_TCP,
731         },
732         [MLX5_EXPANSION_IPV6] = {
733                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
734                                                   MLX5_EXPANSION_IPV6_TCP,
735                                                   MLX5_EXPANSION_IPV6_FRAG_EXT),
736                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
737                 .rss_types = RTE_ETH_RSS_IPV6 | RTE_ETH_RSS_FRAG_IPV6 |
738                         RTE_ETH_RSS_NONFRAG_IPV6_OTHER,
739         },
740         [MLX5_EXPANSION_IPV6_UDP] = {
741                 .type = RTE_FLOW_ITEM_TYPE_UDP,
742                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_UDP,
743         },
744         [MLX5_EXPANSION_IPV6_TCP] = {
745                 .type = RTE_FLOW_ITEM_TYPE_TCP,
746                 .rss_types = RTE_ETH_RSS_NONFRAG_IPV6_TCP,
747         },
748         [MLX5_EXPANSION_IPV6_FRAG_EXT] = {
749                 .type = RTE_FLOW_ITEM_TYPE_IPV6_FRAG_EXT,
750         },
751         [MLX5_EXPANSION_GTP] = {
752                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
753                                                   MLX5_EXPANSION_IPV6),
754                 .type = RTE_FLOW_ITEM_TYPE_GTP,
755         },
756         [MLX5_EXPANSION_GENEVE] = {
757                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
758                                                   MLX5_EXPANSION_IPV4,
759                                                   MLX5_EXPANSION_IPV6),
760                 .type = RTE_FLOW_ITEM_TYPE_GENEVE,
761         },
762 };
763
764 static struct rte_flow_action_handle *
765 mlx5_action_handle_create(struct rte_eth_dev *dev,
766                           const struct rte_flow_indir_action_conf *conf,
767                           const struct rte_flow_action *action,
768                           struct rte_flow_error *error);
769 static int mlx5_action_handle_destroy
770                                 (struct rte_eth_dev *dev,
771                                  struct rte_flow_action_handle *handle,
772                                  struct rte_flow_error *error);
773 static int mlx5_action_handle_update
774                                 (struct rte_eth_dev *dev,
775                                  struct rte_flow_action_handle *handle,
776                                  const void *update,
777                                  struct rte_flow_error *error);
778 static int mlx5_action_handle_query
779                                 (struct rte_eth_dev *dev,
780                                  const struct rte_flow_action_handle *handle,
781                                  void *data,
782                                  struct rte_flow_error *error);
783 static int
784 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
785                     struct rte_flow_tunnel *app_tunnel,
786                     struct rte_flow_action **actions,
787                     uint32_t *num_of_actions,
788                     struct rte_flow_error *error);
789 static int
790 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
791                        struct rte_flow_tunnel *app_tunnel,
792                        struct rte_flow_item **items,
793                        uint32_t *num_of_items,
794                        struct rte_flow_error *error);
795 static int
796 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
797                               struct rte_flow_item *pmd_items,
798                               uint32_t num_items, struct rte_flow_error *err);
799 static int
800 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
801                                 struct rte_flow_action *pmd_actions,
802                                 uint32_t num_actions,
803                                 struct rte_flow_error *err);
804 static int
805 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
806                                   struct rte_mbuf *m,
807                                   struct rte_flow_restore_info *info,
808                                   struct rte_flow_error *err);
809 static struct rte_flow_item_flex_handle *
810 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
811                            const struct rte_flow_item_flex_conf *conf,
812                            struct rte_flow_error *error);
813 static int
814 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
815                             const struct rte_flow_item_flex_handle *handle,
816                             struct rte_flow_error *error);
817 static int
818 mlx5_flow_info_get(struct rte_eth_dev *dev,
819                    struct rte_flow_port_info *port_info,
820                    struct rte_flow_queue_info *queue_info,
821                    struct rte_flow_error *error);
822 static int
823 mlx5_flow_port_configure(struct rte_eth_dev *dev,
824                          const struct rte_flow_port_attr *port_attr,
825                          uint16_t nb_queue,
826                          const struct rte_flow_queue_attr *queue_attr[],
827                          struct rte_flow_error *err);
828
829 static struct rte_flow_pattern_template *
830 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
831                 const struct rte_flow_pattern_template_attr *attr,
832                 const struct rte_flow_item items[],
833                 struct rte_flow_error *error);
834
835 static int
836 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
837                                    struct rte_flow_pattern_template *template,
838                                    struct rte_flow_error *error);
839 static struct rte_flow_actions_template *
840 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
841                         const struct rte_flow_actions_template_attr *attr,
842                         const struct rte_flow_action actions[],
843                         const struct rte_flow_action masks[],
844                         struct rte_flow_error *error);
845 static int
846 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
847                                    struct rte_flow_actions_template *template,
848                                    struct rte_flow_error *error);
849
850 static struct rte_flow_template_table *
851 mlx5_flow_table_create(struct rte_eth_dev *dev,
852                        const struct rte_flow_template_table_attr *attr,
853                        struct rte_flow_pattern_template *item_templates[],
854                        uint8_t nb_item_templates,
855                        struct rte_flow_actions_template *action_templates[],
856                        uint8_t nb_action_templates,
857                        struct rte_flow_error *error);
858 static int
859 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
860                         struct rte_flow_template_table *table,
861                         struct rte_flow_error *error);
862 static struct rte_flow *
863 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
864                             uint32_t queue,
865                             const struct rte_flow_op_attr *attr,
866                             struct rte_flow_template_table *table,
867                             const struct rte_flow_item items[],
868                             uint8_t pattern_template_index,
869                             const struct rte_flow_action actions[],
870                             uint8_t action_template_index,
871                             void *user_data,
872                             struct rte_flow_error *error);
873 static int
874 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
875                              uint32_t queue,
876                              const struct rte_flow_op_attr *attr,
877                              struct rte_flow *flow,
878                              void *user_data,
879                              struct rte_flow_error *error);
880 static int
881 mlx5_flow_pull(struct rte_eth_dev *dev,
882                uint32_t queue,
883                struct rte_flow_op_result res[],
884                uint16_t n_res,
885                struct rte_flow_error *error);
886 static int
887 mlx5_flow_push(struct rte_eth_dev *dev,
888                uint32_t queue,
889                struct rte_flow_error *error);
890
891 static struct rte_flow_action_handle *
892 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
893                                  const struct rte_flow_op_attr *attr,
894                                  const struct rte_flow_indir_action_conf *conf,
895                                  const struct rte_flow_action *action,
896                                  void *user_data,
897                                  struct rte_flow_error *error);
898
899 static int
900 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
901                                  const struct rte_flow_op_attr *attr,
902                                  struct rte_flow_action_handle *handle,
903                                  const void *update,
904                                  void *user_data,
905                                  struct rte_flow_error *error);
906
907 static int
908 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
909                                   const struct rte_flow_op_attr *attr,
910                                   struct rte_flow_action_handle *handle,
911                                   void *user_data,
912                                   struct rte_flow_error *error);
913
914 static const struct rte_flow_ops mlx5_flow_ops = {
915         .validate = mlx5_flow_validate,
916         .create = mlx5_flow_create,
917         .destroy = mlx5_flow_destroy,
918         .flush = mlx5_flow_flush,
919         .isolate = mlx5_flow_isolate,
920         .query = mlx5_flow_query,
921         .dev_dump = mlx5_flow_dev_dump,
922         .get_aged_flows = mlx5_flow_get_aged_flows,
923         .action_handle_create = mlx5_action_handle_create,
924         .action_handle_destroy = mlx5_action_handle_destroy,
925         .action_handle_update = mlx5_action_handle_update,
926         .action_handle_query = mlx5_action_handle_query,
927         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
928         .tunnel_match = mlx5_flow_tunnel_match,
929         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
930         .tunnel_item_release = mlx5_flow_tunnel_item_release,
931         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
932         .flex_item_create = mlx5_flow_flex_item_create,
933         .flex_item_release = mlx5_flow_flex_item_release,
934         .info_get = mlx5_flow_info_get,
935         .configure = mlx5_flow_port_configure,
936         .pattern_template_create = mlx5_flow_pattern_template_create,
937         .pattern_template_destroy = mlx5_flow_pattern_template_destroy,
938         .actions_template_create = mlx5_flow_actions_template_create,
939         .actions_template_destroy = mlx5_flow_actions_template_destroy,
940         .template_table_create = mlx5_flow_table_create,
941         .template_table_destroy = mlx5_flow_table_destroy,
942         .async_create = mlx5_flow_async_flow_create,
943         .async_destroy = mlx5_flow_async_flow_destroy,
944         .pull = mlx5_flow_pull,
945         .push = mlx5_flow_push,
946         .async_action_handle_create = mlx5_flow_async_action_handle_create,
947         .async_action_handle_update = mlx5_flow_async_action_handle_update,
948         .async_action_handle_destroy = mlx5_flow_async_action_handle_destroy,
949 };
950
951 /* Tunnel information. */
952 struct mlx5_flow_tunnel_info {
953         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
954         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
955 };
956
957 static struct mlx5_flow_tunnel_info tunnels_info[] = {
958         {
959                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
960                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
961         },
962         {
963                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
964                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
965         },
966         {
967                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
968                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
969         },
970         {
971                 .tunnel = MLX5_FLOW_LAYER_GRE,
972                 .ptype = RTE_PTYPE_TUNNEL_GRE,
973         },
974         {
975                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
976                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
977         },
978         {
979                 .tunnel = MLX5_FLOW_LAYER_MPLS,
980                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
981         },
982         {
983                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
984                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
985         },
986         {
987                 .tunnel = MLX5_FLOW_LAYER_IPIP,
988                 .ptype = RTE_PTYPE_TUNNEL_IP,
989         },
990         {
991                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
992                 .ptype = RTE_PTYPE_TUNNEL_IP,
993         },
994         {
995                 .tunnel = MLX5_FLOW_LAYER_GTP,
996                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
997         },
998 };
999
1000
1001
1002 /**
1003  * Translate tag ID to register.
1004  *
1005  * @param[in] dev
1006  *   Pointer to the Ethernet device structure.
1007  * @param[in] feature
1008  *   The feature that requests the register.
1009  * @param[in] id
1010  *   The requested register ID.
1011  * @param[out] error
1012  *   Error description in case of failure.
1013  *
1014  * @return
1015  *   The requested register on success, a negative errno
1016  *   value otherwise and rte_errno is set.
1017  */
1018 int
1019 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
1020                      enum mlx5_feature_name feature,
1021                      uint32_t id,
1022                      struct rte_flow_error *error)
1023 {
1024         struct mlx5_priv *priv = dev->data->dev_private;
1025         struct mlx5_sh_config *config = &priv->sh->config;
1026         enum modify_reg start_reg;
1027         bool skip_mtr_reg = false;
1028
1029         switch (feature) {
1030         case MLX5_HAIRPIN_RX:
1031                 return REG_B;
1032         case MLX5_HAIRPIN_TX:
1033                 return REG_A;
1034         case MLX5_METADATA_RX:
1035                 switch (config->dv_xmeta_en) {
1036                 case MLX5_XMETA_MODE_LEGACY:
1037                         return REG_B;
1038                 case MLX5_XMETA_MODE_META16:
1039                         return REG_C_0;
1040                 case MLX5_XMETA_MODE_META32:
1041                         return REG_C_1;
1042                 }
1043                 break;
1044         case MLX5_METADATA_TX:
1045                 return REG_A;
1046         case MLX5_METADATA_FDB:
1047                 switch (config->dv_xmeta_en) {
1048                 case MLX5_XMETA_MODE_LEGACY:
1049                         return REG_NON;
1050                 case MLX5_XMETA_MODE_META16:
1051                         return REG_C_0;
1052                 case MLX5_XMETA_MODE_META32:
1053                         return REG_C_1;
1054                 }
1055                 break;
1056         case MLX5_FLOW_MARK:
1057                 switch (config->dv_xmeta_en) {
1058                 case MLX5_XMETA_MODE_LEGACY:
1059                         return REG_NON;
1060                 case MLX5_XMETA_MODE_META16:
1061                         return REG_C_1;
1062                 case MLX5_XMETA_MODE_META32:
1063                         return REG_C_0;
1064                 }
1065                 break;
1066         case MLX5_MTR_ID:
1067                 /*
1068                  * If meter color and meter id share one register, flow match
1069                  * should use the meter color register for match.
1070                  */
1071                 if (priv->mtr_reg_share)
1072                         return priv->mtr_color_reg;
1073                 else
1074                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1075                                REG_C_3;
1076         case MLX5_MTR_COLOR:
1077         case MLX5_ASO_FLOW_HIT:
1078         case MLX5_ASO_CONNTRACK:
1079         case MLX5_SAMPLE_ID:
1080                 /* All features use the same REG_C. */
1081                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
1082                 return priv->mtr_color_reg;
1083         case MLX5_COPY_MARK:
1084                 /*
1085                  * The metadata COPY_MARK register is only used in the meter suffix
1086                  * sub-flow when a meter is present; it is safe to share the register.
1087                  */
1088                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
1089         case MLX5_APP_TAG:
1090                 /*
1091                  * If metering is enabled, it engages a register for both color
1092                  * match and flow match. If the meter color match does not use
1093                  * REG_C_2, the REG_C_x used by the meter color match must be
1094                  * skipped.
1095                  * If metering is disabled, all available registers can be used.
1096                  */
1097                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
1098                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
1099                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
1100                 if (id > (uint32_t)(REG_C_7 - start_reg))
1101                         return rte_flow_error_set(error, EINVAL,
1102                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1103                                                   NULL, "invalid tag id");
1104                 if (priv->sh->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
1105                         return rte_flow_error_set(error, ENOTSUP,
1106                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1107                                                   NULL, "unsupported tag id");
1108                 /*
1109                  * This case means the meter is using a REG_C_x greater than 2.
1110                  * Take care not to conflict with meter color REG_C_x.
1111                  * If the available index REG_C_y >= REG_C_x, skip the
1112                  * color register.
1113                  */
1114                 if (skip_mtr_reg && priv->sh->flow_mreg_c
1115                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
1116                         if (id >= (uint32_t)(REG_C_7 - start_reg))
1117                                 return rte_flow_error_set(error, EINVAL,
1118                                                        RTE_FLOW_ERROR_TYPE_ITEM,
1119                                                         NULL, "invalid tag id");
1120                         if (priv->sh->flow_mreg_c
1121                             [id + 1 + start_reg - REG_C_0] != REG_NON)
1122                                 return priv->sh->flow_mreg_c
1123                                                [id + 1 + start_reg - REG_C_0];
1124                         return rte_flow_error_set(error, ENOTSUP,
1125                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1126                                                   NULL, "unsupported tag id");
1127                 }
1128                 return priv->sh->flow_mreg_c[id + start_reg - REG_C_0];
1129         }
1130         MLX5_ASSERT(false);
1131         return rte_flow_error_set(error, EINVAL,
1132                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1133                                   NULL, "invalid feature name");
1134 }
1135
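/*
 * Illustrative usage sketch (editorial note, not part of the driver): the
 * translation above is typically used when building a tag match or a
 * modify-header action. The feature/id values below are picked only for the
 * example; "dev" is assumed to be a valid rte_eth_dev pointer.
 *
 * @code
 * struct rte_flow_error error;
 * int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, &error);
 *
 * if (reg < 0) {
 *         // rte_errno is set; error.message describes the failure.
 * } else {
 *         // reg is the REG_C_x register usable for application tag index 0.
 * }
 * @endcode
 */
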
1136 /**
1137  * Check extensive flow metadata register support.
1138  *
1139  * @param dev
1140  *   Pointer to rte_eth_dev structure.
1141  *
1142  * @return
1143  *   True if device supports extensive flow metadata register, otherwise false.
1144  */
1145 bool
1146 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
1147 {
1148         struct mlx5_priv *priv = dev->data->dev_private;
1149
1150         /*
1151          * Having an available reg_c can be regarded as supporting the
1152          * extensive flow metadata register, which could mean:
1153          * - metadata register copy action by modify header.
1154          * - 16 modify header actions are supported.
1155          * - reg_c's are preserved across different domains (FDB and NIC) on
1156          *   packet loopback by flow lookup miss.
1157          */
1158         return priv->sh->flow_mreg_c[2] != REG_NON;
1159 }
1160
1161 /**
1162  * Get the lowest priority.
1163  *
1164  * @param[in] dev
1165  *   Pointer to the Ethernet device structure.
1166  * @param[in] attr
1167  *   Pointer to device flow rule attributes.
1168  *
1169  * @return
1170  *   The value of the lowest flow priority.
1171  */
1172 uint32_t
1173 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
1174                           const struct rte_flow_attr *attr)
1175 {
1176         struct mlx5_priv *priv = dev->data->dev_private;
1177
1178         if (!attr->group && !attr->transfer)
1179                 return priv->sh->flow_max_priority - 2;
1180         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
1181 }
1182
1183 /**
1184  * Calculate matcher priority of the flow.
1185  *
1186  * @param[in] dev
1187  *   Pointer to the Ethernet device structure.
1188  * @param[in] attr
1189  *   Pointer to device flow rule attributes.
1190  * @param[in] subpriority
1191  *   The priority based on the items.
1192  * @param[in] external
1193  *   Flow is user flow.
1194  * @return
1195  *   The matcher priority of the flow.
1196  */
1197 uint16_t
1198 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
1199                           const struct rte_flow_attr *attr,
1200                           uint32_t subpriority, bool external)
1201 {
1202         uint16_t priority = (uint16_t)attr->priority;
1203         struct mlx5_priv *priv = dev->data->dev_private;
1204
1205         if (!attr->group && !attr->transfer) {
1206                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1207                         priority = priv->sh->flow_max_priority - 1;
1208                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
1209         } else if (!external && attr->transfer && attr->group == 0 &&
1210                    attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR) {
1211                 return (priv->sh->flow_max_priority - 1) * 3;
1212         }
1213         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
1214                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
1215         return priority * 3 + subpriority;
1216 }
1217
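/*
 * Worked example (editorial note, not part of the driver): for a non-root
 * flow (attr->group != 0, no transfer) with attr->priority == 1 and an item
 * sub-priority of 2, the function above returns 1 * 3 + 2 = 5. Root-table
 * flows are instead adjusted through mlx5_os_flow_adjust_priority().
 */
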
1218 /**
1219  * Verify the @p item specifications (spec, last, mask) are compatible with the
1220  * NIC capabilities.
1221  *
1222  * @param[in] item
1223  *   Item specification.
1224  * @param[in] mask
1225  *   @p item->mask or flow default bit-masks.
1226  * @param[in] nic_mask
1227  *   Bit-masks covering supported fields by the NIC to compare with user mask.
1228  * @param[in] size
1229  *   Bit-masks size in bytes.
1230  * @param[in] range_accepted
1231  *   True if range of values is accepted for specific fields, false otherwise.
1232  * @param[out] error
1233  *   Pointer to error structure.
1234  *
1235  * @return
1236  *   0 on success, a negative errno value otherwise and rte_errno is set.
1237  */
1238 int
1239 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1240                           const uint8_t *mask,
1241                           const uint8_t *nic_mask,
1242                           unsigned int size,
1243                           bool range_accepted,
1244                           struct rte_flow_error *error)
1245 {
1246         unsigned int i;
1247
1248         MLX5_ASSERT(nic_mask);
1249         for (i = 0; i < size; ++i)
1250                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1251                         return rte_flow_error_set(error, ENOTSUP,
1252                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1253                                                   item,
1254                                                   "mask enables non supported"
1255                                                   " bits");
1256         if (!item->spec && (item->mask || item->last))
1257                 return rte_flow_error_set(error, EINVAL,
1258                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1259                                           "mask/last without a spec is not"
1260                                           " supported");
1261         if (item->spec && item->last && !range_accepted) {
1262                 uint8_t spec[size];
1263                 uint8_t last[size];
1264                 unsigned int i;
1265                 int ret;
1266
1267                 for (i = 0; i < size; ++i) {
1268                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1269                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1270                 }
1271                 ret = memcmp(spec, last, size);
1272                 if (ret != 0)
1273                         return rte_flow_error_set(error, EINVAL,
1274                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1275                                                   item,
1276                                                   "range is not valid");
1277         }
1278         return 0;
1279 }
1280
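/*
 * Editorial note (not part of the original file) - a worked example of the
 * mask containment check above, with hypothetical byte values: if the NIC
 * mask byte is 0xf0 and the user mask byte is 0xf8, then
 * (0xf0 | 0xf8) == 0xf8 != 0xf0, i.e. the user mask enables a bit the NIC
 * cannot match on, so the item is rejected with ENOTSUP.
 */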
1281 /**
1282  * Adjust the hash fields according to the @p flow information.
1283  *
1284  * @param[in] rss_desc
1285  *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
1286  * @param[in] tunnel
1287  *   1 when the hash field is for a tunnel item.
1288  * @param[in] layer_types
1289  *   RTE_ETH_RSS_* types.
1290  * @param[in] hash_fields
1291  *   Item hash fields.
1292  *
1293  * @return
1294  *   The hash fields that should be used.
1295  */
1296 uint64_t
1297 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1298                             int tunnel __rte_unused, uint64_t layer_types,
1299                             uint64_t hash_fields)
1300 {
1301 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1302         int rss_request_inner = rss_desc->level >= 2;
1303
1304         /* Check RSS hash level for tunnel. */
1305         if (tunnel && rss_request_inner)
1306                 hash_fields |= IBV_RX_HASH_INNER;
1307         else if (tunnel || rss_request_inner)
1308                 return 0;
1309 #endif
1310         /* Check if requested layer matches RSS hash fields. */
1311         if (!(rss_desc->types & layer_types))
1312                 return 0;
1313         return hash_fields;
1314 }
1315
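/*
 * Editorial sketch (not part of the original file): how a translation path
 * might derive the inner IPv4 hash fields for a level-2 RSS request. The
 * helper name is hypothetical and MLX5_RSS_HASH_IPV4 is assumed to be the
 * IPv4 src/dst hash mask from mlx5_flow.h.
 */
static __rte_unused uint64_t
example_inner_ipv4_hash_fields(struct mlx5_flow_rss_desc *rss_desc)
{
	/* A tunnel item was matched and the application asked for inner RSS. */
	return mlx5_flow_hashfields_adjust(rss_desc, 1, RTE_ETH_RSS_IPV4,
					   MLX5_RSS_HASH_IPV4);
}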
1316 /**
1317  * Look up and set the tunnel ptype in the Rx queue data part. Only a single
1318  * ptype can be used; if several tunnel rules are used on this queue, the
1319  * tunnel ptype is cleared.
1320  *
1321  * @param rxq_ctrl
1322  *   Rx queue to update.
1323  */
1324 static void
1325 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1326 {
1327         unsigned int i;
1328         uint32_t tunnel_ptype = 0;
1329
1330         /* Look up for the ptype to use. */
1331         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1332                 if (!rxq_ctrl->flow_tunnels_n[i])
1333                         continue;
1334                 if (!tunnel_ptype) {
1335                         tunnel_ptype = tunnels_info[i].ptype;
1336                 } else {
1337                         tunnel_ptype = 0;
1338                         break;
1339                 }
1340         }
1341         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1342 }
1343
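/*
 * Editorial note (not part of the original file) - an example of the rule
 * above: if a queue is referenced by both VXLAN and GRE tunnel flows, no
 * single ptype fits, so the per-queue tunnel ptype is reset to 0 and the
 * datapath stops reporting a tunnel ptype for that queue.
 */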
1344 /**
1345  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1346  * flow.
1347  *
1348  * @param[in] dev
1349  *   Pointer to the Ethernet device structure.
1350  * @param[in] dev_handle
1351  *   Pointer to device flow handle structure.
1352  */
1353 void
1354 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1355                        struct mlx5_flow_handle *dev_handle)
1356 {
1357         struct mlx5_priv *priv = dev->data->dev_private;
1358         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1359         struct mlx5_ind_table_obj *ind_tbl = NULL;
1360         unsigned int i;
1361
1362         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1363                 struct mlx5_hrxq *hrxq;
1364
1365                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1366                               dev_handle->rix_hrxq);
1367                 if (hrxq)
1368                         ind_tbl = hrxq->ind_table;
1369         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1370                 struct mlx5_shared_action_rss *shared_rss;
1371
1372                 shared_rss = mlx5_ipool_get
1373                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1374                          dev_handle->rix_srss);
1375                 if (shared_rss)
1376                         ind_tbl = shared_rss->ind_tbl;
1377         }
1378         if (!ind_tbl)
1379                 return;
1380         for (i = 0; i != ind_tbl->queues_n; ++i) {
1381                 int idx = ind_tbl->queues[i];
1382                 struct mlx5_rxq_ctrl *rxq_ctrl;
1383
1384                 if (mlx5_is_external_rxq(dev, idx))
1385                         continue;
1386                 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1387                 MLX5_ASSERT(rxq_ctrl != NULL);
1388                 if (rxq_ctrl == NULL)
1389                         continue;
1390                 /*
1391                  * To support metadata register copy on Tx loopback,
1392                  * this must always be enabled (metadata may arrive
1393                  * from another port, not only from local flows).
1394                  */
1395                 if (tunnel) {
1396                         unsigned int j;
1397
1398                         /* Increase the counter matching the flow. */
1399                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1400                                 if ((tunnels_info[j].tunnel &
1401                                      dev_handle->layers) ==
1402                                     tunnels_info[j].tunnel) {
1403                                         rxq_ctrl->flow_tunnels_n[j]++;
1404                                         break;
1405                                 }
1406                         }
1407                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1408                 }
1409         }
1410 }
1411
1412 static void
1413 flow_rxq_mark_flag_set(struct rte_eth_dev *dev)
1414 {
1415         struct mlx5_priv *priv = dev->data->dev_private;
1416         struct mlx5_rxq_ctrl *rxq_ctrl;
1417
1418         if (priv->mark_enabled)
1419                 return;
1420         LIST_FOREACH(rxq_ctrl, &priv->rxqsctrl, next) {
1421                 rxq_ctrl->rxq.mark = 1;
1422         }
1423         priv->mark_enabled = 1;
1424 }
1425
1426 /**
1427  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
1428  *
1429  * @param[in] dev
1430  *   Pointer to the Ethernet device structure.
1431  * @param[in] flow
1432  *   Pointer to flow structure.
1433  */
1434 static void
1435 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1436 {
1437         struct mlx5_priv *priv = dev->data->dev_private;
1438         uint32_t handle_idx;
1439         struct mlx5_flow_handle *dev_handle;
1440         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
1441
1442         MLX5_ASSERT(wks);
1443         if (wks->mark)
1444                 flow_rxq_mark_flag_set(dev);
1445         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1446                        handle_idx, dev_handle, next)
1447                 flow_drv_rxq_flags_set(dev, dev_handle);
1448 }
1449
1450 /**
1451  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1452  * device flow if no other flow uses it with the same kind of request.
1453  *
1454  * @param dev
1455  *   Pointer to Ethernet device.
1456  * @param[in] dev_handle
1457  *   Pointer to the device flow handle structure.
1458  */
1459 static void
1460 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1461                         struct mlx5_flow_handle *dev_handle)
1462 {
1463         struct mlx5_priv *priv = dev->data->dev_private;
1464         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1465         struct mlx5_ind_table_obj *ind_tbl = NULL;
1466         unsigned int i;
1467
1468         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1469                 struct mlx5_hrxq *hrxq;
1470
1471                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1472                               dev_handle->rix_hrxq);
1473                 if (hrxq)
1474                         ind_tbl = hrxq->ind_table;
1475         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1476                 struct mlx5_shared_action_rss *shared_rss;
1477
1478                 shared_rss = mlx5_ipool_get
1479                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1480                          dev_handle->rix_srss);
1481                 if (shared_rss)
1482                         ind_tbl = shared_rss->ind_tbl;
1483         }
1484         if (!ind_tbl)
1485                 return;
1486         MLX5_ASSERT(dev->data->dev_started);
1487         for (i = 0; i != ind_tbl->queues_n; ++i) {
1488                 int idx = ind_tbl->queues[i];
1489                 struct mlx5_rxq_ctrl *rxq_ctrl;
1490
1491                 if (mlx5_is_external_rxq(dev, idx))
1492                         continue;
1493                 rxq_ctrl = mlx5_rxq_ctrl_get(dev, idx);
1494                 MLX5_ASSERT(rxq_ctrl != NULL);
1495                 if (rxq_ctrl == NULL)
1496                         continue;
1497                 if (tunnel) {
1498                         unsigned int j;
1499
1500                         /* Decrease the counter matching the flow. */
1501                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1502                                 if ((tunnels_info[j].tunnel &
1503                                      dev_handle->layers) ==
1504                                     tunnels_info[j].tunnel) {
1505                                         rxq_ctrl->flow_tunnels_n[j]--;
1506                                         break;
1507                                 }
1508                         }
1509                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1510                 }
1511         }
1512 }
1513
1514 /**
1515  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1516  * @p flow if no other flow uses it with the same kind of request.
1517  *
1518  * @param dev
1519  *   Pointer to Ethernet device.
1520  * @param[in] flow
1521  *   Pointer to the flow.
1522  */
1523 static void
1524 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1525 {
1526         struct mlx5_priv *priv = dev->data->dev_private;
1527         uint32_t handle_idx;
1528         struct mlx5_flow_handle *dev_handle;
1529
1530         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1531                        handle_idx, dev_handle, next)
1532                 flow_drv_rxq_flags_trim(dev, dev_handle);
1533 }
1534
1535 /**
1536  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1537  *
1538  * @param dev
1539  *   Pointer to Ethernet device.
1540  */
1541 static void
1542 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1543 {
1544         struct mlx5_priv *priv = dev->data->dev_private;
1545         unsigned int i;
1546
1547         for (i = 0; i != priv->rxqs_n; ++i) {
1548                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1549                 unsigned int j;
1550
1551                 if (rxq == NULL || rxq->ctrl == NULL)
1552                         continue;
1553                 rxq->ctrl->rxq.mark = 0;
1554                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1555                         rxq->ctrl->flow_tunnels_n[j] = 0;
1556                 rxq->ctrl->rxq.tunnel = 0;
1557         }
1558         priv->mark_enabled = 0;
1559 }
1560
1561 /**
1562  * Set the Rx queue dynamic metadata (mask and offset) for all Rx queues.
1563  *
1564  * @param[in] dev
1565  *   Pointer to the Ethernet device structure.
1566  */
1567 void
1568 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1569 {
1570         struct mlx5_priv *priv = dev->data->dev_private;
1571         unsigned int i;
1572
1573         for (i = 0; i != priv->rxqs_n; ++i) {
1574                 struct mlx5_rxq_priv *rxq = mlx5_rxq_get(dev, i);
1575                 struct mlx5_rxq_data *data;
1576
1577                 if (rxq == NULL || rxq->ctrl == NULL)
1578                         continue;
1579                 data = &rxq->ctrl->rxq;
1580                 if (!rte_flow_dynf_metadata_avail()) {
1581                         data->dynf_meta = 0;
1582                         data->flow_meta_mask = 0;
1583                         data->flow_meta_offset = -1;
1584                         data->flow_meta_port_mask = 0;
1585                 } else {
1586                         data->dynf_meta = 1;
1587                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1588                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1589                         data->flow_meta_port_mask = priv->sh->dv_meta_mask;
1590                 }
1591         }
1592 }
1593
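/*
 * Editorial sketch (not part of the original file): how a consumer of the
 * per-queue fields set above could read the 32-bit metadata back from a
 * received mbuf. The helper name and parameters are hypothetical.
 */
static __rte_unused uint32_t
example_read_rx_metadata(const struct mlx5_rxq_data *rxq,
			 const struct rte_mbuf *mbuf)
{
	if (!rxq->dynf_meta)
		return 0;
	/* Offset and mask were filled by mlx5_flow_rxq_dynf_metadata_set(). */
	return *RTE_MBUF_DYNFIELD(mbuf, rxq->flow_meta_offset,
				  const uint32_t *) & rxq->flow_meta_mask;
}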
1594 /*
1595  * Return a pointer to the desired action in the list of actions.
1596  *
1597  * @param[in] actions
1598  *   The list of actions to search the action in.
1599  * @param[in] action
1600  *   The action to find.
1601  *
1602  * @return
1603  *   Pointer to the action in the list, if found. NULL otherwise.
1604  */
1605 const struct rte_flow_action *
1606 mlx5_flow_find_action(const struct rte_flow_action *actions,
1607                       enum rte_flow_action_type action)
1608 {
1609         if (actions == NULL)
1610                 return NULL;
1611         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1612                 if (actions->type == action)
1613                         return actions;
1614         return NULL;
1615 }
1616
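/*
 * Editorial sketch (not part of the original file): using
 * mlx5_flow_find_action() to pull the RSS configuration out of an
 * application action list. The helper name is hypothetical.
 */
static __rte_unused const struct rte_flow_action_rss *
example_get_rss_conf(const struct rte_flow_action *actions)
{
	const struct rte_flow_action *rss_act =
		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);

	return rss_act ? rss_act->conf : NULL;
}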
1617 /*
1618  * Validate the flag action.
1619  *
1620  * @param[in] action_flags
1621  *   Bit-fields that hold the actions detected until now.
1622  * @param[in] attr
1623  *   Attributes of flow that includes this action.
1624  * @param[out] error
1625  *   Pointer to error structure.
1626  *
1627  * @return
1628  *   0 on success, a negative errno value otherwise and rte_errno is set.
1629  */
1630 int
1631 mlx5_flow_validate_action_flag(uint64_t action_flags,
1632                                const struct rte_flow_attr *attr,
1633                                struct rte_flow_error *error)
1634 {
1635         if (action_flags & MLX5_FLOW_ACTION_MARK)
1636                 return rte_flow_error_set(error, EINVAL,
1637                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1638                                           "can't mark and flag in same flow");
1639         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1640                 return rte_flow_error_set(error, EINVAL,
1641                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1642                                           "can't have 2 flag"
1643                                           " actions in same flow");
1644         if (attr->egress)
1645                 return rte_flow_error_set(error, ENOTSUP,
1646                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1647                                           "flag action not supported for "
1648                                           "egress");
1649         return 0;
1650 }
1651
1652 /*
1653  * Validate the mark action.
1654  *
1655  * @param[in] action
1656  *   Pointer to the queue action.
1657  * @param[in] action_flags
1658  *   Bit-fields that hold the actions detected until now.
1659  * @param[in] attr
1660  *   Attributes of flow that includes this action.
1661  * @param[out] error
1662  *   Pointer to error structure.
1663  *
1664  * @return
1665  *   0 on success, a negative errno value otherwise and rte_errno is set.
1666  */
1667 int
1668 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1669                                uint64_t action_flags,
1670                                const struct rte_flow_attr *attr,
1671                                struct rte_flow_error *error)
1672 {
1673         const struct rte_flow_action_mark *mark = action->conf;
1674
1675         if (!mark)
1676                 return rte_flow_error_set(error, EINVAL,
1677                                           RTE_FLOW_ERROR_TYPE_ACTION,
1678                                           action,
1679                                           "configuration cannot be null");
1680         if (mark->id >= MLX5_FLOW_MARK_MAX)
1681                 return rte_flow_error_set(error, EINVAL,
1682                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1683                                           &mark->id,
1684                                           "mark id must be in 0 <= id < "
1685                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1686         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1687                 return rte_flow_error_set(error, EINVAL,
1688                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1689                                           "can't flag and mark in same flow");
1690         if (action_flags & MLX5_FLOW_ACTION_MARK)
1691                 return rte_flow_error_set(error, EINVAL,
1692                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1693                                           "can't have 2 mark actions in same"
1694                                           " flow");
1695         if (attr->egress)
1696                 return rte_flow_error_set(error, ENOTSUP,
1697                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1698                                           "mark action not supported for "
1699                                           "egress");
1700         return 0;
1701 }
1702
1703 /*
1704  * Validate the drop action.
1705  *
1706  * @param[in] action_flags
1707  *   Bit-fields that hold the actions detected until now.
1708  * @param[in] attr
1709  *   Attributes of flow that includes this action.
1710  * @param[out] error
1711  *   Pointer to error structure.
1712  *
1713  * @return
1714  *   0 on success, a negative errno value otherwise and rte_errno is set.
1715  */
1716 int
1717 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1718                                const struct rte_flow_attr *attr,
1719                                struct rte_flow_error *error)
1720 {
1721         if (attr->egress)
1722                 return rte_flow_error_set(error, ENOTSUP,
1723                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1724                                           "drop action not supported for "
1725                                           "egress");
1726         return 0;
1727 }
1728
1729 /*
1730  * Validate the queue action.
1731  *
1732  * @param[in] action
1733  *   Pointer to the queue action.
1734  * @param[in] action_flags
1735  *   Bit-fields that hold the actions detected until now.
1736  * @param[in] dev
1737  *   Pointer to the Ethernet device structure.
1738  * @param[in] attr
1739  *   Attributes of flow that includes this action.
1740  * @param[out] error
1741  *   Pointer to error structure.
1742  *
1743  * @return
1744  *   0 on success, a negative errno value otherwise and rte_errno is set.
1745  */
1746 int
1747 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1748                                 uint64_t action_flags,
1749                                 struct rte_eth_dev *dev,
1750                                 const struct rte_flow_attr *attr,
1751                                 struct rte_flow_error *error)
1752 {
1753         struct mlx5_priv *priv = dev->data->dev_private;
1754         const struct rte_flow_action_queue *queue = action->conf;
1755
1756         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1757                 return rte_flow_error_set(error, EINVAL,
1758                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1759                                           "can't have 2 fate actions in"
1760                                           " same flow");
1761         if (attr->egress)
1762                 return rte_flow_error_set(error, ENOTSUP,
1763                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1764                                           "queue action not supported for egress.");
1765         if (mlx5_is_external_rxq(dev, queue->index))
1766                 return 0;
1767         if (!priv->rxqs_n)
1768                 return rte_flow_error_set(error, EINVAL,
1769                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1770                                           NULL, "No Rx queues configured");
1771         if (queue->index >= priv->rxqs_n)
1772                 return rte_flow_error_set(error, EINVAL,
1773                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1774                                           &queue->index,
1775                                           "queue index out of range");
1776         if (mlx5_rxq_get(dev, queue->index) == NULL)
1777                 return rte_flow_error_set(error, EINVAL,
1778                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1779                                           &queue->index,
1780                                           "queue is not configured");
1781         return 0;
1782 }
1783
1784 /**
1785  * Validate queue numbers for device RSS.
1786  *
1787  * @param[in] dev
1788  *   Configured device.
1789  * @param[in] queues
1790  *   Array of queue numbers.
1791  * @param[in] queues_n
1792  *   Size of the @p queues array.
1793  * @param[out] error
1794  *   On error, filled with a textual error description.
1795  * @param[out] queue_idx
1796  *   On error, filled with an offending queue index in @p queues array.
1797  *
1798  * @return
1799  *   0 on success, a negative errno code on error.
1800  */
1801 static int
1802 mlx5_validate_rss_queues(struct rte_eth_dev *dev,
1803                          const uint16_t *queues, uint32_t queues_n,
1804                          const char **error, uint32_t *queue_idx)
1805 {
1806         const struct mlx5_priv *priv = dev->data->dev_private;
1807         bool is_hairpin = false;
1808         bool is_ext_rss = false;
1809         uint32_t i;
1810
1811         for (i = 0; i != queues_n; ++i) {
1812                 struct mlx5_rxq_ctrl *rxq_ctrl;
1813
1814                 if (mlx5_is_external_rxq(dev, queues[i])) {
1815                         is_ext_rss = true;
1816                         continue;
1817                 }
1818                 if (is_ext_rss) {
1819                         *error = "Combining external and regular RSS queues is not supported";
1820                         *queue_idx = i;
1821                         return -ENOTSUP;
1822                 }
1823                 if (queues[i] >= priv->rxqs_n) {
1824                         *error = "queue index out of range";
1825                         *queue_idx = i;
1826                         return -EINVAL;
1827                 }
1828                 rxq_ctrl = mlx5_rxq_ctrl_get(dev, queues[i]);
1829                 if (rxq_ctrl == NULL) {
1830                         *error = "queue is not configured";
1831                         *queue_idx = i;
1832                         return -EINVAL;
1833                 }
1834                 if (i == 0 && rxq_ctrl->is_hairpin)
1835                         is_hairpin = true;
1836                 if (is_hairpin != rxq_ctrl->is_hairpin) {
1837                         *error = "combining hairpin and regular RSS queues is not supported";
1838                         *queue_idx = i;
1839                         return -ENOTSUP;
1840                 }
1841         }
1842         return 0;
1843 }
1844
1845 /*
1846  * Validate the rss action.
1847  *
1848  * @param[in] dev
1849  *   Pointer to the Ethernet device structure.
1850  * @param[in] action
1851  *   Pointer to the queue action.
1852  * @param[out] error
1853  *   Pointer to error structure.
1854  *
1855  * @return
1856  *   0 on success, a negative errno value otherwise and rte_errno is set.
1857  */
1858 int
1859 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1860                          const struct rte_flow_action *action,
1861                          struct rte_flow_error *error)
1862 {
1863         struct mlx5_priv *priv = dev->data->dev_private;
1864         const struct rte_flow_action_rss *rss = action->conf;
1865         int ret;
1866         const char *message;
1867         uint32_t queue_idx;
1868
1869         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1870             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1871                 return rte_flow_error_set(error, ENOTSUP,
1872                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1873                                           &rss->func,
1874                                           "RSS hash function not supported");
1875 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1876         if (rss->level > 2)
1877 #else
1878         if (rss->level > 1)
1879 #endif
1880                 return rte_flow_error_set(error, ENOTSUP,
1881                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1882                                           &rss->level,
1883                                           "tunnel RSS is not supported");
1884         /* Allow RSS key_len 0 in case of NULL (default) RSS key. */
1885         if (rss->key_len == 0 && rss->key != NULL)
1886                 return rte_flow_error_set(error, ENOTSUP,
1887                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1888                                           &rss->key_len,
1889                                           "RSS hash key length 0");
1890         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1891                 return rte_flow_error_set(error, ENOTSUP,
1892                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1893                                           &rss->key_len,
1894                                           "RSS hash key too small");
1895         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1896                 return rte_flow_error_set(error, ENOTSUP,
1897                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1898                                           &rss->key_len,
1899                                           "RSS hash key too large");
1900         if (rss->queue_num > priv->sh->dev_cap.ind_table_max_size)
1901                 return rte_flow_error_set(error, ENOTSUP,
1902                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1903                                           &rss->queue_num,
1904                                           "number of queues too large");
1905         if (rss->types & MLX5_RSS_HF_MASK)
1906                 return rte_flow_error_set(error, ENOTSUP,
1907                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1908                                           &rss->types,
1909                                           "some RSS protocols are not"
1910                                           " supported");
1911         if ((rss->types & (RTE_ETH_RSS_L3_SRC_ONLY | RTE_ETH_RSS_L3_DST_ONLY)) &&
1912             !(rss->types & RTE_ETH_RSS_IP))
1913                 return rte_flow_error_set(error, EINVAL,
1914                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1915                                           "L3 partial RSS requested but L3 RSS"
1916                                           " type not specified");
1917         if ((rss->types & (RTE_ETH_RSS_L4_SRC_ONLY | RTE_ETH_RSS_L4_DST_ONLY)) &&
1918             !(rss->types & (RTE_ETH_RSS_UDP | RTE_ETH_RSS_TCP)))
1919                 return rte_flow_error_set(error, EINVAL,
1920                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1921                                           "L4 partial RSS requested but L4 RSS"
1922                                           " type not specified");
1923         if (!priv->rxqs_n && priv->ext_rxqs == NULL)
1924                 return rte_flow_error_set(error, EINVAL,
1925                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1926                                           NULL, "No Rx queues configured");
1927         if (!rss->queue_num)
1928                 return rte_flow_error_set(error, EINVAL,
1929                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1930                                           NULL, "No queues configured");
1931         ret = mlx5_validate_rss_queues(dev, rss->queue, rss->queue_num,
1932                                        &message, &queue_idx);
1933         if (ret != 0) {
1934                 return rte_flow_error_set(error, -ret,
1935                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1936                                           &rss->queue[queue_idx], message);
1937         }
1938         return 0;
1939 }
1940
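/*
 * Editorial sketch (not part of the original file): an RSS action
 * configuration that satisfies the checks above - default hash function, a
 * full-length key, outer-header hashing and IP/TCP types only. The queue
 * indexes are illustrative and must refer to configured Rx queues.
 */
static __rte_unused const uint16_t example_rss_queues[] = { 0, 1, 2, 3 };
static __rte_unused const uint8_t example_rss_key[MLX5_RSS_HASH_KEY_LEN];
static __rte_unused const struct rte_flow_action_rss example_rss_conf = {
	.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
	.level = 0,
	.types = RTE_ETH_RSS_IP | RTE_ETH_RSS_TCP,
	.key_len = MLX5_RSS_HASH_KEY_LEN,
	.queue_num = RTE_DIM(example_rss_queues),
	.key = example_rss_key,
	.queue = example_rss_queues,
};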
1941 /*
1942  * Validate the rss action.
1943  *
1944  * @param[in] action
1945  *   Pointer to the queue action.
1946  * @param[in] action_flags
1947  *   Bit-fields that hold the actions detected until now.
1948  * @param[in] dev
1949  *   Pointer to the Ethernet device structure.
1950  * @param[in] attr
1951  *   Attributes of flow that includes this action.
1952  * @param[in] item_flags
1953  *   Items that were detected.
1954  * @param[out] error
1955  *   Pointer to error structure.
1956  *
1957  * @return
1958  *   0 on success, a negative errno value otherwise and rte_errno is set.
1959  */
1960 int
1961 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1962                               uint64_t action_flags,
1963                               struct rte_eth_dev *dev,
1964                               const struct rte_flow_attr *attr,
1965                               uint64_t item_flags,
1966                               struct rte_flow_error *error)
1967 {
1968         const struct rte_flow_action_rss *rss = action->conf;
1969         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1970         int ret;
1971
1972         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1973                 return rte_flow_error_set(error, EINVAL,
1974                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1975                                           "can't have 2 fate actions"
1976                                           " in same flow");
1977         ret = mlx5_validate_action_rss(dev, action, error);
1978         if (ret)
1979                 return ret;
1980         if (attr->egress)
1981                 return rte_flow_error_set(error, ENOTSUP,
1982                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1983                                           "rss action not supported for "
1984                                           "egress");
1985         if (rss->level > 1 && !tunnel)
1986                 return rte_flow_error_set(error, EINVAL,
1987                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1988                                           "inner RSS is not supported for "
1989                                           "non-tunnel flows");
1990         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1991             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1992                 return rte_flow_error_set(error, EINVAL,
1993                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1994                                           "RSS on eCPRI is not yet supported");
1995         }
1996         if ((item_flags & MLX5_FLOW_LAYER_MPLS) &&
1997             !(item_flags &
1998               (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3)) &&
1999             rss->level > 1)
2000                 return rte_flow_error_set(error, EINVAL,
2001                                           RTE_FLOW_ERROR_TYPE_ITEM, NULL,
2002                                           "MPLS inner RSS needs to specify inner L2/L3 items after MPLS in pattern");
2003         return 0;
2004 }
2005
2006 /*
2007  * Validate the default miss action.
2008  *
2009  * @param[in] action_flags
2010  *   Bit-fields that hold the actions detected until now.
2011  * @param[out] error
2012  *   Pointer to error structure.
2013  *
2014  * @return
2015  *   0 on success, a negative errno value otherwise and rte_errno is set.
2016  */
2017 int
2018 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
2019                                 const struct rte_flow_attr *attr,
2020                                 struct rte_flow_error *error)
2021 {
2022         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2023                 return rte_flow_error_set(error, EINVAL,
2024                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2025                                           "can't have 2 fate actions in"
2026                                           " same flow");
2027         if (attr->egress)
2028                 return rte_flow_error_set(error, ENOTSUP,
2029                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2030                                           "default miss action not supported "
2031                                           "for egress");
2032         if (attr->group)
2033                 return rte_flow_error_set(error, ENOTSUP,
2034                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
2035                                           "only group 0 is supported");
2036         if (attr->transfer)
2037                 return rte_flow_error_set(error, ENOTSUP,
2038                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2039                                           NULL, "transfer is not supported");
2040         return 0;
2041 }
2042
2043 /*
2044  * Validate the count action.
2045  *
2046  * @param[in] dev
2047  *   Pointer to the Ethernet device structure.
2048  * @param[in] attr
2049  *   Attributes of flow that includes this action.
2050  * @param[out] error
2051  *   Pointer to error structure.
2052  *
2053  * @return
2054  *   0 on success, a negative errno value otherwise and rte_errno is set.
2055  */
2056 int
2057 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
2058                                 const struct rte_flow_attr *attr,
2059                                 struct rte_flow_error *error)
2060 {
2061         if (attr->egress)
2062                 return rte_flow_error_set(error, ENOTSUP,
2063                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2064                                           "count action not supported for "
2065                                           "egress");
2066         return 0;
2067 }
2068
2069 /*
2070  * Validate the ASO CT action.
2071  *
2072  * @param[in] dev
2073  *   Pointer to the Ethernet device structure.
2074  * @param[in] conntrack
2075  *   Pointer to the CT action profile.
2076  * @param[out] error
2077  *   Pointer to error structure.
2078  *
2079  * @return
2080  *   0 on success, a negative errno value otherwise and rte_errno is set.
2081  */
2082 int
2083 mlx5_validate_action_ct(struct rte_eth_dev *dev,
2084                         const struct rte_flow_action_conntrack *conntrack,
2085                         struct rte_flow_error *error)
2086 {
2087         RTE_SET_USED(dev);
2088
2089         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
2090                 return rte_flow_error_set(error, EINVAL,
2091                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2092                                           "Invalid CT state");
2093         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
2094                 return rte_flow_error_set(error, EINVAL,
2095                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2096                                           "Invalid last TCP packet flag");
2097         return 0;
2098 }
2099
2100 /**
2101  * Verify the @p attributes are correctly understood by the NIC and are
2102  * valid for this device.
2103  *
2104  * @param[in] dev
2105  *   Pointer to the Ethernet device structure.
2106  * @param[in] attributes
2107  *   Pointer to flow attributes
2108  * @param[out] error
2109  *   Pointer to error structure.
2110  *
2111  * @return
2112  *   0 on success, a negative errno value otherwise and rte_errno is set.
2113  */
2114 int
2115 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2116                               const struct rte_flow_attr *attributes,
2117                               struct rte_flow_error *error)
2118 {
2119         struct mlx5_priv *priv = dev->data->dev_private;
2120         uint32_t priority_max = priv->sh->flow_max_priority - 1;
2121
2122         if (attributes->group)
2123                 return rte_flow_error_set(error, ENOTSUP,
2124                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2125                                           NULL, "groups are not supported");
2126         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
2127             attributes->priority >= priority_max)
2128                 return rte_flow_error_set(error, ENOTSUP,
2129                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2130                                           NULL, "priority out of range");
2131         if (attributes->egress)
2132                 return rte_flow_error_set(error, ENOTSUP,
2133                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2134                                           "egress is not supported");
2135         if (attributes->transfer && !priv->sh->config.dv_esw_en)
2136                 return rte_flow_error_set(error, ENOTSUP,
2137                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2138                                           NULL, "transfer is not supported");
2139         if (!attributes->ingress)
2140                 return rte_flow_error_set(error, EINVAL,
2141                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2142                                           NULL,
2143                                           "ingress attribute is mandatory");
2144         return 0;
2145 }
2146
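/*
 * Editorial sketch (not part of the original file): flow attributes that
 * satisfy the checks above - ingress only, group 0 and an explicit priority
 * that must stay below the device maximum (or be
 * MLX5_FLOW_LOWEST_PRIO_INDICATOR to request the lowest priority).
 */
static __rte_unused const struct rte_flow_attr example_ingress_attr = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
};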
2147 /**
2148  * Validate ICMP6 item.
2149  *
2150  * @param[in] item
2151  *   Item specification.
2152  * @param[in] item_flags
2153  *   Bit-fields that hold the items detected until now.
2154  * @param[in] target_protocol
2155  *   The next protocol in the previous layer.
2156  * @param[out] error
2157  *   Pointer to error structure.
2158  *
2159  * @return
2160  *   0 on success, a negative errno value otherwise and rte_errno is set.
2161  */
2162 int
2163 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
2164                                uint64_t item_flags,
2165                                uint8_t target_protocol,
2166                                struct rte_flow_error *error)
2167 {
2168         const struct rte_flow_item_icmp6 *mask = item->mask;
2169         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2170         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
2171                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2172         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2173                                       MLX5_FLOW_LAYER_OUTER_L4;
2174         int ret;
2175
2176         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
2177                 return rte_flow_error_set(error, EINVAL,
2178                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2179                                           "protocol filtering not compatible"
2180                                           " with ICMP6 layer");
2181         if (!(item_flags & l3m))
2182                 return rte_flow_error_set(error, EINVAL,
2183                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2184                                           "IPv6 is mandatory to filter on"
2185                                           " ICMP6");
2186         if (item_flags & l4m)
2187                 return rte_flow_error_set(error, EINVAL,
2188                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2189                                           "multiple L4 layers not supported");
2190         if (!mask)
2191                 mask = &rte_flow_item_icmp6_mask;
2192         ret = mlx5_flow_item_acceptable
2193                 (item, (const uint8_t *)mask,
2194                  (const uint8_t *)&rte_flow_item_icmp6_mask,
2195                  sizeof(struct rte_flow_item_icmp6),
2196                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2197         if (ret < 0)
2198                 return ret;
2199         return 0;
2200 }
2201
2202 /**
2203  * Validate ICMP item.
2204  *
2205  * @param[in] item
2206  *   Item specification.
2207  * @param[in] item_flags
2208  *   Bit-fields that hold the items detected until now.
2209  * @param[out] error
2210  *   Pointer to error structure.
2211  *
2212  * @return
2213  *   0 on success, a negative errno value otherwise and rte_errno is set.
2214  */
2215 int
2216 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
2217                              uint64_t item_flags,
2218                              uint8_t target_protocol,
2219                              struct rte_flow_error *error)
2220 {
2221         const struct rte_flow_item_icmp *mask = item->mask;
2222         const struct rte_flow_item_icmp nic_mask = {
2223                 .hdr.icmp_type = 0xff,
2224                 .hdr.icmp_code = 0xff,
2225                 .hdr.icmp_ident = RTE_BE16(0xffff),
2226                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
2227         };
2228         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2229         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
2230                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2231         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2232                                       MLX5_FLOW_LAYER_OUTER_L4;
2233         int ret;
2234
2235         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
2236                 return rte_flow_error_set(error, EINVAL,
2237                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2238                                           "protocol filtering not compatible"
2239                                           " with ICMP layer");
2240         if (!(item_flags & l3m))
2241                 return rte_flow_error_set(error, EINVAL,
2242                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2243                                           "IPv4 is mandatory to filter"
2244                                           " on ICMP");
2245         if (item_flags & l4m)
2246                 return rte_flow_error_set(error, EINVAL,
2247                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2248                                           "multiple L4 layers not supported");
2249         if (!mask)
2250                 mask = &nic_mask;
2251         ret = mlx5_flow_item_acceptable
2252                 (item, (const uint8_t *)mask,
2253                  (const uint8_t *)&nic_mask,
2254                  sizeof(struct rte_flow_item_icmp),
2255                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2256         if (ret < 0)
2257                 return ret;
2258         return 0;
2259 }
2260
2261 /**
2262  * Validate Ethernet item.
2263  *
2264  * @param[in] item
2265  *   Item specification.
2266  * @param[in] item_flags
2267  *   Bit-fields that hold the items detected until now.
2268  * @param[out] error
2269  *   Pointer to error structure.
2270  *
2271  * @return
2272  *   0 on success, a negative errno value otherwise and rte_errno is set.
2273  */
2274 int
2275 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2276                             uint64_t item_flags, bool ext_vlan_sup,
2277                             struct rte_flow_error *error)
2278 {
2279         const struct rte_flow_item_eth *mask = item->mask;
2280         const struct rte_flow_item_eth nic_mask = {
2281                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2282                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2283                 .type = RTE_BE16(0xffff),
2284                 .has_vlan = ext_vlan_sup ? 1 : 0,
2285         };
2286         int ret;
2287         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2288         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
2289                                        MLX5_FLOW_LAYER_OUTER_L2;
2290
2291         if (item_flags & ethm)
2292                 return rte_flow_error_set(error, ENOTSUP,
2293                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2294                                           "multiple L2 layers not supported");
2295         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
2296             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
2297                 return rte_flow_error_set(error, EINVAL,
2298                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2299                                           "L2 layer should not follow "
2300                                           "L3 layers");
2301         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
2302             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
2303                 return rte_flow_error_set(error, EINVAL,
2304                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2305                                           "L2 layer should not follow VLAN");
2306         if (item_flags & MLX5_FLOW_LAYER_GTP)
2307                 return rte_flow_error_set(error, EINVAL,
2308                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2309                                           "L2 layer should not follow GTP");
2310         if (!mask)
2311                 mask = &rte_flow_item_eth_mask;
2312         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2313                                         (const uint8_t *)&nic_mask,
2314                                         sizeof(struct rte_flow_item_eth),
2315                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2316         return ret;
2317 }
2318
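/*
 * Editorial sketch (not part of the original file): an Ethernet item
 * accepted by the validation above - matching on the EtherType only, with a
 * mask limited to fields covered by the NIC mask above.
 */
static __rte_unused const struct rte_flow_item_eth example_eth_ipv4_spec = {
	.type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
};
static __rte_unused const struct rte_flow_item_eth example_eth_ipv4_mask = {
	.type = RTE_BE16(0xffff),
};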
2319 /**
2320  * Validate VLAN item.
2321  *
2322  * @param[in] item
2323  *   Item specification.
2324  * @param[in] item_flags
2325  *   Bit-fields that hold the items detected until now.
2326  * @param[in] dev
2327  *   Ethernet device the flow is being created on.
2328  * @param[out] error
2329  *   Pointer to error structure.
2330  *
2331  * @return
2332  *   0 on success, a negative errno value otherwise and rte_errno is set.
2333  */
2334 int
2335 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2336                              uint64_t item_flags,
2337                              struct rte_eth_dev *dev,
2338                              struct rte_flow_error *error)
2339 {
2340         const struct rte_flow_item_vlan *spec = item->spec;
2341         const struct rte_flow_item_vlan *mask = item->mask;
2342         const struct rte_flow_item_vlan nic_mask = {
2343                 .tci = RTE_BE16(UINT16_MAX),
2344                 .inner_type = RTE_BE16(UINT16_MAX),
2345         };
2346         uint16_t vlan_tag = 0;
2347         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2348         int ret;
2349         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2350                                         MLX5_FLOW_LAYER_INNER_L4) :
2351                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2352                                         MLX5_FLOW_LAYER_OUTER_L4);
2353         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2354                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2355
2356         if (item_flags & vlanm)
2357                 return rte_flow_error_set(error, EINVAL,
2358                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2359                                           "multiple VLAN layers not supported");
2360         else if ((item_flags & l34m) != 0)
2361                 return rte_flow_error_set(error, EINVAL,
2362                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2363                                           "VLAN cannot follow L3/L4 layer");
2364         if (!mask)
2365                 mask = &rte_flow_item_vlan_mask;
2366         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2367                                         (const uint8_t *)&nic_mask,
2368                                         sizeof(struct rte_flow_item_vlan),
2369                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2370         if (ret)
2371                 return ret;
2372         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2373                 struct mlx5_priv *priv = dev->data->dev_private;
2374
2375                 if (priv->vmwa_context) {
2376                         /*
2377                          * A non-NULL context means we have a virtual machine
2378                          * and SR-IOV enabled, so we have to create a VLAN
2379                          * interface to make the hypervisor set up the E-Switch
2380                          * vport context correctly. We avoid creating multiple
2381                          * VLAN interfaces, so we cannot support a VLAN tag mask.
2382                          */
2383                         return rte_flow_error_set(error, EINVAL,
2384                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2385                                                   item,
2386                                                   "VLAN tag mask is not"
2387                                                   " supported in virtual"
2388                                                   " environment");
2389                 }
2390         }
2391         if (spec) {
2392                 vlan_tag = spec->tci;
2393                 vlan_tag &= mask->tci;
2394         }
2395         /*
2396          * From verbs perspective an empty VLAN is equivalent
2397          * to a packet without VLAN layer.
2398          */
2399         if (!vlan_tag)
2400                 return rte_flow_error_set(error, EINVAL,
2401                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2402                                           item->spec,
2403                                           "VLAN cannot be empty");
2404         return 0;
2405 }
2406
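/*
 * Editorial sketch (not part of the original file): a VLAN item accepted by
 * the check above - a non-zero TCI, so the match is not "empty" from the
 * Verbs point of view. The VLAN ID 100 is illustrative.
 */
static __rte_unused const struct rte_flow_item_vlan example_vlan_spec = {
	.tci = RTE_BE16(100),
};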
2407 /**
2408  * Validate IPV4 item.
2409  *
2410  * @param[in] item
2411  *   Item specification.
2412  * @param[in] item_flags
2413  *   Bit-fields that hold the items detected until now.
2414  * @param[in] last_item
2415  *   Previous validated item in the pattern items.
2416  * @param[in] ether_type
2417  *   Type in the ethernet layer header (including dot1q).
2418  * @param[in] acc_mask
2419  *   Acceptable mask; if NULL, the default internal mask
2420  *   will be used to check whether item fields are supported.
2421  * @param[in] range_accepted
2422  *   True if range of values is accepted for specific fields, false otherwise.
2423  * @param[out] error
2424  *   Pointer to error structure.
2425  *
2426  * @return
2427  *   0 on success, a negative errno value otherwise and rte_errno is set.
2428  */
2429 int
2430 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2431                              uint64_t item_flags,
2432                              uint64_t last_item,
2433                              uint16_t ether_type,
2434                              const struct rte_flow_item_ipv4 *acc_mask,
2435                              bool range_accepted,
2436                              struct rte_flow_error *error)
2437 {
2438         const struct rte_flow_item_ipv4 *mask = item->mask;
2439         const struct rte_flow_item_ipv4 *spec = item->spec;
2440         const struct rte_flow_item_ipv4 nic_mask = {
2441                 .hdr = {
2442                         .src_addr = RTE_BE32(0xffffffff),
2443                         .dst_addr = RTE_BE32(0xffffffff),
2444                         .type_of_service = 0xff,
2445                         .next_proto_id = 0xff,
2446                 },
2447         };
2448         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2449         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2450                                       MLX5_FLOW_LAYER_OUTER_L3;
2451         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2452                                       MLX5_FLOW_LAYER_OUTER_L4;
2453         int ret;
2454         uint8_t next_proto = 0xFF;
2455         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2456                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2457                                   MLX5_FLOW_LAYER_INNER_VLAN);
2458
2459         if ((last_item & l2_vlan) && ether_type &&
2460             ether_type != RTE_ETHER_TYPE_IPV4)
2461                 return rte_flow_error_set(error, EINVAL,
2462                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2463                                           "IPv4 cannot follow L2/VLAN layer "
2464                                           "whose ether type is not IPv4");
2465         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2466                 if (mask && spec)
2467                         next_proto = mask->hdr.next_proto_id &
2468                                      spec->hdr.next_proto_id;
2469                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2470                         return rte_flow_error_set(error, EINVAL,
2471                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2472                                                   item,
2473                                                   "multiple tunnel "
2474                                                   "not supported");
2475         }
2476         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2477                 return rte_flow_error_set(error, EINVAL,
2478                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2479                                           "wrong tunnel type - IPv6 specified "
2480                                           "but IPv4 item provided");
2481         if (item_flags & l3m)
2482                 return rte_flow_error_set(error, ENOTSUP,
2483                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2484                                           "multiple L3 layers not supported");
2485         else if (item_flags & l4m)
2486                 return rte_flow_error_set(error, EINVAL,
2487                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2488                                           "L3 cannot follow an L4 layer.");
2489         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2490                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2491                 return rte_flow_error_set(error, EINVAL,
2492                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2493                                           "L3 cannot follow an NVGRE layer.");
2494         if (!mask)
2495                 mask = &rte_flow_item_ipv4_mask;
2496         else if (mask->hdr.next_proto_id != 0 &&
2497                  mask->hdr.next_proto_id != 0xff)
2498                 return rte_flow_error_set(error, EINVAL,
2499                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2500                                           "partial mask is not supported"
2501                                           " for protocol");
2502         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2503                                         acc_mask ? (const uint8_t *)acc_mask
2504                                                  : (const uint8_t *)&nic_mask,
2505                                         sizeof(struct rte_flow_item_ipv4),
2506                                         range_accepted, error);
2507         if (ret < 0)
2508                 return ret;
2509         return 0;
2510 }
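
/*
 * Illustrative sketch (editor's note, not part of the driver): the check
 * above rejects partial protocol masks, so matching "IPv4 carrying UDP"
 * needs a full next_proto_id mask; a partial mask such as 0x0f fails:
 *
 *	struct rte_flow_item_ipv4 ipv4_spec = {
 *		.hdr = { .next_proto_id = IPPROTO_UDP },
 *	};
 *	struct rte_flow_item_ipv4 ipv4_mask = {
 *		.hdr = { .next_proto_id = 0xff },
 *	};
 */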
2511
2512 /**
2513  * Validate IPV6 item.
2514  *
2515  * @param[in] item
2516  *   Item specification.
2517  * @param[in] item_flags
2518  *   Bit-fields that hold the items detected until now.
2519  * @param[in] last_item
2520  *   Previously validated item in the pattern items.
2521  * @param[in] ether_type
2522  *   Type in the ethernet layer header (including dot1q).
2523  * @param[in] acc_mask
2524  *   Acceptable mask, if NULL the default internal mask
2525  *   will be used to check whether item fields are supported.
2526  * @param[out] error
2527  *   Pointer to error structure.
2528  *
2529  * @return
2530  *   0 on success, a negative errno value otherwise and rte_errno is set.
2531  */
2532 int
2533 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2534                              uint64_t item_flags,
2535                              uint64_t last_item,
2536                              uint16_t ether_type,
2537                              const struct rte_flow_item_ipv6 *acc_mask,
2538                              struct rte_flow_error *error)
2539 {
2540         const struct rte_flow_item_ipv6 *mask = item->mask;
2541         const struct rte_flow_item_ipv6 *spec = item->spec;
2542         const struct rte_flow_item_ipv6 nic_mask = {
2543                 .hdr = {
2544                         .src_addr =
2545                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2546                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2547                         .dst_addr =
2548                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2549                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2550                         .vtc_flow = RTE_BE32(0xffffffff),
2551                         .proto = 0xff,
2552                 },
2553         };
2554         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2555         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2556                                       MLX5_FLOW_LAYER_OUTER_L3;
2557         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2558                                       MLX5_FLOW_LAYER_OUTER_L4;
2559         int ret;
2560         uint8_t next_proto = 0xFF;
2561         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2562                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2563                                   MLX5_FLOW_LAYER_INNER_VLAN);
2564
2565         if ((last_item & l2_vlan) && ether_type &&
2566             ether_type != RTE_ETHER_TYPE_IPV6)
2567                 return rte_flow_error_set(error, EINVAL,
2568                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2569                                           "IPv6 cannot follow L2/VLAN layer "
2570                                           "which ether type is not IPv6");
2571         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2572                 next_proto = spec->hdr.proto;
2573         if (item_flags & MLX5_FLOW_LAYER_TUNNEL) {
2574                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2575                         return rte_flow_error_set(error, EINVAL,
2576                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2577                                                   item,
2578                                                   "multiple tunnel "
2579                                                   "not supported");
2580         }
2581         if (next_proto == IPPROTO_HOPOPTS  ||
2582             next_proto == IPPROTO_ROUTING  ||
2583             next_proto == IPPROTO_FRAGMENT ||
2584             next_proto == IPPROTO_ESP      ||
2585             next_proto == IPPROTO_AH       ||
2586             next_proto == IPPROTO_DSTOPTS)
2587                 return rte_flow_error_set(error, EINVAL,
2588                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2589                                           "IPv6 proto (next header) should "
2590                                           "not be set as extension header");
2591         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2592                 return rte_flow_error_set(error, EINVAL,
2593                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2594                                           "wrong tunnel type - IPv4 specified "
2595                                           "but IPv6 item provided");
2596         if (item_flags & l3m)
2597                 return rte_flow_error_set(error, ENOTSUP,
2598                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2599                                           "multiple L3 layers not supported");
2600         else if (item_flags & l4m)
2601                 return rte_flow_error_set(error, EINVAL,
2602                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2603                                           "L3 cannot follow an L4 layer.");
2604         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2605                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2606                 return rte_flow_error_set(error, EINVAL,
2607                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2608                                           "L3 cannot follow an NVGRE layer.");
2609         if (!mask)
2610                 mask = &rte_flow_item_ipv6_mask;
2611         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2612                                         acc_mask ? (const uint8_t *)acc_mask
2613                                                  : (const uint8_t *)&nic_mask,
2614                                         sizeof(struct rte_flow_item_ipv6),
2615                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2616         if (ret < 0)
2617                 return ret;
2618         return 0;
2619 }
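
/*
 * Illustrative sketch (editor's note, not part of the driver): the proto
 * field may select an upper-layer protocol, but the check above rejects
 * IPv6 extension headers (HOPOPTS, ROUTING, FRAGMENT, ESP, AH, DSTOPTS):
 *
 *	struct rte_flow_item_ipv6 ipv6_spec = { .hdr = { .proto = IPPROTO_TCP } };
 *	struct rte_flow_item_ipv6 ipv6_mask = { .hdr = { .proto = 0xff } };
 */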
2620
2621 /**
2622  * Validate UDP item.
2623  *
2624  * @param[in] item
2625  *   Item specification.
2626  * @param[in] item_flags
2627  *   Bit-fields that hold the items detected until now.
2628  * @param[in] target_protocol
2629  *   The next protocol in the previous item.
2632  * @param[out] error
2633  *   Pointer to error structure.
2634  *
2635  * @return
2636  *   0 on success, a negative errno value otherwise and rte_errno is set.
2637  */
2638 int
2639 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2640                             uint64_t item_flags,
2641                             uint8_t target_protocol,
2642                             struct rte_flow_error *error)
2643 {
2644         const struct rte_flow_item_udp *mask = item->mask;
2645         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2646         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2647                                       MLX5_FLOW_LAYER_OUTER_L3;
2648         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2649                                       MLX5_FLOW_LAYER_OUTER_L4;
2650         int ret;
2651
2652         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2653                 return rte_flow_error_set(error, EINVAL,
2654                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2655                                           "protocol filtering not compatible"
2656                                           " with UDP layer");
2657         if (!(item_flags & l3m))
2658                 return rte_flow_error_set(error, EINVAL,
2659                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2660                                           "L3 is mandatory to filter on L4");
2661         if (item_flags & l4m)
2662                 return rte_flow_error_set(error, EINVAL,
2663                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2664                                           "multiple L4 layers not supported");
2665         if (!mask)
2666                 mask = &rte_flow_item_udp_mask;
2667         ret = mlx5_flow_item_acceptable
2668                 (item, (const uint8_t *)mask,
2669                  (const uint8_t *)&rte_flow_item_udp_mask,
2670                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2671                  error);
2672         if (ret < 0)
2673                 return ret;
2674         return 0;
2675 }
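
/*
 * Illustrative sketch (editor's note, not part of the driver): an L3 item
 * must precede the UDP item and only one L4 item is allowed, e.g.:
 *
 *	eth / ipv4 / udp		accepted
 *	eth / udp			rejected (L3 is mandatory to filter on L4)
 *	eth / ipv4 / udp / udp		rejected (multiple L4 layers not supported)
 */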
2676
2677 /**
2678  * Validate TCP item.
2679  *
2680  * @param[in] item
2681  *   Item specification.
2682  * @param[in] item_flags
2683  *   Bit-fields that hold the items detected until now.
2684  * @param[in] target_protocol
2685  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2686  * @param[out] error
2687  *   Pointer to error structure.
2688  *
2689  * @return
2690  *   0 on success, a negative errno value otherwise and rte_errno is set.
2691  */
2692 int
2693 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2694                             uint64_t item_flags,
2695                             uint8_t target_protocol,
2696                             const struct rte_flow_item_tcp *flow_mask,
2697                             struct rte_flow_error *error)
2698 {
2699         const struct rte_flow_item_tcp *mask = item->mask;
2700         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2701         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2702                                       MLX5_FLOW_LAYER_OUTER_L3;
2703         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2704                                       MLX5_FLOW_LAYER_OUTER_L4;
2705         int ret;
2706
2707         MLX5_ASSERT(flow_mask);
2708         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2709                 return rte_flow_error_set(error, EINVAL,
2710                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2711                                           "protocol filtering not compatible"
2712                                           " with TCP layer");
2713         if (!(item_flags & l3m))
2714                 return rte_flow_error_set(error, EINVAL,
2715                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2716                                           "L3 is mandatory to filter on L4");
2717         if (item_flags & l4m)
2718                 return rte_flow_error_set(error, EINVAL,
2719                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2720                                           "multiple L4 layers not supported");
2721         if (!mask)
2722                 mask = &rte_flow_item_tcp_mask;
2723         ret = mlx5_flow_item_acceptable
2724                 (item, (const uint8_t *)mask,
2725                  (const uint8_t *)flow_mask,
2726                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2727                  error);
2728         if (ret < 0)
2729                 return ret;
2730         return 0;
2731 }
2732
2733 /**
2734  * Validate VXLAN item.
2735  *
2736  * @param[in] dev
2737  *   Pointer to the Ethernet device structure.
2738  * @param[in] udp_dport
2739  *   UDP destination port.
2740  * @param[in] item
2741  *   Item specification.
2742  * @param[in] item_flags
2743  *   Bit-fields that holds the items detected until now.
2744  * @param[in] attr
2745  *   Flow rule attributes.
2746  * @param[out] error
2747  *   Pointer to error structure.
2748  *
2749  * @return
2750  *   0 on success, a negative errno value otherwise and rte_errno is set.
2751  */
2752 int
2753 mlx5_flow_validate_item_vxlan(struct rte_eth_dev *dev,
2754                               uint16_t udp_dport,
2755                               const struct rte_flow_item *item,
2756                               uint64_t item_flags,
2757                               const struct rte_flow_attr *attr,
2758                               struct rte_flow_error *error)
2759 {
2760         const struct rte_flow_item_vxlan *spec = item->spec;
2761         const struct rte_flow_item_vxlan *mask = item->mask;
2762         int ret;
2763         struct mlx5_priv *priv = dev->data->dev_private;
2764         union vni {
2765                 uint32_t vlan_id;
2766                 uint8_t vni[4];
2767         } id = { .vlan_id = 0, };
2768         const struct rte_flow_item_vxlan nic_mask = {
2769                 .vni = "\xff\xff\xff",
2770                 .rsvd1 = 0xff,
2771         };
2772         const struct rte_flow_item_vxlan *valid_mask;
2773
2774         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2775                 return rte_flow_error_set(error, ENOTSUP,
2776                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2777                                           "multiple tunnel layers not"
2778                                           " supported");
2779         valid_mask = &rte_flow_item_vxlan_mask;
2780         /*
2781          * Verify only UDPv4 is present as defined in
2782          * https://tools.ietf.org/html/rfc7348
2783          */
2784         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2785                 return rte_flow_error_set(error, EINVAL,
2786                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2787                                           "no outer UDP layer found");
2788         if (!mask)
2789                 mask = &rte_flow_item_vxlan_mask;
2790
2791         if (priv->sh->steering_format_version !=
2792             MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
2793             !udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN) {
2794                 /* FDB domain & NIC domain non-zero group */
2795                 if ((attr->transfer || attr->group) && priv->sh->misc5_cap)
2796                         valid_mask = &nic_mask;
2797                 /* Group zero in NIC domain */
2798                 if (!attr->group && !attr->transfer &&
2799                     priv->sh->tunnel_header_0_1)
2800                         valid_mask = &nic_mask;
2801         }
2802         ret = mlx5_flow_item_acceptable
2803                 (item, (const uint8_t *)mask,
2804                  (const uint8_t *)valid_mask,
2805                  sizeof(struct rte_flow_item_vxlan),
2806                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2807         if (ret < 0)
2808                 return ret;
2809         if (spec) {
2810                 memcpy(&id.vni[1], spec->vni, 3);
2811                 memcpy(&id.vni[1], mask->vni, 3);
2812         }
2813         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2814                 return rte_flow_error_set(error, ENOTSUP,
2815                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2816                                           "VXLAN tunnel must be fully defined");
2817         return 0;
2818 }
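
/*
 * Illustrative sketch (editor's note, not part of the driver): a minimal
 * VXLAN pattern accepted by the checks above; the reserved byte (rsvd1)
 * can only be matched when nic_mask is selected, i.e. when the misc5 or
 * tunnel_header capabilities apply as handled above:
 *
 *	struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x00\x12\x34" };
 *	struct rte_flow_item_vxlan vxlan_mask = { .vni = "\xff\xff\xff" };
 *
 *	eth / ipv4 / udp / vxlan (vxlan_spec, vxlan_mask) / end
 */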
2819
2820 /**
2821  * Validate VXLAN_GPE item.
2822  *
2823  * @param[in] item
2824  *   Item specification.
2825  * @param[in] item_flags
2826  *   Bit-fields that holds the items detected until now.
2827  * @param[in] dev
2828  *   Pointer to the Ethernet device structure.
2831  * @param[out] error
2832  *   Pointer to error structure.
2833  *
2834  * @return
2835  *   0 on success, a negative errno value otherwise and rte_errno is set.
2836  */
2837 int
2838 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2839                                   uint64_t item_flags,
2840                                   struct rte_eth_dev *dev,
2841                                   struct rte_flow_error *error)
2842 {
2843         struct mlx5_priv *priv = dev->data->dev_private;
2844         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2845         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2846         int ret;
2847         union vni {
2848                 uint32_t vlan_id;
2849                 uint8_t vni[4];
2850         } id = { .vlan_id = 0, };
2851
2852         if (!priv->sh->config.l3_vxlan_en)
2853                 return rte_flow_error_set(error, ENOTSUP,
2854                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2855                                           "L3 VXLAN is not enabled by device"
2856                                           " parameter and/or not configured in"
2857                                           " firmware");
2858         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2859                 return rte_flow_error_set(error, ENOTSUP,
2860                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2861                                           "multiple tunnel layers not"
2862                                           " supported");
2863         /*
2864          * Verify an outer UDP layer is present as required by the
2865          * VXLAN-GPE specification (draft-ietf-nvo3-vxlan-gpe).
2866          */
2867         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2868                 return rte_flow_error_set(error, EINVAL,
2869                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2870                                           "no outer UDP layer found");
2871         if (!mask)
2872                 mask = &rte_flow_item_vxlan_gpe_mask;
2873         ret = mlx5_flow_item_acceptable
2874                 (item, (const uint8_t *)mask,
2875                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2876                  sizeof(struct rte_flow_item_vxlan_gpe),
2877                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2878         if (ret < 0)
2879                 return ret;
2880         if (spec) {
2881                 if (spec->protocol)
2882                         return rte_flow_error_set(error, ENOTSUP,
2883                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2884                                                   item,
2885                                                   "VxLAN-GPE protocol"
2886                                                   " not supported");
2887                 memcpy(&id.vni[1], spec->vni, 3);
2888                 memcpy(&id.vni[1], mask->vni, 3);
2889         }
2890         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2891                 return rte_flow_error_set(error, ENOTSUP,
2892                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2893                                           "VXLAN-GPE tunnel must be fully"
2894                                           " defined");
2895         return 0;
2896 }

2897 /**
2898  * Validate GRE Key item.
2899  *
2900  * @param[in] item
2901  *   Item specification.
2902  * @param[in] item_flags
2903  *   Bit flags to mark detected items.
2904  * @param[in] gre_item
2905  *   Pointer to gre_item
2906  * @param[out] error
2907  *   Pointer to error structure.
2908  *
2909  * @return
2910  *   0 on success, a negative errno value otherwise and rte_errno is set.
2911  */
2912 int
2913 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2914                                 uint64_t item_flags,
2915                                 const struct rte_flow_item *gre_item,
2916                                 struct rte_flow_error *error)
2917 {
2918         const rte_be32_t *mask = item->mask;
2919         int ret = 0;
2920         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2921         const struct rte_flow_item_gre *gre_spec;
2922         const struct rte_flow_item_gre *gre_mask;
2923
2924         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2925                 return rte_flow_error_set(error, ENOTSUP,
2926                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2927                                           "Multiple GRE key not support");
2928         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2929                 return rte_flow_error_set(error, ENOTSUP,
2930                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2931                                           "No preceding GRE header");
2932         if (item_flags & MLX5_FLOW_LAYER_INNER)
2933                 return rte_flow_error_set(error, ENOTSUP,
2934                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2935                                           "GRE key following a wrong item");
2936         gre_mask = gre_item->mask;
2937         if (!gre_mask)
2938                 gre_mask = &rte_flow_item_gre_mask;
2939         gre_spec = gre_item->spec;
2940         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2941                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2942                 return rte_flow_error_set(error, EINVAL,
2943                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2944                                           "Key bit must be on");
2945
2946         if (!mask)
2947                 mask = &gre_key_default_mask;
2948         ret = mlx5_flow_item_acceptable
2949                 (item, (const uint8_t *)mask,
2950                  (const uint8_t *)&gre_key_default_mask,
2951                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2952         return ret;
2953 }
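
/*
 * Illustrative sketch (editor's note, not part of the driver): when a GRE
 * spec is present, its K bit (0x2000 in c_rsvd0_ver) must be set for a
 * following gre_key item to be accepted:
 *
 *	struct rte_flow_item_gre gre_spec = { .c_rsvd0_ver = RTE_BE16(0x2000) };
 *	struct rte_flow_item_gre gre_mask = { .c_rsvd0_ver = RTE_BE16(0x2000) };
 *	rte_be32_t gre_key_spec = RTE_BE32(0x1234);
 *	rte_be32_t gre_key_mask = RTE_BE32(UINT32_MAX);
 *
 *	eth / ipv4 / gre (gre_spec, gre_mask) / gre_key (gre_key_spec, gre_key_mask) / end
 */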
2954
2955 /**
2956  * Validate GRE optional item.
2957  *
2958  * @param[in] dev
2959  *   Pointer to the Ethernet device structure.
2960  * @param[in] item
2961  *   Item specification.
2962  * @param[in] item_flags
2963  *   Bit flags to mark detected items.
2964  * @param[in] attr
2965  *   Flow rule attributes.
2966  * @param[in] gre_item
2967  *   Pointer to gre_item
2968  * @param[out] error
2969  *   Pointer to error structure.
2970  *
2971  * @return
2972  *   0 on success, a negative errno value otherwise and rte_errno is set.
2973  */
2974 int
2975 mlx5_flow_validate_item_gre_option(struct rte_eth_dev *dev,
2976                                    const struct rte_flow_item *item,
2977                                    uint64_t item_flags,
2978                                    const struct rte_flow_attr *attr,
2979                                    const struct rte_flow_item *gre_item,
2980                                    struct rte_flow_error *error)
2981 {
2982         const struct rte_flow_item_gre *gre_spec = gre_item->spec;
2983         const struct rte_flow_item_gre *gre_mask = gre_item->mask;
2984         const struct rte_flow_item_gre_opt *spec = item->spec;
2985         const struct rte_flow_item_gre_opt *mask = item->mask;
2986         struct mlx5_priv *priv = dev->data->dev_private;
2987         int ret = 0;
2988         struct rte_flow_item_gre_opt nic_mask = {
2989                 .checksum_rsvd = {
2990                         .checksum = RTE_BE16(UINT16_MAX),
2991                         .reserved1 = 0x0,
2992                 },
2993                 .key = {
2994                         .key = RTE_BE32(UINT32_MAX),
2995                 },
2996                 .sequence = {
2997                         .sequence = RTE_BE32(UINT32_MAX),
2998                 },
2999         };
3000
3001         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
3002                 return rte_flow_error_set(error, ENOTSUP,
3003                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3004                                           "No preceding GRE header");
3005         if (item_flags & MLX5_FLOW_LAYER_INNER)
3006                 return rte_flow_error_set(error, ENOTSUP,
3007                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3008                                           "GRE option following a wrong item");
3009         if (!spec || !mask)
3010                 return rte_flow_error_set(error, EINVAL,
3011                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3012                                           "At least one field gre_option(checksum/key/sequence) must be specified");
3013         if (!gre_mask)
3014                 gre_mask = &rte_flow_item_gre_mask;
3015         if (mask->checksum_rsvd.checksum)
3016                 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x8000)) &&
3017                                  !(gre_spec->c_rsvd0_ver & RTE_BE16(0x8000)))
3018                         return rte_flow_error_set(error, EINVAL,
3019                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3020                                                   item,
3021                                                   "Checksum bit must be on");
3022         if (mask->key.key)
3023                 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
3024                                  !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
3025                         return rte_flow_error_set(error, EINVAL,
3026                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3027                                                   item, "Key bit must be on");
3028         if (mask->sequence.sequence)
3029                 if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x1000)) &&
3030                                  !(gre_spec->c_rsvd0_ver & RTE_BE16(0x1000)))
3031                         return rte_flow_error_set(error, EINVAL,
3032                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3033                                                   item,
3034                                                   "Sequence bit must be on");
3035         if (mask->checksum_rsvd.checksum || mask->sequence.sequence) {
3036                 if (priv->sh->steering_format_version ==
3037                     MLX5_STEERING_LOGIC_FORMAT_CONNECTX_5 ||
3038                     ((attr->group || attr->transfer) &&
3039                      !priv->sh->misc5_cap) ||
3040                     (!(priv->sh->tunnel_header_0_1 &&
3041                        priv->sh->tunnel_header_2_3) &&
3042                     !attr->group && !attr->transfer))
3043                         return rte_flow_error_set(error, EINVAL,
3044                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3045                                                   item,
3046                                                   "Checksum/Sequence not supported");
3047         }
3048         ret = mlx5_flow_item_acceptable
3049                 (item, (const uint8_t *)mask,
3050                  (const uint8_t *)&nic_mask,
3051                  sizeof(struct rte_flow_item_gre_opt),
3052                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3053         return ret;
3054 }
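
/*
 * Editor's note (illustrative summary, not part of the driver): a gre_option
 * item must be consistent with the preceding GRE spec: if the GRE mask
 * covers a flag bit, that bit has to be set in the spec for the matching
 * field, C (0x8000) for checksum, K (0x2000) for key, S (0x1000) for
 * sequence, and at least one of the three fields must be present in both
 * spec and mask.
 */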
3055
3056 /**
3057  * Validate GRE item.
3058  *
3059  * @param[in] item
3060  *   Item specification.
3061  * @param[in] item_flags
3062  *   Bit flags to mark detected items.
3063  * @param[in] target_protocol
3064  *   The next protocol in the previous item.
3065  * @param[out] error
3066  *   Pointer to error structure.
3067  *
3068  * @return
3069  *   0 on success, a negative errno value otherwise and rte_errno is set.
3070  */
3071 int
3072 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3073                             uint64_t item_flags,
3074                             uint8_t target_protocol,
3075                             struct rte_flow_error *error)
3076 {
3077         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3078         const struct rte_flow_item_gre *mask = item->mask;
3079         int ret;
3080         const struct rte_flow_item_gre nic_mask = {
3081                 .c_rsvd0_ver = RTE_BE16(0xB000),
3082                 .protocol = RTE_BE16(UINT16_MAX),
3083         };
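	/*
	 * 0xB000 keeps only the C (0x8000), K (0x2000) and S (0x1000) flag
	 * bits of the GRE header; no other c_rsvd0_ver bits can be matched.
	 */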
3084
3085         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3086                 return rte_flow_error_set(error, EINVAL,
3087                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3088                                           "protocol filtering not compatible"
3089                                           " with this GRE layer");
3090         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3091                 return rte_flow_error_set(error, ENOTSUP,
3092                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3093                                           "multiple tunnel layers not"
3094                                           " supported");
3095         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3096                 return rte_flow_error_set(error, ENOTSUP,
3097                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3098                                           "L3 Layer is missing");
3099         if (!mask)
3100                 mask = &rte_flow_item_gre_mask;
3101         ret = mlx5_flow_item_acceptable
3102                 (item, (const uint8_t *)mask,
3103                  (const uint8_t *)&nic_mask,
3104                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
3105                  error);
3106         if (ret < 0)
3107                 return ret;
3108 #ifndef HAVE_MLX5DV_DR
3109 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3110         if (spec && (spec->protocol & mask->protocol))
3111                 return rte_flow_error_set(error, ENOTSUP,
3112                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3113                                           "without MPLS support the"
3114                                           " specification cannot be used for"
3115                                           " filtering");
3116 #endif
3117 #endif
3118         return 0;
3119 }
3120
3121 /**
3122  * Validate Geneve item.
3123  *
3124  * @param[in] item
3125  *   Item specification.
3126  * @param[in] item_flags
3127  *   Bit-fields that hold the items detected until now.
3128  * @param[in] dev
3129  *   Pointer to the Ethernet device structure.
3130  * @param[out] error
3131  *   Pointer to error structure.
3132  *
3133  * @return
3134  *   0 on success, a negative errno value otherwise and rte_errno is set.
3135  */
3136
3137 int
3138 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
3139                                uint64_t item_flags,
3140                                struct rte_eth_dev *dev,
3141                                struct rte_flow_error *error)
3142 {
3143         struct mlx5_priv *priv = dev->data->dev_private;
3144         const struct rte_flow_item_geneve *spec = item->spec;
3145         const struct rte_flow_item_geneve *mask = item->mask;
3146         int ret;
3147         uint16_t gbhdr;
3148         uint8_t opt_len = priv->sh->cdev->config.hca_attr.geneve_max_opt_len ?
3149                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
3150         const struct rte_flow_item_geneve nic_mask = {
3151                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
3152                 .vni = "\xff\xff\xff",
3153                 .protocol = RTE_BE16(UINT16_MAX),
3154         };
3155
3156         if (!priv->sh->cdev->config.hca_attr.tunnel_stateless_geneve_rx)
3157                 return rte_flow_error_set(error, ENOTSUP,
3158                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3159                                           "L3 Geneve is not enabled by device"
3160                                           " parameter and/or not configured in"
3161                                           " firmware");
3162         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3163                 return rte_flow_error_set(error, ENOTSUP,
3164                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3165                                           "multiple tunnel layers not"
3166                                           " supported");
3167         /*
3168          * Verify an outer UDP layer is present as required by the
3169          * GENEVE specification (RFC 8926).
3170          */
3171         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3172                 return rte_flow_error_set(error, EINVAL,
3173                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3174                                           "no outer UDP layer found");
3175         if (!mask)
3176                 mask = &rte_flow_item_geneve_mask;
3177         ret = mlx5_flow_item_acceptable
3178                                   (item, (const uint8_t *)mask,
3179                                    (const uint8_t *)&nic_mask,
3180                                    sizeof(struct rte_flow_item_geneve),
3181                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3182         if (ret)
3183                 return ret;
3184         if (spec) {
3185                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
3186                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
3187                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
3188                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
3189                         return rte_flow_error_set(error, ENOTSUP,
3190                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3191                                                   item,
3192                                                   "Geneve protocol unsupported"
3193                                                   " fields are being used");
3194                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
3195                         return rte_flow_error_set
3196                                         (error, ENOTSUP,
3197                                          RTE_FLOW_ERROR_TYPE_ITEM,
3198                                          item,
3199                                          "Unsupported Geneve options length");
3200         }
3201         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3202                 return rte_flow_error_set
3203                                     (error, ENOTSUP,
3204                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
3205                                      "Geneve tunnel must be fully defined");
3206         return 0;
3207 }
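
/*
 * Illustrative sketch (editor's note, not part of the driver): a Geneve
 * item must follow an outer UDP item and may only use the fields covered
 * by nic_mask above:
 *
 *	struct rte_flow_item_geneve geneve_spec = {
 *		.vni = "\x00\x00\x2a",
 *		.protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *	};
 *	struct rte_flow_item_geneve geneve_mask = {
 *		.vni = "\xff\xff\xff",
 *		.protocol = RTE_BE16(0xffff),
 *	};
 *
 *	eth / ipv4 / udp / geneve (geneve_spec, geneve_mask) / end
 */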
3208
3209 /**
3210  * Validate Geneve TLV option item.
3211  *
3212  * @param[in] item
3213  *   Item specification.
3214  * @param[in] last_item
3215  *   Previously validated item in the pattern items.
3216  * @param[in] geneve_item
3217  *   Previous GENEVE item specification.
3218  * @param[in] dev
3219  *   Pointer to the rte_eth_dev structure.
3220  * @param[out] error
3221  *   Pointer to error structure.
3222  *
3223  * @return
3224  *   0 on success, a negative errno value otherwise and rte_errno is set.
3225  */
3226 int
3227 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
3228                                    uint64_t last_item,
3229                                    const struct rte_flow_item *geneve_item,
3230                                    struct rte_eth_dev *dev,
3231                                    struct rte_flow_error *error)
3232 {
3233         struct mlx5_priv *priv = dev->data->dev_private;
3234         struct mlx5_dev_ctx_shared *sh = priv->sh;
3235         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
3236         struct mlx5_hca_attr *hca_attr = &sh->cdev->config.hca_attr;
3237         uint8_t data_max_supported =
3238                         hca_attr->max_geneve_tlv_option_data_len * 4;
3239         const struct rte_flow_item_geneve *geneve_spec;
3240         const struct rte_flow_item_geneve *geneve_mask;
3241         const struct rte_flow_item_geneve_opt *spec = item->spec;
3242         const struct rte_flow_item_geneve_opt *mask = item->mask;
3243         unsigned int i;
3244         unsigned int data_len;
3245         uint8_t tlv_option_len;
3246         uint16_t optlen_m, optlen_v;
3247         const struct rte_flow_item_geneve_opt full_mask = {
3248                 .option_class = RTE_BE16(0xffff),
3249                 .option_type = 0xff,
3250                 .option_len = 0x1f,
3251         };
3252
3253         if (!mask)
3254                 mask = &rte_flow_item_geneve_opt_mask;
3255         if (!spec)
3256                 return rte_flow_error_set
3257                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3258                         "Geneve TLV opt class/type/length must be specified");
3259         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
3260                 return rte_flow_error_set
3261                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3262                         "Geneve TLV opt length exceeds the limit (31)");
3263         /* Check if class type and length masks are full. */
3264         if (full_mask.option_class != mask->option_class ||
3265             full_mask.option_type != mask->option_type ||
3266             full_mask.option_len != (mask->option_len & full_mask.option_len))
3267                 return rte_flow_error_set
3268                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3269                         "Geneve TLV opt class/type/length masks must be full");
3270         /* Check if length is supported */
3271         if ((uint32_t)spec->option_len >
3272                         hca_attr->max_geneve_tlv_option_data_len)
3273                 return rte_flow_error_set
3274                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3275                         "Geneve TLV opt length not supported");
3276         if (hca_attr->max_geneve_tlv_options > 1)
3277                 DRV_LOG(DEBUG,
3278                         "max_geneve_tlv_options supports more than 1 option");
3279         /* Check GENEVE item preceding. */
3280         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
3281                 return rte_flow_error_set
3282                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3283                         "Geneve opt item must be preceded with Geneve item");
3284         geneve_spec = geneve_item->spec;
3285         geneve_mask = geneve_item->mask ? geneve_item->mask :
3286                                           &rte_flow_item_geneve_mask;
3287         /* Check if GENEVE TLV option size doesn't exceed option length */
3288         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
3289                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
3290                 tlv_option_len = spec->option_len & mask->option_len;
3291                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
3292                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
3293                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
3294                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
3295                 if ((optlen_v & optlen_m) <= tlv_option_len)
3296                         return rte_flow_error_set
3297                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3298                                  "GENEVE TLV option length exceeds optlen");
3299         }
3300         /* Check if length is 0 or data is 0. */
3301         if (spec->data == NULL || spec->option_len == 0)
3302                 return rte_flow_error_set
3303                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3304                         "Geneve TLV opt with zero data/length not supported");
3305         /* Check not all data & mask are 0. */
3306         data_len = spec->option_len * 4;
3307         if (mask->data == NULL) {
3308                 for (i = 0; i < data_len; i++)
3309                         if (spec->data[i])
3310                                 break;
3311                 if (i == data_len)
3312                         return rte_flow_error_set(error, ENOTSUP,
3313                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3314                                 "Can't match on Geneve option data 0");
3315         } else {
3316                 for (i = 0; i < data_len; i++)
3317                         if (spec->data[i] & mask->data[i])
3318                                 break;
3319                 if (i == data_len)
3320                         return rte_flow_error_set(error, ENOTSUP,
3321                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3322                                 "Can't match on Geneve option data and mask 0");
3323                 /* Check data mask supported. */
3324                 for (i = data_max_supported; i < data_len ; i++)
3325                         if (mask->data[i])
3326                                 return rte_flow_error_set(error, ENOTSUP,
3327                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
3328                                         "Data mask is of unsupported size");
3329         }
3330         /* Check GENEVE option is supported in NIC. */
3331         if (!hca_attr->geneve_tlv_opt)
3332                 return rte_flow_error_set
3333                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
3334                         "Geneve TLV opt not supported");
3335         /* Check if we already have geneve option with different type/class. */
3336         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
3337         geneve_opt_resource = sh->geneve_tlv_option_resource;
3338         if (geneve_opt_resource != NULL)
3339                 if (geneve_opt_resource->option_class != spec->option_class ||
3340                     geneve_opt_resource->option_type != spec->option_type ||
3341                     geneve_opt_resource->length != spec->option_len) {
3342                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3343                         return rte_flow_error_set(error, ENOTSUP,
3344                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
3345                                 "Only one Geneve TLV option supported");
3346                 }
3347         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
3348         return 0;
3349 }
3350
3351 /**
3352  * Validate MPLS item.
3353  *
3354  * @param[in] dev
3355  *   Pointer to the rte_eth_dev structure.
3356  * @param[in] item
3357  *   Item specification.
3358  * @param[in] item_flags
3359  *   Bit-fields that hold the items detected until now.
3360  * @param[in] prev_layer
3361  *   The protocol layer indicated in previous item.
3362  * @param[out] error
3363  *   Pointer to error structure.
3364  *
3365  * @return
3366  *   0 on success, a negative errno value otherwise and rte_errno is set.
3367  */
3368 int
3369 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
3370                              const struct rte_flow_item *item __rte_unused,
3371                              uint64_t item_flags __rte_unused,
3372                              uint64_t prev_layer __rte_unused,
3373                              struct rte_flow_error *error)
3374 {
3375 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3376         const struct rte_flow_item_mpls *mask = item->mask;
3377         struct mlx5_priv *priv = dev->data->dev_private;
3378         int ret;
3379
3380         if (!priv->sh->dev_cap.mpls_en)
3381                 return rte_flow_error_set(error, ENOTSUP,
3382                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3383                                           "MPLS not supported or"
3384                                           " disabled in firmware"
3385                                           " configuration.");
3386         /* MPLS over UDP, GRE is allowed */
3387         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L4_UDP |
3388                             MLX5_FLOW_LAYER_GRE |
3389                             MLX5_FLOW_LAYER_GRE_KEY)))
3390                 return rte_flow_error_set(error, EINVAL,
3391                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3392                                           "protocol filtering not compatible"
3393                                           " with MPLS layer");
3394         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
3395         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
3396             !(item_flags & MLX5_FLOW_LAYER_GRE))
3397                 return rte_flow_error_set(error, ENOTSUP,
3398                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3399                                           "multiple tunnel layers not"
3400                                           " supported");
3401         if (!mask)
3402                 mask = &rte_flow_item_mpls_mask;
3403         ret = mlx5_flow_item_acceptable
3404                 (item, (const uint8_t *)mask,
3405                  (const uint8_t *)&rte_flow_item_mpls_mask,
3406                  sizeof(struct rte_flow_item_mpls),
3407                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3408         if (ret < 0)
3409                 return ret;
3410         return 0;
3411 #else
3412         return rte_flow_error_set(error, ENOTSUP,
3413                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3414                                   "MPLS is not supported by Verbs, please"
3415                                   " update.");
3416 #endif
3417 }
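
/*
 * Illustrative sketch (editor's note, not part of the driver): MPLS is only
 * accepted over UDP or GRE, and MPLS-over-GRE is the single tunnel-in-tunnel
 * combination the checks above allow:
 *
 *	eth / ipv4 / udp / mpls		accepted (MPLS over UDP)
 *	eth / ipv4 / gre / mpls		accepted (MPLS over GRE)
 *	eth / ipv4 / mpls		rejected (no UDP or GRE before MPLS)
 */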
3418
3419 /**
3420  * Validate NVGRE item.
3421  *
3422  * @param[in] item
3423  *   Item specification.
3424  * @param[in] item_flags
3425  *   Bit flags to mark detected items.
3426  * @param[in] target_protocol
3427  *   The next protocol in the previous item.
3428  * @param[out] error
3429  *   Pointer to error structure.
3430  *
3431  * @return
3432  *   0 on success, a negative errno value otherwise and rte_errno is set.
3433  */
3434 int
3435 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
3436                               uint64_t item_flags,
3437                               uint8_t target_protocol,
3438                               struct rte_flow_error *error)
3439 {
3440         const struct rte_flow_item_nvgre *mask = item->mask;
3441         int ret;
3442
3443         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
3444                 return rte_flow_error_set(error, EINVAL,
3445                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3446                                           "protocol filtering not compatible"
3447                                           " with this GRE layer");
3448         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3449                 return rte_flow_error_set(error, ENOTSUP,
3450                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3451                                           "multiple tunnel layers not"
3452                                           " supported");
3453         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3454                 return rte_flow_error_set(error, ENOTSUP,
3455                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3456                                           "L3 Layer is missing");
3457         if (!mask)
3458                 mask = &rte_flow_item_nvgre_mask;
3459         ret = mlx5_flow_item_acceptable
3460                 (item, (const uint8_t *)mask,
3461                  (const uint8_t *)&rte_flow_item_nvgre_mask,
3462                  sizeof(struct rte_flow_item_nvgre),
3463                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3464         if (ret < 0)
3465                 return ret;
3466         return 0;
3467 }
3468
3469 /**
3470  * Validate eCPRI item.
3471  *
3472  * @param[in] item
3473  *   Item specification.
3474  * @param[in] item_flags
3475  *   Bit-fields that hold the items detected until now.
3476  * @param[in] last_item
3477  *   Previously validated item in the pattern items.
3478  * @param[in] ether_type
3479  *   Type in the ethernet layer header (including dot1q).
3480  * @param[in] acc_mask
3481  *   Acceptable mask, if NULL the default internal mask
3482  *   will be used to check whether item fields are supported.
3483  * @param[out] error
3484  *   Pointer to error structure.
3485  *
3486  * @return
3487  *   0 on success, a negative errno value otherwise and rte_errno is set.
3488  */
3489 int
3490 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
3491                               uint64_t item_flags,
3492                               uint64_t last_item,
3493                               uint16_t ether_type,
3494                               const struct rte_flow_item_ecpri *acc_mask,
3495                               struct rte_flow_error *error)
3496 {
3497         const struct rte_flow_item_ecpri *mask = item->mask;
3498         const struct rte_flow_item_ecpri nic_mask = {
3499                 .hdr = {
3500                         .common = {
3501                                 .u32 =
3502                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3503                                         .type = 0xFF,
3504                                         }).u32),
3505                         },
3506                         .dummy[0] = 0xFFFFFFFF,
3507                 },
3508         };
3509         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3510                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3511         struct rte_flow_item_ecpri mask_lo;
3512
3513         if (!(last_item & outer_l2_vlan) &&
3514             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3515                 return rte_flow_error_set(error, EINVAL,
3516                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3517                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3518         if ((last_item & outer_l2_vlan) && ether_type &&
3519             ether_type != RTE_ETHER_TYPE_ECPRI)
3520                 return rte_flow_error_set(error, EINVAL,
3521                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3522                                           "eCPRI cannot follow L2/VLAN layer which ether type is not 0xAEFE");
3523         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3524                 return rte_flow_error_set(error, EINVAL,
3525                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3526                                           "eCPRI with tunnel is not supported right now");
3527         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3528                 return rte_flow_error_set(error, ENOTSUP,
3529                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3530                                           "multiple L3 layers not supported");
3531         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3532                 return rte_flow_error_set(error, EINVAL,
3533                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3534                                           "eCPRI cannot coexist with a TCP layer");
3535         /* In specification, eCPRI could be over UDP layer. */
3536         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3537                 return rte_flow_error_set(error, EINVAL,
3538                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3539                                           "eCPRI over UDP layer is not yet supported right now");
3540         /* Mask for type field in common header could be zero. */
3541         if (!mask)
3542                 mask = &rte_flow_item_ecpri_mask;
3543         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3544         /* Input mask is in big-endian format. */
3545         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3546                 return rte_flow_error_set(error, EINVAL,
3547                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3548                                           "partial mask is not supported for protocol");
3549         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3550                 return rte_flow_error_set(error, EINVAL,
3551                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3552                                           "message header mask must be after a type mask");
3553         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3554                                          acc_mask ? (const uint8_t *)acc_mask
3555                                                   : (const uint8_t *)&nic_mask,
3556                                          sizeof(struct rte_flow_item_ecpri),
3557                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3558 }
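
/*
 * Illustrative sketch (editor's note, not part of the driver): eCPRI is
 * accepted directly after the outer L2/VLAN layer (ether type 0xAEFE);
 * eCPRI over UDP is not supported yet and a partial mask on the common
 * header type is rejected:
 *
 *	eth / ecpri				accepted
 *	eth / ipv4 / udp / ecpri		rejected (eCPRI over UDP)
 *	eth / ecpri with type mask 0x0f		rejected (partial type mask)
 */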
3559
3560 static int
3561 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3562                    const struct rte_flow_attr *attr __rte_unused,
3563                    const struct rte_flow_item items[] __rte_unused,
3564                    const struct rte_flow_action actions[] __rte_unused,
3565                    bool external __rte_unused,
3566                    int hairpin __rte_unused,
3567                    struct rte_flow_error *error)
3568 {
3569         return rte_flow_error_set(error, ENOTSUP,
3570                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3571 }
3572
3573 static struct mlx5_flow *
3574 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3575                   const struct rte_flow_attr *attr __rte_unused,
3576                   const struct rte_flow_item items[] __rte_unused,
3577                   const struct rte_flow_action actions[] __rte_unused,
3578                   struct rte_flow_error *error)
3579 {
3580         rte_flow_error_set(error, ENOTSUP,
3581                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3582         return NULL;
3583 }
3584
3585 static int
3586 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3587                     struct mlx5_flow *dev_flow __rte_unused,
3588                     const struct rte_flow_attr *attr __rte_unused,
3589                     const struct rte_flow_item items[] __rte_unused,
3590                     const struct rte_flow_action actions[] __rte_unused,
3591                     struct rte_flow_error *error)
3592 {
3593         return rte_flow_error_set(error, ENOTSUP,
3594                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3595 }
3596
3597 static int
3598 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3599                 struct rte_flow *flow __rte_unused,
3600                 struct rte_flow_error *error)
3601 {
3602         return rte_flow_error_set(error, ENOTSUP,
3603                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3604 }
3605
3606 static void
3607 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3608                  struct rte_flow *flow __rte_unused)
3609 {
3610 }
3611
3612 static void
3613 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3614                   struct rte_flow *flow __rte_unused)
3615 {
3616 }
3617
3618 static int
3619 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3620                 struct rte_flow *flow __rte_unused,
3621                 const struct rte_flow_action *actions __rte_unused,
3622                 void *data __rte_unused,
3623                 struct rte_flow_error *error)
3624 {
3625         return rte_flow_error_set(error, ENOTSUP,
3626                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3627 }
3628
3629 static int
3630 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3631                       uint32_t domains __rte_unused,
3632                       uint32_t flags __rte_unused)
3633 {
3634         return 0;
3635 }
3636
3637 /* Void driver to protect from null pointer reference. */
3638 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3639         .validate = flow_null_validate,
3640         .prepare = flow_null_prepare,
3641         .translate = flow_null_translate,
3642         .apply = flow_null_apply,
3643         .remove = flow_null_remove,
3644         .destroy = flow_null_destroy,
3645         .query = flow_null_query,
3646         .sync_domain = flow_null_sync_domain,
3647 };
3648
3649 /**
3650  * Select flow driver type according to flow attributes and device
3651  * configuration.
3652  *
3653  * @param[in] dev
3654  *   Pointer to the dev structure.
3655  * @param[in] attr
3656  *   Pointer to the flow attributes.
3657  *
3658  * @return
3659  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3660  */
3661 static enum mlx5_flow_drv_type
3662 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3663 {
3664         struct mlx5_priv *priv = dev->data->dev_private;
3665         /* The OS can determine a specific flow type (DV, VERBS) first. */
3666         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3667
3668         if (type != MLX5_FLOW_TYPE_MAX)
3669                 return type;
3670         /*
3671          * Currently when dv_flow_en == 2, only HW steering engine is
3672          * supported. New engines can also be chosen here if ready.
3673          */
3674         if (priv->sh->config.dv_flow_en == 2)
3675                 return MLX5_FLOW_TYPE_HW;
3676         /* If no OS specific type - continue with DV/VERBS selection */
3677         if (attr->transfer && priv->sh->config.dv_esw_en)
3678                 type = MLX5_FLOW_TYPE_DV;
3679         if (!attr->transfer)
3680                 type = priv->sh->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3681                                                      MLX5_FLOW_TYPE_VERBS;
3682         return type;
3683 }
3684
3685 #define flow_get_drv_ops(type) flow_drv_ops[type]
3686
3687 /**
3688  * Flow driver validation API. This abstracts calling driver specific functions.
3689  * The type of flow driver is determined according to flow attributes.
3690  *
3691  * @param[in] dev
3692  *   Pointer to the dev structure.
3693  * @param[in] attr
3694  *   Pointer to the flow attributes.
3695  * @param[in] items
3696  *   Pointer to the list of items.
3697  * @param[in] actions
3698  *   Pointer to the list of actions.
3699  * @param[in] external
3700  *   This flow rule is created by a request external to the PMD.
3701  * @param[in] hairpin
3702  *   Number of hairpin TX actions, 0 means classic flow.
3703  * @param[out] error
3704  *   Pointer to the error structure.
3705  *
3706  * @return
3707  *   0 on success, a negative errno value otherwise and rte_errno is set.
3708  */
3709 static inline int
3710 flow_drv_validate(struct rte_eth_dev *dev,
3711                   const struct rte_flow_attr *attr,
3712                   const struct rte_flow_item items[],
3713                   const struct rte_flow_action actions[],
3714                   bool external, int hairpin, struct rte_flow_error *error)
3715 {
3716         const struct mlx5_flow_driver_ops *fops;
3717         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3718
3719         fops = flow_get_drv_ops(type);
3720         return fops->validate(dev, attr, items, actions, external,
3721                               hairpin, error);
3722 }
3723
3724 /**
3725  * Flow driver preparation API. This abstracts calling driver specific
3726  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3727  * calculates the size of memory required for device flow, allocates the memory,
3728  * initializes the device flow and returns the pointer.
3729  *
3730  * @note
3731  *   This function initializes the device flow structure, such as dv or verbs in
3732  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
3733  *   the rest. For example, adding the returned device flow to the flow->dev_flow
3734  *   list and setting the backward reference to the flow should be done outside
3735  *   of this function. The layers field is not filled either.
3736  *
3737  * @param[in] dev
3738  *   Pointer to the dev structure.
3739  * @param[in] attr
3740  *   Pointer to the flow attributes.
3741  * @param[in] items
3742  *   Pointer to the list of items.
3743  * @param[in] actions
3744  *   Pointer to the list of actions.
3745  * @param[in] flow_idx
3746  *   Memory pool index of the flow.
3747  * @param[out] error
3748  *   Pointer to the error structure.
3749  *
3750  * @return
3751  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3752  */
3753 static inline struct mlx5_flow *
3754 flow_drv_prepare(struct rte_eth_dev *dev,
3755                  const struct rte_flow *flow,
3756                  const struct rte_flow_attr *attr,
3757                  const struct rte_flow_item items[],
3758                  const struct rte_flow_action actions[],
3759                  uint32_t flow_idx,
3760                  struct rte_flow_error *error)
3761 {
3762         const struct mlx5_flow_driver_ops *fops;
3763         enum mlx5_flow_drv_type type = flow->drv_type;
3764         struct mlx5_flow *mlx5_flow = NULL;
3765
3766         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3767         fops = flow_get_drv_ops(type);
3768         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3769         if (mlx5_flow)
3770                 mlx5_flow->flow_idx = flow_idx;
3771         return mlx5_flow;
3772 }
3773
3774 /**
3775  * Flow driver translation API. This abstracts calling driver specific
3776  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3777  * translates a generic flow into a driver flow. flow_drv_prepare() must
3778  * precede.
3779  *
3780  * @note
3781  *   dev_flow->layers could be filled as a result of parsing during translation
3782  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3783  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3784  *   flow->actions could be overwritten even though all the expanded dev_flows
3785  *   have the same actions.
3786  *
3787  * @param[in] dev
3788  *   Pointer to the rte dev structure.
3789  * @param[in, out] dev_flow
3790  *   Pointer to the mlx5 flow.
3791  * @param[in] attr
3792  *   Pointer to the flow attributes.
3793  * @param[in] items
3794  *   Pointer to the list of items.
3795  * @param[in] actions
3796  *   Pointer to the list of actions.
3797  * @param[out] error
3798  *   Pointer to the error structure.
3799  *
3800  * @return
3801  *   0 on success, a negative errno value otherwise and rte_errno is set.
3802  */
3803 static inline int
3804 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3805                    const struct rte_flow_attr *attr,
3806                    const struct rte_flow_item items[],
3807                    const struct rte_flow_action actions[],
3808                    struct rte_flow_error *error)
3809 {
3810         const struct mlx5_flow_driver_ops *fops;
3811         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3812
3813         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3814         fops = flow_get_drv_ops(type);
3815         return fops->translate(dev, dev_flow, attr, items, actions, error);
3816 }
3817
3818 /**
3819  * Flow driver apply API. This abstracts calling driver specific functions.
3820  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3821  * translated driver flows on to device. flow_drv_translate() must precede.
3822  *
3823  * @param[in] dev
3824  *   Pointer to Ethernet device structure.
3825  * @param[in, out] flow
3826  *   Pointer to flow structure.
3827  * @param[out] error
3828  *   Pointer to error structure.
3829  *
3830  * @return
3831  *   0 on success, a negative errno value otherwise and rte_errno is set.
3832  */
3833 static inline int
3834 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3835                struct rte_flow_error *error)
3836 {
3837         const struct mlx5_flow_driver_ops *fops;
3838         enum mlx5_flow_drv_type type = flow->drv_type;
3839
3840         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3841         fops = flow_get_drv_ops(type);
3842         return fops->apply(dev, flow, error);
3843 }
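
/*
 * Illustrative ordering of the driver callbacks wrapped above for a single
 * device sub-flow: prepare, then translate, then apply, as required by the
 * notes on flow_drv_translate() and flow_drv_apply(). Error cleanup and
 * sub-flow list management are omitted; this is a sketch under a hypothetical
 * guard macro (never defined), not the real creation path, which is
 * flow_list_create() declared later in this file.
 */
#ifdef MLX5_FLOW_EXAMPLES /* hypothetical guard, illustrative only */
static int
example_single_subflow_create(struct rte_eth_dev *dev, struct rte_flow *flow,
                              const struct rte_flow_attr *attr,
                              const struct rte_flow_item items[],
                              const struct rte_flow_action actions[],
                              uint32_t flow_idx, struct rte_flow_error *error)
{
        struct mlx5_flow *dev_flow;

        /* Allocate and initialize the driver-specific part of the flow. */
        dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
                                    flow_idx, error);
        if (!dev_flow)
                return -rte_errno;
        /* Backward reference must be set by the caller (see prepare note). */
        dev_flow->flow = flow;
        /* Translate the generic flow into the driver representation. */
        if (flow_drv_translate(dev, dev_flow, attr, items, actions, error))
                return -rte_errno;
        /* Apply the translated device flows attached to the parent flow. */
        return flow_drv_apply(dev, flow, error);
}
#endif /* MLX5_FLOW_EXAMPLES */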
3844
3845 /**
3846  * Flow driver destroy API. This abstracts calling driver specific functions.
3847  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3848  * on device and releases resources of the flow.
3849  *
3850  * @param[in] dev
3851  *   Pointer to Ethernet device.
3852  * @param[in, out] flow
3853  *   Pointer to flow structure.
3854  */
3855 static inline void
3856 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3857 {
3858         const struct mlx5_flow_driver_ops *fops;
3859         enum mlx5_flow_drv_type type = flow->drv_type;
3860
3861         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3862         fops = flow_get_drv_ops(type);
3863         fops->destroy(dev, flow);
3864 }
3865
3866 /**
3867  * Flow driver find RSS policy table API. This abstracts calling driver
3868  * specific functions. Parent flow (rte_flow) should have driver
3869  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3870  *
3871  * @param[in] dev
3872  *   Pointer to Ethernet device.
3873  * @param[in, out] flow
3874  *   Pointer to flow structure.
3875  * @param[in] policy
3876  *   Pointer to meter policy table.
3877  * @param[in] rss_desc
3878  *   Pointer to the rss_desc array.
3879  */
3880 static struct mlx5_flow_meter_sub_policy *
3881 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3882                 struct rte_flow *flow,
3883                 struct mlx5_flow_meter_policy *policy,
3884                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3885 {
3886         const struct mlx5_flow_driver_ops *fops;
3887         enum mlx5_flow_drv_type type = flow->drv_type;
3888
3889         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3890         fops = flow_get_drv_ops(type);
3891         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3892 }
3893
3894 /**
3895  * Flow driver color tag rule API. This abstracts calling driver
3896  * specific functions. Parent flow (rte_flow) should have driver
3897  * type (drv_type). It will create the color tag rules in the meter hierarchy.
3898  *
3899  * @param[in] dev
3900  *   Pointer to Ethernet device.
3901  * @param[in, out] flow
3902  *   Pointer to flow structure.
3903  * @param[in] fm
3904  *   Pointer to flow meter structure.
3905  * @param[in] src_port
3906  *   The src port this extra rule should use.
3907  * @param[in] item
3908  *   The src port id match item.
3909  * @param[out] error
3910  *   Pointer to error structure.
3911  */
3912 static int
3913 flow_drv_mtr_hierarchy_rule_create(struct rte_eth_dev *dev,
3914                 struct rte_flow *flow,
3915                 struct mlx5_flow_meter_info *fm,
3916                 int32_t src_port,
3917                 const struct rte_flow_item *item,
3918                 struct rte_flow_error *error)
3919 {
3920         const struct mlx5_flow_driver_ops *fops;
3921         enum mlx5_flow_drv_type type = flow->drv_type;
3922
3923         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3924         fops = flow_get_drv_ops(type);
3925         return fops->meter_hierarchy_rule_create(dev, fm,
3926                                                 src_port, item, error);
3927 }
3928
3929 /**
3930  * Get RSS action from the action list.
3931  *
3932  * @param[in] dev
3933  *   Pointer to Ethernet device.
3934  * @param[in] actions
3935  *   Pointer to the list of actions.
3938  *
3939  * @return
3940  *   Pointer to the RSS action if it exists, NULL otherwise.
3941  */
3942 static const struct rte_flow_action_rss*
3943 flow_get_rss_action(struct rte_eth_dev *dev,
3944                     const struct rte_flow_action actions[])
3945 {
3946         struct mlx5_priv *priv = dev->data->dev_private;
3947         const struct rte_flow_action_rss *rss = NULL;
3948         struct mlx5_meter_policy_action_container *acg;
3949         struct mlx5_meter_policy_action_container *acy;
3950
3951         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3952                 switch (actions->type) {
3953                 case RTE_FLOW_ACTION_TYPE_RSS:
3954                         rss = actions->conf;
3955                         break;
3956                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3957                 {
3958                         const struct rte_flow_action_sample *sample =
3959                                                                 actions->conf;
3960                         const struct rte_flow_action *act = sample->actions;
3961                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3962                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3963                                         rss = act->conf;
3964                         break;
3965                 }
3966                 case RTE_FLOW_ACTION_TYPE_METER:
3967                 {
3968                         uint32_t mtr_idx;
3969                         struct mlx5_flow_meter_info *fm;
3970                         struct mlx5_flow_meter_policy *policy;
3971                         const struct rte_flow_action_meter *mtr = actions->conf;
3972
3973                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3974                         if (fm && !fm->def_policy) {
3975                                 policy = mlx5_flow_meter_policy_find(dev,
3976                                                 fm->policy_id, NULL);
3977                                 MLX5_ASSERT(policy);
3978                                 if (policy->is_hierarchy) {
3979                                         policy =
3980                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
3981                                                                         policy);
3982                                         if (!policy)
3983                                                 return NULL;
3984                                 }
3985                                 if (policy->is_rss) {
3986                                         acg =
3987                                         &policy->act_cnt[RTE_COLOR_GREEN];
3988                                         acy =
3989                                         &policy->act_cnt[RTE_COLOR_YELLOW];
3990                                         if (acg->fate_action ==
3991                                             MLX5_FLOW_FATE_SHARED_RSS)
3992                                                 rss = acg->rss->conf;
3993                                         else if (acy->fate_action ==
3994                                                  MLX5_FLOW_FATE_SHARED_RSS)
3995                                                 rss = acy->rss->conf;
3996                                 }
3997                         }
3998                         break;
3999                 }
4000                 default:
4001                         break;
4002                 }
4003         }
4004         return rss;
4005 }
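
/*
 * Illustrative usage sketch for flow_get_rss_action() above, guarded by a
 * hypothetical macro that is never defined: an action list carrying a plain
 * RSS fate action. For such a list the helper returns a pointer to
 * "example_rss_conf"; it would also dig the RSS out of a SAMPLE action or a
 * non-default RSS meter policy, as the switch above shows. Queue numbers and
 * RSS types below are arbitrary example values.
 */
#ifdef MLX5_FLOW_EXAMPLES /* hypothetical guard, illustrative only */
static const uint16_t example_rss_queues[] = { 0, 1, 2, 3 };
static const struct rte_flow_action_rss example_rss_conf = {
        .types = RTE_ETH_RSS_IP,
        .queue_num = RTE_DIM(example_rss_queues),
        .queue = example_rss_queues,
};
static const struct rte_flow_action example_rss_actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &example_rss_conf },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */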
4006
4007 /**
4008  * Get ASO age action by index.
4009  *
4010  * @param[in] dev
4011  *   Pointer to the Ethernet device structure.
4012  * @param[in] age_idx
4013  *   Index to the ASO age action.
4014  *
4015  * @return
4016  *   The specified ASO age action.
4017  */
4018 struct mlx5_aso_age_action*
4019 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
4020 {
4021         uint16_t pool_idx = age_idx & UINT16_MAX;
4022         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
4023         struct mlx5_priv *priv = dev->data->dev_private;
4024         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
4025         struct mlx5_aso_age_pool *pool;
4026
4027         rte_rwlock_read_lock(&mng->resize_rwl);
4028         pool = mng->pools[pool_idx];
4029         rte_rwlock_read_unlock(&mng->resize_rwl);
4030         return &pool->actions[offset - 1];
4031 }
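
/*
 * Layout of the 32-bit ASO age index decoded above: the pool index lives in
 * the lower 16 bits and a 1-based action offset within the pool lives in the
 * upper 16 bits (hence the "offset - 1" when indexing pool->actions). The
 * helper below is an illustrative encoder derived from that decoding, under
 * a hypothetical guard macro; the real encoding side lives elsewhere in the
 * driver.
 */
#ifdef MLX5_FLOW_EXAMPLES /* hypothetical guard, illustrative only */
static inline uint32_t
example_aso_age_idx_make(uint16_t pool_idx, uint16_t action_slot)
{
        /* Slot 0 in the pool is stored as offset 1, slot 1 as offset 2, ... */
        return ((uint32_t)(action_slot + 1) << 16) | pool_idx;
}
#endif /* MLX5_FLOW_EXAMPLES */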
4032
4033 /* Maps an indirect action to the translated direct action in some actions array. */
4034 struct mlx5_translated_action_handle {
4035         struct rte_flow_action_handle *action; /**< Indirect action handle. */
4036         int index; /**< Index in related array of rte_flow_action. */
4037 };
4038
4039 /**
4040  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to the related
4041  * direct actions if translation is possible.
4042  * This functionality is used to run the same execution path for both direct and
4043  * indirect actions on flow create. All necessary preparations for indirect
4044  * action handling should be performed on the *handle* actions list returned
4045  * from this call.
4046  *
4047  * @param[in] dev
4048  *   Pointer to Ethernet device.
4049  * @param[in] actions
4050  *   List of actions to translate.
4051  * @param[out] handle
4052  *   List to store translated indirect action object handles.
4053  * @param[in, out] indir_n
4054  *   Size of the *handle* array. On return, it is updated with the number of
4055  *   indirect actions retrieved from the *actions* list.
4056  * @param[out] translated_actions
4057  *   List of actions where all indirect actions were translated to direct
4058  *   if possible. NULL if no translation took place.
4059  * @param[out] error
4060  *   Pointer to the error structure.
4061  *
4062  * @return
4063  *   0 on success, a negative errno value otherwise and rte_errno is set.
4064  */
4065 static int
4066 flow_action_handles_translate(struct rte_eth_dev *dev,
4067                               const struct rte_flow_action actions[],
4068                               struct mlx5_translated_action_handle *handle,
4069                               int *indir_n,
4070                               struct rte_flow_action **translated_actions,
4071                               struct rte_flow_error *error)
4072 {
4073         struct mlx5_priv *priv = dev->data->dev_private;
4074         struct rte_flow_action *translated = NULL;
4075         size_t actions_size;
4076         int n;
4077         int copied_n = 0;
4078         struct mlx5_translated_action_handle *handle_end = NULL;
4079
4080         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
4081                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
4082                         continue;
4083                 if (copied_n == *indir_n) {
4084                         return rte_flow_error_set
4085                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
4086                                  NULL, "too many shared actions");
4087                 }
4088                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
4089                            sizeof(actions[n].conf));
4090                 handle[copied_n].index = n;
4091                 copied_n++;
4092         }
4093         n++;
4094         *indir_n = copied_n;
4095         if (!copied_n)
4096                 return 0;
4097         actions_size = sizeof(struct rte_flow_action) * n;
4098         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
4099         if (!translated) {
4100                 rte_errno = ENOMEM;
4101                 return -ENOMEM;
4102         }
4103         memcpy(translated, actions, actions_size);
4104         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
4105                 struct mlx5_shared_action_rss *shared_rss;
4106                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4107                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4108                 uint32_t idx = act_idx &
4109                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4110
4111                 switch (type) {
4112                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
4113                         shared_rss = mlx5_ipool_get
4114                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
4115                         translated[handle->index].type =
4116                                 RTE_FLOW_ACTION_TYPE_RSS;
4117                         translated[handle->index].conf =
4118                                 &shared_rss->origin;
4119                         break;
4120                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
4121                         translated[handle->index].type =
4122                                                 (enum rte_flow_action_type)
4123                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
4124                         translated[handle->index].conf = (void *)(uintptr_t)idx;
4125                         break;
4126                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
4127                         if (priv->sh->flow_hit_aso_en) {
4128                                 translated[handle->index].type =
4129                                         (enum rte_flow_action_type)
4130                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
4131                                 translated[handle->index].conf =
4132                                                          (void *)(uintptr_t)idx;
4133                                 break;
4134                         }
4135                         /* Fall-through */
4136                 case MLX5_INDIRECT_ACTION_TYPE_CT:
4137                         if (priv->sh->ct_aso_en) {
4138                                 translated[handle->index].type =
4139                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
4140                                 translated[handle->index].conf =
4141                                                          (void *)(uintptr_t)idx;
4142                                 break;
4143                         }
4144                         /* Fall-through */
4145                 default:
4146                         mlx5_free(translated);
4147                         return rte_flow_error_set
4148                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
4149                                  NULL, "invalid indirect action type");
4150                 }
4151         }
4152         *translated_actions = translated;
4153         return 0;
4154 }
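
/*
 * Illustrative decode of an indirect action handle as used above: the handle
 * value packs the indirect action type in the bits at and above
 * MLX5_INDIRECT_ACTION_TYPE_OFFSET and the ipool index in the bits below it.
 * This mirrors the masking in the switch above; it is a sketch under a
 * hypothetical guard macro, not a driver API.
 */
#ifdef MLX5_FLOW_EXAMPLES /* hypothetical guard, illustrative only */
static inline void
example_indirect_handle_decode(const struct rte_flow_action_handle *handle,
                               uint32_t *type, uint32_t *idx)
{
        uint32_t act_idx = (uint32_t)(uintptr_t)handle;

        *type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
        *idx = act_idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
}
#endif /* MLX5_FLOW_EXAMPLES */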
4155
4156 /**
4157  * Get Shared RSS action from the action list.
4158  *
4159  * @param[in] dev
4160  *   Pointer to Ethernet device.
4161  * @param[in] handle
4162  *   Pointer to the list of translated indirect action handles.
4163  * @param[in] shared_n
4164  *   Actions list length.
4165  *
4166  * @return
4167  *   The MLX5 RSS action ID if it exists, 0 otherwise.
4168  */
4169 static uint32_t
4170 flow_get_shared_rss_action(struct rte_eth_dev *dev,
4171                            struct mlx5_translated_action_handle *handle,
4172                            int shared_n)
4173 {
4174         struct mlx5_translated_action_handle *handle_end;
4175         struct mlx5_priv *priv = dev->data->dev_private;
4176         struct mlx5_shared_action_rss *shared_rss;
4177
4178
4179         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
4180                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
4181                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
4182                 uint32_t idx = act_idx &
4183                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
4184                 switch (type) {
4185                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
4186                         shared_rss = mlx5_ipool_get
4187                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
4188                                                                            idx);
4189                         __atomic_add_fetch(&shared_rss->refcnt, 1,
4190                                            __ATOMIC_RELAXED);
4191                         return idx;
4192                 default:
4193                         break;
4194                 }
4195         }
4196         return 0;
4197 }
4198
4199 static unsigned int
4200 find_graph_root(uint32_t rss_level)
4201 {
4202         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
4203                                MLX5_EXPANSION_ROOT_OUTER;
4204 }
4205
4206 /**
4207  *  Get layer flags from the prefix flow.
4208  *
4209  *  Some flows may be split into several subflows; the prefix subflow gets the
4210  *  match items and the suffix subflow gets the actions.
4211  *  Some actions need the user-defined match item flags to get the details for
4212  *  the action.
4213  *  This function helps the suffix flow get the item layer flags from the prefix
4214  *  subflow.
4215  *
4216  * @param[in] dev_flow
4217  *   Pointer to the created prefix subflow.
4218  *
4219  * @return
4220  *   The layers obtained from the prefix subflow.
4221  */
4222 static inline uint64_t
4223 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
4224 {
4225         uint64_t layers = 0;
4226
4227         /*
4228          * The layer bits could be cached in a local variable, but usually the
4229          * compiler will do that optimization on the source code by itself.
4230          * If there is no decap action, use the layers directly.
4231          */
4232         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
4233                 return dev_flow->handle->layers;
4234         /* Convert L3 layers with decap action. */
4235         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
4236                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
4237         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
4238                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
4239         /* Convert L4 layers with decap action.  */
4240         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
4241                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
4242         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
4243                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
4244         return layers;
4245 }
4246
4247 /**
4248  * Get metadata split action information.
4249  *
4250  * @param[in] actions
4251  *   Pointer to the list of actions.
4252  * @param[out] qrss
4253  *   Pointer used to return a pointer to the QUEUE/RSS action, if one is
4254  *   found in the list; left unchanged otherwise.
4257  * @param[out] encap_idx
4258  *   Pointer to the index of the encap action if exists, otherwise the last
4259  *   action index.
4260  *
4261  * @return
4262  *   Total number of actions.
4263  */
4264 static int
4265 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
4266                                        const struct rte_flow_action **qrss,
4267                                        int *encap_idx)
4268 {
4269         const struct rte_flow_action_raw_encap *raw_encap;
4270         int actions_n = 0;
4271         int raw_decap_idx = -1;
4272
4273         *encap_idx = -1;
4274         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4275                 switch (actions->type) {
4276                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4277                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4278                         *encap_idx = actions_n;
4279                         break;
4280                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4281                         raw_decap_idx = actions_n;
4282                         break;
4283                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4284                         raw_encap = actions->conf;
4285                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4286                                 *encap_idx = raw_decap_idx != -1 ?
4287                                                       raw_decap_idx : actions_n;
4288                         break;
4289                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4290                 case RTE_FLOW_ACTION_TYPE_RSS:
4291                         *qrss = actions;
4292                         break;
4293                 default:
4294                         break;
4295                 }
4296                 actions_n++;
4297         }
4298         if (*encap_idx == -1)
4299                 *encap_idx = actions_n;
4300         /* Count RTE_FLOW_ACTION_TYPE_END. */
4301         return actions_n + 1;
4302 }
4303
4304 /**
4305  * Check if the action will change packet.
4306  *
4307  * @param dev
4308  *   Pointer to Ethernet device.
4309  * @param[in] type
4310  *   Action type.
4311  *
4312  * @return
4313  *   true if action will change packet, false otherwise.
4314  */
4315 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
4316                                           enum rte_flow_action_type type)
4317 {
4318         struct mlx5_priv *priv = dev->data->dev_private;
4319
4320         switch (type) {
4321         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4322         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4323         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4324         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4325         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4326         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4327         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4328         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4329         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4330         case RTE_FLOW_ACTION_TYPE_SET_TTL:
4331         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4332         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4333         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4334         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4335         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4336         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4337         case RTE_FLOW_ACTION_TYPE_SET_META:
4338         case RTE_FLOW_ACTION_TYPE_SET_TAG:
4339         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
4340         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4341         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4342         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4343         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4344         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4345         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4346         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4347         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4348         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4349         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
4350                 return true;
4351         case RTE_FLOW_ACTION_TYPE_FLAG:
4352         case RTE_FLOW_ACTION_TYPE_MARK:
4353                 if (priv->sh->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
4354                         return true;
4355                 else
4356                         return false;
4357         default:
4358                 return false;
4359         }
4360 }
4361
4362 /**
4363  * Check meter action from the action list.
4364  *
4365  * @param dev
4366  *   Pointer to Ethernet device.
4367  * @param[in] actions
4368  *   Pointer to the list of actions.
4369  * @param[out] has_mtr
4370  *   Pointer to the meter existence flag.
4371  * @param[out] has_modify
4372  *   Pointer to the flag indicating whether there is a packet-modifying action.
4373  * @param[out] meter_id
4374  *   Pointer to the meter id.
4375  *
4376  * @return
4377  *   Total number of actions.
4378  */
4379 static int
4380 flow_check_meter_action(struct rte_eth_dev *dev,
4381                         const struct rte_flow_action actions[],
4382                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
4383 {
4384         const struct rte_flow_action_meter *mtr = NULL;
4385         int actions_n = 0;
4386
4387         MLX5_ASSERT(has_mtr);
4388         *has_mtr = false;
4389         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4390                 switch (actions->type) {
4391                 case RTE_FLOW_ACTION_TYPE_METER:
4392                         mtr = actions->conf;
4393                         *meter_id = mtr->mtr_id;
4394                         *has_mtr = true;
4395                         break;
4396                 default:
4397                         break;
4398                 }
4399                 if (!*has_mtr)
4400                         *has_modify |= flow_check_modify_action_type(dev,
4401                                                                 actions->type);
4402                 actions_n++;
4403         }
4404         /* Count RTE_FLOW_ACTION_TYPE_END. */
4405         return actions_n + 1;
4406 }
4407
4408 /**
4409  * Check if the flow should be split due to hairpin.
4410  * The reason for the split is that in current HW we can't
4411  * support encap and push-vlan on Rx, so if a flow contains
4412  * these actions we move it to Tx.
4413  *
4414  * @param dev
4415  *   Pointer to Ethernet device.
4416  * @param[in] attr
4417  *   Flow rule attributes.
4418  * @param[in] actions
4419  *   Associated actions (list terminated by the END action).
4420  *
4421  * @return
4422  *   > 0 the number of actions and the flow should be split,
4423  *   0 when no split required.
4424  */
4425 static int
4426 flow_check_hairpin_split(struct rte_eth_dev *dev,
4427                          const struct rte_flow_attr *attr,
4428                          const struct rte_flow_action actions[])
4429 {
4430         int queue_action = 0;
4431         int action_n = 0;
4432         int split = 0;
4433         const struct rte_flow_action_queue *queue;
4434         const struct rte_flow_action_rss *rss;
4435         const struct rte_flow_action_raw_encap *raw_encap;
4436         const struct rte_eth_hairpin_conf *conf;
4437
4438         if (!attr->ingress)
4439                 return 0;
4440         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4441                 switch (actions->type) {
4442                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4443                         queue = actions->conf;
4444                         if (queue == NULL)
4445                                 return 0;
4446                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
4447                         if (conf == NULL || conf->tx_explicit != 0)
4448                                 return 0;
4449                         queue_action = 1;
4450                         action_n++;
4451                         break;
4452                 case RTE_FLOW_ACTION_TYPE_RSS:
4453                         rss = actions->conf;
4454                         if (rss == NULL || rss->queue_num == 0)
4455                                 return 0;
4456                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
4457                         if (conf == NULL || conf->tx_explicit != 0)
4458                                 return 0;
4459                         queue_action = 1;
4460                         action_n++;
4461                         break;
4462                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4463                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4464                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4465                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4466                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4467                         split++;
4468                         action_n++;
4469                         break;
4470                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4471                         raw_encap = actions->conf;
4472                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4473                                 split++;
4474                         action_n++;
4475                         break;
4476                 default:
4477                         action_n++;
4478                         break;
4479                 }
4480         }
4481         if (split && queue_action)
4482                 return action_n;
4483         return 0;
4484 }
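
/*
 * Illustrative ingress action list that flow_check_hairpin_split() above
 * would report as needing a split: a raw encap larger than
 * MLX5_ENCAPSULATION_DECISION_SIZE combined with a QUEUE fate action. The
 * queue index is an arbitrary example and is assumed to refer to a hairpin
 * queue configured with implicit Tx rules (tx_explicit == 0); the sketch is
 * under a hypothetical guard macro and never compiled.
 */
#ifdef MLX5_FLOW_EXAMPLES /* hypothetical guard, illustrative only */
static uint8_t example_encap_hdr[MLX5_ENCAPSULATION_DECISION_SIZE + 1];
static const struct rte_flow_action_raw_encap example_raw_encap = {
        .data = example_encap_hdr,
        .size = sizeof(example_encap_hdr),
};
static const struct rte_flow_action_queue example_hairpin_queue = {
        .index = 1,
};
static const struct rte_flow_action example_hairpin_actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_RAW_ENCAP, .conf = &example_raw_encap },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_hairpin_queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */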
4485
4486 /* Declare flow create/destroy prototype in advance. */
4487 static uint32_t
4488 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4489                  const struct rte_flow_attr *attr,
4490                  const struct rte_flow_item items[],
4491                  const struct rte_flow_action actions[],
4492                  bool external, struct rte_flow_error *error);
4493
4494 static void
4495 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
4496                   uint32_t flow_idx);
4497
4498 int
4499 flow_dv_mreg_match_cb(void *tool_ctx __rte_unused,
4500                       struct mlx5_list_entry *entry, void *cb_ctx)
4501 {
4502         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4503         struct mlx5_flow_mreg_copy_resource *mcp_res =
4504                                container_of(entry, typeof(*mcp_res), hlist_ent);
4505
4506         return mcp_res->mark_id != *(uint32_t *)(ctx->data);
4507 }
4508
4509 struct mlx5_list_entry *
4510 flow_dv_mreg_create_cb(void *tool_ctx, void *cb_ctx)
4511 {
4512         struct rte_eth_dev *dev = tool_ctx;
4513         struct mlx5_priv *priv = dev->data->dev_private;
4514         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
4515         struct mlx5_flow_mreg_copy_resource *mcp_res;
4516         struct rte_flow_error *error = ctx->error;
4517         uint32_t idx = 0;
4518         int ret;
4519         uint32_t mark_id = *(uint32_t *)(ctx->data);
4520         struct rte_flow_attr attr = {
4521                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4522                 .ingress = 1,
4523         };
4524         struct mlx5_rte_flow_item_tag tag_spec = {
4525                 .data = mark_id,
4526         };
4527         struct rte_flow_item items[] = {
4528                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4529         };
4530         struct rte_flow_action_mark ftag = {
4531                 .id = mark_id,
4532         };
4533         struct mlx5_flow_action_copy_mreg cp_mreg = {
4534                 .dst = REG_B,
4535                 .src = REG_NON,
4536         };
4537         struct rte_flow_action_jump jump = {
4538                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4539         };
4540         struct rte_flow_action actions[] = {
4541                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4542         };
4543
4544         /* Fill the register fields in the flow. */
4545         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4546         if (ret < 0)
4547                 return NULL;
4548         tag_spec.id = ret;
4549         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4550         if (ret < 0)
4551                 return NULL;
4552         cp_mreg.src = ret;
4553         /* Provide the full width of FLAG specific value. */
4554         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4555                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4556         /* Build a new flow. */
4557         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4558                 items[0] = (struct rte_flow_item){
4559                         .type = (enum rte_flow_item_type)
4560                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4561                         .spec = &tag_spec,
4562                 };
4563                 items[1] = (struct rte_flow_item){
4564                         .type = RTE_FLOW_ITEM_TYPE_END,
4565                 };
4566                 actions[0] = (struct rte_flow_action){
4567                         .type = (enum rte_flow_action_type)
4568                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4569                         .conf = &ftag,
4570                 };
4571                 actions[1] = (struct rte_flow_action){
4572                         .type = (enum rte_flow_action_type)
4573                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4574                         .conf = &cp_mreg,
4575                 };
4576                 actions[2] = (struct rte_flow_action){
4577                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4578                         .conf = &jump,
4579                 };
4580                 actions[3] = (struct rte_flow_action){
4581                         .type = RTE_FLOW_ACTION_TYPE_END,
4582                 };
4583         } else {
4584                 /* Default rule, wildcard match. */
4585                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4586                 items[0] = (struct rte_flow_item){
4587                         .type = RTE_FLOW_ITEM_TYPE_END,
4588                 };
4589                 actions[0] = (struct rte_flow_action){
4590                         .type = (enum rte_flow_action_type)
4591                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4592                         .conf = &cp_mreg,
4593                 };
4594                 actions[1] = (struct rte_flow_action){
4595                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4596                         .conf = &jump,
4597                 };
4598                 actions[2] = (struct rte_flow_action){
4599                         .type = RTE_FLOW_ACTION_TYPE_END,
4600                 };
4601         }
4602         /* Build a new entry. */
4603         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4604         if (!mcp_res) {
4605                 rte_errno = ENOMEM;
4606                 return NULL;
4607         }
4608         mcp_res->idx = idx;
4609         mcp_res->mark_id = mark_id;
4610         /*
4611          * The copy flows are not included in any list. These
4612          * are referenced from other flows and cannot be applied,
4613          * removed, or deleted in arbitrary order by list
4614          * traversal.
4615          */
4616         mcp_res->rix_flow = flow_list_create(dev, MLX5_FLOW_TYPE_MCP,
4617                                         &attr, items, actions, false, error);
4618         if (!mcp_res->rix_flow) {
4619                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4620                 return NULL;
4621         }
4622         return &mcp_res->hlist_ent;
4623 }
4624
4625 struct mlx5_list_entry *
4626 flow_dv_mreg_clone_cb(void *tool_ctx, struct mlx5_list_entry *oentry,
4627                       void *cb_ctx __rte_unused)
4628 {
4629         struct rte_eth_dev *dev = tool_ctx;
4630         struct mlx5_priv *priv = dev->data->dev_private;
4631         struct mlx5_flow_mreg_copy_resource *mcp_res;
4632         uint32_t idx = 0;
4633
4634         mcp_res = mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4635         if (!mcp_res) {
4636                 rte_errno = ENOMEM;
4637                 return NULL;
4638         }
4639         memcpy(mcp_res, oentry, sizeof(*mcp_res));
4640         mcp_res->idx = idx;
4641         return &mcp_res->hlist_ent;
4642 }
4643
4644 void
4645 flow_dv_mreg_clone_free_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4646 {
4647         struct mlx5_flow_mreg_copy_resource *mcp_res =
4648                                container_of(entry, typeof(*mcp_res), hlist_ent);
4649         struct rte_eth_dev *dev = tool_ctx;
4650         struct mlx5_priv *priv = dev->data->dev_private;
4651
4652         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4653 }
4654
4655 /**
4656  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4657  *
4658  * As mark_id is unique, if there's already a registered flow for the mark_id,
4659  * return by increasing the reference counter of the resource. Otherwise, create
4660  * the resource (mcp_res) and flow.
4661  *
4662  * Flow looks like,
4663  *   - If ingress port is ANY and reg_c[1] is mark_id,
4664  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4665  *
4666  * For default flow (zero mark_id), flow is like,
4667  *   - If ingress port is ANY,
4668  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4669  *
4670  * @param dev
4671  *   Pointer to Ethernet device.
4672  * @param mark_id
4673  *   ID of MARK action, zero means default flow for META.
4674  * @param[out] error
4675  *   Perform verbose error reporting if not NULL.
4676  *
4677  * @return
4678  *   Associated resource on success, NULL otherwise and rte_errno is set.
4679  */
4680 static struct mlx5_flow_mreg_copy_resource *
4681 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4682                           struct rte_flow_error *error)
4683 {
4684         struct mlx5_priv *priv = dev->data->dev_private;
4685         struct mlx5_list_entry *entry;
4686         struct mlx5_flow_cb_ctx ctx = {
4687                 .dev = dev,
4688                 .error = error,
4689                 .data = &mark_id,
4690         };
4691
4692         /* Check if already registered. */
4693         MLX5_ASSERT(priv->mreg_cp_tbl);
4694         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4695         if (!entry)
4696                 return NULL;
4697         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4698                             hlist_ent);
4699 }
4700
4701 void
4702 flow_dv_mreg_remove_cb(void *tool_ctx, struct mlx5_list_entry *entry)
4703 {
4704         struct mlx5_flow_mreg_copy_resource *mcp_res =
4705                                container_of(entry, typeof(*mcp_res), hlist_ent);
4706         struct rte_eth_dev *dev = tool_ctx;
4707         struct mlx5_priv *priv = dev->data->dev_private;
4708
4709         MLX5_ASSERT(mcp_res->rix_flow);
4710         flow_list_destroy(dev, MLX5_FLOW_TYPE_MCP, mcp_res->rix_flow);
4711         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4712 }
4713
4714 /**
4715  * Release flow in RX_CP_TBL.
4716  *
4717  * @param dev
4718  *   Pointer to Ethernet device.
4719  * @param flow
4720  *   Parent flow for which copying is provided.
4721  */
4722 static void
4723 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4724                           struct rte_flow *flow)
4725 {
4726         struct mlx5_flow_mreg_copy_resource *mcp_res;
4727         struct mlx5_priv *priv = dev->data->dev_private;
4728
4729         if (!flow->rix_mreg_copy)
4730                 return;
4731         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4732                                  flow->rix_mreg_copy);
4733         if (!mcp_res || !priv->mreg_cp_tbl)
4734                 return;
4735         MLX5_ASSERT(mcp_res->rix_flow);
4736         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4737         flow->rix_mreg_copy = 0;
4738 }
4739
4740 /**
4741  * Remove the default copy action from RX_CP_TBL.
4742  *
4743  * This function is called in mlx5_dev_start(). Thread safety is not
4744  * guaranteed.
4745  *
4746  * @param dev
4747  *   Pointer to Ethernet device.
4748  */
4749 static void
4750 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4751 {
4752         struct mlx5_list_entry *entry;
4753         struct mlx5_priv *priv = dev->data->dev_private;
4754         struct mlx5_flow_cb_ctx ctx;
4755         uint32_t mark_id;
4756
4757         /* Check if default flow is registered. */
4758         if (!priv->mreg_cp_tbl)
4759                 return;
4760         mark_id = MLX5_DEFAULT_COPY_ID;
4761         ctx.data = &mark_id;
4762         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx);
4763         if (!entry)
4764                 return;
4765         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4766 }
4767
4768 /**
4769  * Add the default copy action in RX_CP_TBL.
4770  *
4771  * This function is called in mlx5_dev_start(). Thread safety is not
4772  * guaranteed.
4773  *
4774  * @param dev
4775  *   Pointer to Ethernet device.
4776  * @param[out] error
4777  *   Perform verbose error reporting if not NULL.
4778  *
4779  * @return
4780  *   0 for success, negative value otherwise and rte_errno is set.
4781  */
4782 static int
4783 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4784                                   struct rte_flow_error *error)
4785 {
4786         struct mlx5_priv *priv = dev->data->dev_private;
4787         struct mlx5_flow_mreg_copy_resource *mcp_res;
4788         struct mlx5_flow_cb_ctx ctx;
4789         uint32_t mark_id;
4790
4791         /* Check whether extensive metadata feature is engaged. */
4792         if (!priv->sh->config.dv_flow_en ||
4793             priv->sh->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4794             !mlx5_flow_ext_mreg_supported(dev) ||
4795             !priv->sh->dv_regc0_mask)
4796                 return 0;
4797         /*
4798          * Adding the default mreg copy flow may be called multiple times,
4799          * but it is removed only once at stop. Avoid registering it twice.
4800          */
4801         mark_id = MLX5_DEFAULT_COPY_ID;
4802         ctx.data = &mark_id;
4803         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id, &ctx))
4804                 return 0;
4805         mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
4806         if (!mcp_res)
4807                 return -rte_errno;
4808         return 0;
4809 }
4810
4811 /**
4812  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4813  *
4814  * All the flows having a Q/RSS action should be split by
4815  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in the RX_CP_TBL
4816  * performs the following,
4817  *   - CQE->flow_tag := reg_c[1] (MARK)
4818  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4819  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4820  * but there should be a flow for each MARK ID set by the MARK action.
4821  *
4822  * For the aforementioned reason, if there's a MARK action in flow's action
4823  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4824  * the MARK ID to CQE's flow_tag like,
4825  *   - If reg_c[1] is mark_id,
4826  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4827  *
4828  * For SET_META action which stores value in reg_c[0], as the destination is
4829  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4830  * MARK ID means the default flow. The default flow looks like,
4831  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4832  *
4833  * @param dev
4834  *   Pointer to Ethernet device.
4835  * @param flow
4836  *   Pointer to flow structure.
4837  * @param[in] actions
4838  *   Pointer to the list of actions.
4839  * @param[out] error
4840  *   Perform verbose error reporting if not NULL.
4841  *
4842  * @return
4843  *   0 on success, negative value otherwise and rte_errno is set.
4844  */
4845 static int
4846 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4847                             struct rte_flow *flow,
4848                             const struct rte_flow_action *actions,
4849                             struct rte_flow_error *error)
4850 {
4851         struct mlx5_priv *priv = dev->data->dev_private;
4852         struct mlx5_sh_config *config = &priv->sh->config;
4853         struct mlx5_flow_mreg_copy_resource *mcp_res;
4854         const struct rte_flow_action_mark *mark;
4855
4856         /* Check whether extensive metadata feature is engaged. */
4857         if (!config->dv_flow_en ||
4858             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4859             !mlx5_flow_ext_mreg_supported(dev) ||
4860             !priv->sh->dv_regc0_mask)
4861                 return 0;
4862         /* Find MARK action. */
4863         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4864                 switch (actions->type) {
4865                 case RTE_FLOW_ACTION_TYPE_FLAG:
4866                         mcp_res = flow_mreg_add_copy_action
4867                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4868                         if (!mcp_res)
4869                                 return -rte_errno;
4870                         flow->rix_mreg_copy = mcp_res->idx;
4871                         return 0;
4872                 case RTE_FLOW_ACTION_TYPE_MARK:
4873                         mark = (const struct rte_flow_action_mark *)
4874                                 actions->conf;
4875                         mcp_res =
4876                                 flow_mreg_add_copy_action(dev, mark->id, error);
4877                         if (!mcp_res)
4878                                 return -rte_errno;
4879                         flow->rix_mreg_copy = mcp_res->idx;
4880                         return 0;
4881                 default:
4882                         break;
4883                 }
4884         }
4885         return 0;
4886 }
4887
4888 #define MLX5_MAX_SPLIT_ACTIONS 24
4889 #define MLX5_MAX_SPLIT_ITEMS 24
4890
4891 /**
4892  * Split the hairpin flow.
4893  * Since HW can't support encap and push-vlan on Rx, we move these
4894  * actions to Tx.
4895  * If the count action is after the encap, then we also
4896  * move the count action. In this case the count will also measure
4897  * the outer bytes.
4898  *
4899  * @param dev
4900  *   Pointer to Ethernet device.
4901  * @param[in] actions
4902  *   Associated actions (list terminated by the END action).
4903  * @param[out] actions_rx
4904  *   Rx flow actions.
4905  * @param[out] actions_tx
4906  *   Tx flow actions.
4907  * @param[out] pattern_tx
4908  *   The pattern items for the Tx flow.
4909  * @param[in] flow_id
4910  *   The flow ID connected to this flow.
4911  *
4912  * @return
4913  *   0 on success.
4914  */
4915 static int
4916 flow_hairpin_split(struct rte_eth_dev *dev,
4917                    const struct rte_flow_action actions[],
4918                    struct rte_flow_action actions_rx[],
4919                    struct rte_flow_action actions_tx[],
4920                    struct rte_flow_item pattern_tx[],
4921                    uint32_t flow_id)
4922 {
4923         const struct rte_flow_action_raw_encap *raw_encap;
4924         const struct rte_flow_action_raw_decap *raw_decap;
4925         struct mlx5_rte_flow_action_set_tag *set_tag;
4926         struct rte_flow_action *tag_action;
4927         struct mlx5_rte_flow_item_tag *tag_item;
4928         struct rte_flow_item *item;
4929         char *addr;
4930         int encap = 0;
4931
4932         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4933                 switch (actions->type) {
4934                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4935                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4936                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4937                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4938                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4939                         rte_memcpy(actions_tx, actions,
4940                                sizeof(struct rte_flow_action));
4941                         actions_tx++;
4942                         break;
4943                 case RTE_FLOW_ACTION_TYPE_COUNT:
4944                         if (encap) {
4945                                 rte_memcpy(actions_tx, actions,
4946                                            sizeof(struct rte_flow_action));
4947                                 actions_tx++;
4948                         } else {
4949                                 rte_memcpy(actions_rx, actions,
4950                                            sizeof(struct rte_flow_action));
4951                                 actions_rx++;
4952                         }
4953                         break;
4954                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4955                         raw_encap = actions->conf;
4956                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4957                                 memcpy(actions_tx, actions,
4958                                        sizeof(struct rte_flow_action));
4959                                 actions_tx++;
4960                                 encap = 1;
4961                         } else {
4962                                 rte_memcpy(actions_rx, actions,
4963                                            sizeof(struct rte_flow_action));
4964                                 actions_rx++;
4965                         }
4966                         break;
4967                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4968                         raw_decap = actions->conf;
4969                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4970                                 memcpy(actions_tx, actions,
4971                                        sizeof(struct rte_flow_action));
4972                                 actions_tx++;
4973                         } else {
4974                                 rte_memcpy(actions_rx, actions,
4975                                            sizeof(struct rte_flow_action));
4976                                 actions_rx++;
4977                         }
4978                         break;
4979                 default:
4980                         rte_memcpy(actions_rx, actions,
4981                                    sizeof(struct rte_flow_action));
4982                         actions_rx++;
4983                         break;
4984                 }
4985         }
4986         /* Add set meta action and end action for the Rx flow. */
4987         tag_action = actions_rx;
4988         tag_action->type = (enum rte_flow_action_type)
4989                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4990         actions_rx++;
4991         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4992         actions_rx++;
4993         set_tag = (void *)actions_rx;
4994         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4995                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4996                 .data = flow_id,
4997         };
4998         MLX5_ASSERT(set_tag->id > REG_NON);
4999         tag_action->conf = set_tag;
5000         /* Add the end action for the Tx flow, then build the Tx item list. */
5001         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
5002         addr = (void *)&pattern_tx[2];
5003         item = pattern_tx;
5004         item->type = (enum rte_flow_item_type)
5005                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5006         tag_item = (void *)addr;
5007         tag_item->data = flow_id;
5008         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
5009         MLX5_ASSERT(set_tag->id > REG_NON);
5010         item->spec = tag_item;
5011         addr += sizeof(struct mlx5_rte_flow_item_tag);
5012         tag_item = (void *)addr;
5013         tag_item->data = UINT32_MAX;
5014         tag_item->id = UINT16_MAX;
5015         item->mask = tag_item;
5016         item->last = NULL;
5017         item++;
5018         item->type = RTE_FLOW_ITEM_TYPE_END;
5019         return 0;
5020 }
5021
5022 /**
5023  * The last stage of the splitting chain; it just creates the subflow
5024  * without any modification.
5025  *
5026  * @param[in] dev
5027  *   Pointer to Ethernet device.
5028  * @param[in] flow
5029  *   Parent flow structure pointer.
5030  * @param[in, out] sub_flow
5031  *   Pointer to return the created subflow, may be NULL.
5032  * @param[in] attr
5033  *   Flow rule attributes.
5034  * @param[in] items
5035  *   Pattern specification (list terminated by the END pattern item).
5036  * @param[in] actions
5037  *   Associated actions (list terminated by the END action).
5038  * @param[in] flow_split_info
5039  *   Pointer to flow split info structure.
5040  * @param[out] error
5041  *   Perform verbose error reporting if not NULL.
5042  * @return
5043  *   0 on success, negative value otherwise
5044  */
5045 static int
5046 flow_create_split_inner(struct rte_eth_dev *dev,
5047                         struct rte_flow *flow,
5048                         struct mlx5_flow **sub_flow,
5049                         const struct rte_flow_attr *attr,
5050                         const struct rte_flow_item items[],
5051                         const struct rte_flow_action actions[],
5052                         struct mlx5_flow_split_info *flow_split_info,
5053                         struct rte_flow_error *error)
5054 {
5055         struct mlx5_flow *dev_flow;
5056         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5057
5058         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
5059                                     flow_split_info->flow_idx, error);
5060         if (!dev_flow)
5061                 return -rte_errno;
5062         dev_flow->flow = flow;
5063         dev_flow->external = flow_split_info->external;
5064         dev_flow->skip_scale = flow_split_info->skip_scale;
5065         /* Subflow object was created, we must include it in the list. */
5066         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5067                       dev_flow->handle, next);
5068         /*
5069          * If dev_flow is one of the suffix flows, some actions in the suffix
5070          * flow may need the user-defined item layer flags; also pass the
5071          * metadata rxq mark flag to the suffix flow.
5072          */
5073         if (flow_split_info->prefix_layers)
5074                 dev_flow->handle->layers = flow_split_info->prefix_layers;
5075         if (flow_split_info->prefix_mark) {
5076                 MLX5_ASSERT(wks);
5077                 wks->mark = 1;
5078         }
5079         if (sub_flow)
5080                 *sub_flow = dev_flow;
5081 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5082         dev_flow->dv.table_id = flow_split_info->table_id;
5083 #endif
5084         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
5085 }
5086
5087 /**
5088  * Get the sub policy of a meter.
5089  *
5090  * @param[in] dev
5091  *   Pointer to Ethernet device.
5092  * @param[in] flow
5093  *   Parent flow structure pointer.
5094  * @param wks
5095  *   Pointer to thread flow work space.
5096  * @param[in] attr
5097  *   Flow rule attributes.
5098  * @param[in] items
5099  *   Pattern specification (list terminated by the END pattern item).
5100  * @param[out] error
5101  *   Perform verbose error reporting if not NULL.
5102  *
5103  * @return
5104  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
5105  */
5106 static struct mlx5_flow_meter_sub_policy *
5107 get_meter_sub_policy(struct rte_eth_dev *dev,
5108                      struct rte_flow *flow,
5109                      struct mlx5_flow_workspace *wks,
5110                      const struct rte_flow_attr *attr,
5111                      const struct rte_flow_item items[],
5112                      struct rte_flow_error *error)
5113 {
5114         struct mlx5_flow_meter_policy *policy;
5115         struct mlx5_flow_meter_policy *final_policy;
5116         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
5117
5118         policy = wks->policy;
5119         final_policy = policy->is_hierarchy ? wks->final_policy : policy;
5120         if (final_policy->is_rss || final_policy->is_queue) {
5121                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
5122                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
5123                 uint32_t i;
5124
5125                 /*
5126                  * This is a tmp dev_flow,
5127                  * no need to register any matcher for it in translate.
5128                  */
5129                 wks->skip_matcher_reg = 1;
5130                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
5131                         struct mlx5_flow dev_flow = {0};
5132                         struct mlx5_flow_handle dev_handle = { {0} };
5133                         uint8_t fate = final_policy->act_cnt[i].fate_action;
5134
5135                         if (fate == MLX5_FLOW_FATE_SHARED_RSS) {
5136                                 const struct rte_flow_action_rss *rss_act =
5137                                         final_policy->act_cnt[i].rss->conf;
5138                                 struct rte_flow_action rss_actions[2] = {
5139                                         [0] = {
5140                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
5141                                         .conf = rss_act,
5142                                         },
5143                                         [1] = {
5144                                         .type = RTE_FLOW_ACTION_TYPE_END,
5145                                         .conf = NULL,
5146                                         }
5147                                 };
5148
5149                                 dev_flow.handle = &dev_handle;
5150                                 dev_flow.ingress = attr->ingress;
5151                                 dev_flow.flow = flow;
5152                                 dev_flow.external = 0;
5153 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5154                                 dev_flow.dv.transfer = attr->transfer;
5155 #endif
5156                 /*
5157                  * Translate the RSS action to get the RSS hash fields.
5158                  */
5159                                 if (flow_drv_translate(dev, &dev_flow, attr,
5160                                                 items, rss_actions, error))
5161                                         goto exit;
5162                                 rss_desc_v[i] = wks->rss_desc;
5163                                 rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
5164                                 rss_desc_v[i].hash_fields =
5165                                                 dev_flow.hash_fields;
5166                                 rss_desc_v[i].queue_num =
5167                                                 rss_desc_v[i].hash_fields ?
5168                                                 rss_desc_v[i].queue_num : 1;
5169                                 rss_desc_v[i].tunnel =
5170                                                 !!(dev_flow.handle->layers &
5171                                                    MLX5_FLOW_LAYER_TUNNEL);
5172                                 /* Use the RSS queues in the containers. */
5173                                 rss_desc_v[i].queue =
5174                                         (uint16_t *)(uintptr_t)rss_act->queue;
5175                                 rss_desc[i] = &rss_desc_v[i];
5176                         } else if (fate == MLX5_FLOW_FATE_QUEUE) {
5177                                 /* This is queue action. */
5178                                 rss_desc_v[i] = wks->rss_desc;
5179                                 rss_desc_v[i].key_len = 0;
5180                                 rss_desc_v[i].hash_fields = 0;
5181                                 rss_desc_v[i].queue =
5182                                         &final_policy->act_cnt[i].queue;
5183                                 rss_desc_v[i].queue_num = 1;
5184                                 rss_desc[i] = &rss_desc_v[i];
5185                         } else {
5186                                 rss_desc[i] = NULL;
5187                         }
5188                 }
5189                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
5190                                                 flow, policy, rss_desc);
5191         } else {
5192                 enum mlx5_meter_domain mtr_domain =
5193                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5194                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5195                                                 MLX5_MTR_DOMAIN_INGRESS);
5196                 sub_policy = policy->sub_policys[mtr_domain][0];
5197         }
5198         if (!sub_policy)
5199                 rte_flow_error_set(error, EINVAL,
5200                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5201                                    "Failed to get meter sub-policy.");
5202 exit:
5203         return sub_policy;
5204 }
5205
5206 /**
5207  * Split the meter flow.
5208  *
5209  * As the meter flow will be split into three sub flows, the actions
5210  * other than the meter action only make sense when the meter accepts
5211  * the packet. If the packet is to be dropped, no additional
5212  * actions should be taken.
5213  *
5214  * One special kind of action, which decapsulates the L3 tunnel
5215  * header, is placed in the prefix sub flow so that the L3 tunnel
5216  * header is not taken into account.
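 *
 * For example (an illustrative sketch only), a rule with actions
 * [ RAW_DECAP / METER / QUEUE / END ] is roughly split into:
 *   - prefix flow: [ RAW_DECAP / TAG (meter id, flow id) / METER / END ],
 *     with an extra internal JUMP to the policer table for an ASO meter,
 *   - suffix flow: matching the TAG item and carrying [ QUEUE / END ],
 * so the queue action applies only to packets accepted by the meter.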
5217  *
5218  * @param[in] dev
5219  *   Pointer to Ethernet device.
5220  * @param[in] flow
5221  *   Parent flow structure pointer.
5222  * @param wks
5223  *   Pointer to thread flow work space.
5224  * @param[in] attr
5225  *   Flow rule attributes.
5226  * @param[in] items
5227  *   Pattern specification (list terminated by the END pattern item).
5228  * @param[out] sfx_items
5229  *   Suffix flow match items (list terminated by the END pattern item).
5230  * @param[in] actions
5231  *   Associated actions (list terminated by the END action).
5232  * @param[out] actions_sfx
5233  *   Suffix flow actions.
5234  * @param[out] actions_pre
5235  *   Prefix flow actions.
5236  * @param[out] mtr_flow_id
5237  *   Pointer to meter flow id.
5238  * @param[out] error
5239  *   Perform verbose error reporting if not NULL.
5240  *
5241  * @return
5242  *   0 on success, a negative errno value otherwise and rte_errno is set.
5243  */
5244 static int
5245 flow_meter_split_prep(struct rte_eth_dev *dev,
5246                       struct rte_flow *flow,
5247                       struct mlx5_flow_workspace *wks,
5248                       const struct rte_flow_attr *attr,
5249                       const struct rte_flow_item items[],
5250                       struct rte_flow_item sfx_items[],
5251                       const struct rte_flow_action actions[],
5252                       struct rte_flow_action actions_sfx[],
5253                       struct rte_flow_action actions_pre[],
5254                       uint32_t *mtr_flow_id,
5255                       struct rte_flow_error *error)
5256 {
5257         struct mlx5_priv *priv = dev->data->dev_private;
5258         struct mlx5_flow_meter_info *fm = wks->fm;
5259         struct rte_flow_action *tag_action = NULL;
5260         struct rte_flow_item *tag_item;
5261         struct mlx5_rte_flow_action_set_tag *set_tag;
5262         const struct rte_flow_action_raw_encap *raw_encap;
5263         const struct rte_flow_action_raw_decap *raw_decap;
5264         struct mlx5_rte_flow_item_tag *tag_item_spec;
5265         struct mlx5_rte_flow_item_tag *tag_item_mask;
5266         uint32_t tag_id = 0;
5267         struct rte_flow_item *vlan_item_dst = NULL;
5268         const struct rte_flow_item *vlan_item_src = NULL;
5269         const struct rte_flow_item *orig_items = items;
5270         struct rte_flow_action *hw_mtr_action;
5271         struct rte_flow_action *action_pre_head = NULL;
5272         int32_t flow_src_port = priv->representor_id;
5273         bool mtr_first;
5274         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
5275         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
5276                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
5277         uint32_t flow_id = 0;
5278         uint32_t flow_id_reversed = 0;
5279         uint8_t flow_id_bits = 0;
5280         bool after_meter = false;
5281         int shift;
5282
5283         /* Prepare the suffix subflow items. */
5284         tag_item = sfx_items++;
5285         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5286                 struct mlx5_priv *port_priv;
5287                 const struct rte_flow_item_port_id *pid_v;
5288                 int item_type = items->type;
5289
5290                 switch (item_type) {
5291                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
5292                         pid_v = items->spec;
5293                         MLX5_ASSERT(pid_v);
5294                         port_priv = mlx5_port_to_eswitch_info(pid_v->id, false);
5295                         if (!port_priv)
5296                                 return rte_flow_error_set(error,
5297                                                 rte_errno,
5298                                                 RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
5299                                                 pid_v,
5300                                                 "Failed to get port info.");
5301                         flow_src_port = port_priv->representor_id;
5302                         if (!fm->def_policy && wks->policy->is_hierarchy &&
5303                             flow_src_port != priv->representor_id) {
5304                                 if (flow_drv_mtr_hierarchy_rule_create(dev,
5305                                                                 flow, fm,
5306                                                                 flow_src_port,
5307                                                                 items,
5308                                                                 error))
5309                                         return -rte_errno;
5310                         }
5311                         memcpy(sfx_items, items, sizeof(*sfx_items));
5312                         sfx_items++;
5313                         break;
5314                 case RTE_FLOW_ITEM_TYPE_VLAN:
5315                         /* Decide below whether to copy the VLAN item. */
5316                         vlan_item_src = items;
5317                         vlan_item_dst = sfx_items++;
5318                         vlan_item_dst->type = RTE_FLOW_ITEM_TYPE_VOID;
5319                         break;
5320                 default:
5321                         break;
5322                 }
5323         }
5324         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
5325         sfx_items++;
5326         mtr_first = priv->sh->meter_aso_en &&
5327                 (attr->egress || (attr->transfer && flow_src_port != UINT16_MAX));
5328         /* For ASO meter, the meter action must precede the tag in Tx direction. */
5329         if (mtr_first) {
5330                 action_pre_head = actions_pre++;
5331                 /* Leave space for tag action. */
5332                 tag_action = actions_pre++;
5333         }
5334         /* Prepare the actions for prefix and suffix flow. */
5335         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5336                 struct rte_flow_action *action_cur = NULL;
5337
5338                 switch (actions->type) {
5339                 case RTE_FLOW_ACTION_TYPE_METER:
5340                         if (mtr_first) {
5341                                 action_cur = action_pre_head;
5342                         } else {
5343                                 /* Leave space for tag action. */
5344                                 tag_action = actions_pre++;
5345                                 action_cur = actions_pre++;
5346                         }
5347                         after_meter = true;
5348                         break;
5349                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5350                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5351                         action_cur = actions_pre++;
5352                         break;
5353                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5354                         raw_encap = actions->conf;
5355                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
5356                                 action_cur = actions_pre++;
5357                         break;
5358                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5359                         raw_decap = actions->conf;
5360                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
5361                                 action_cur = actions_pre++;
5362                         break;
5363                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5364                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5365                         if (vlan_item_dst && vlan_item_src) {
5366                                 memcpy(vlan_item_dst, vlan_item_src,
5367                                         sizeof(*vlan_item_dst));
5368                                 /*
5369                                  * Convert to internal match item, it is used
5370                                  * for vlan push and set vid.
5371                                  */
5372                                 vlan_item_dst->type = (enum rte_flow_item_type)
5373                                                 MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
5374                         }
5375                         break;
5376                 case RTE_FLOW_ACTION_TYPE_COUNT:
5377                         if (fm->def_policy)
5378                                 action_cur = after_meter ?
5379                                                 actions_sfx++ : actions_pre++;
5380                         break;
5381                 default:
5382                         break;
5383                 }
5384                 if (!action_cur)
5385                         action_cur = (fm->def_policy) ?
5386                                         actions_sfx++ : actions_pre++;
5387                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
5388         }
5389         /* Add end action to the actions. */
5390         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
5391         if (priv->sh->meter_aso_en) {
5392                 /*
5393                  * For an ASO meter, an extra jump action must be added
5394                  * explicitly to jump from the meter table to the policer table.
5395                  */
5396                 struct mlx5_flow_meter_sub_policy *sub_policy;
5397                 struct mlx5_flow_tbl_data_entry *tbl_data;
5398
5399                 if (!fm->def_policy) {
5400                         sub_policy = get_meter_sub_policy(dev, flow, wks,
5401                                                           attr, orig_items,
5402                                                           error);
5403                         if (!sub_policy)
5404                                 return -rte_errno;
5405                 } else {
5406                         enum mlx5_meter_domain mtr_domain =
5407                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
5408                                 (attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
5409                                                 MLX5_MTR_DOMAIN_INGRESS);
5410
5411                         sub_policy =
5412                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
5413                 }
5414                 tbl_data = container_of(sub_policy->tbl_rsc,
5415                                         struct mlx5_flow_tbl_data_entry, tbl);
5416                 hw_mtr_action = actions_pre++;
5417                 hw_mtr_action->type = (enum rte_flow_action_type)
5418                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
5419                 hw_mtr_action->conf = tbl_data->jump.action;
5420         }
5421         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
5422         actions_pre++;
5423         if (!tag_action)
5424                 return rte_flow_error_set(error, ENOMEM,
5425                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5426                                           NULL, "No tag action space.");
5427         if (!mtr_flow_id) {
5428                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
5429                 goto exit;
5430         }
5431         /* Only a default-policy meter creates the mtr flow id. */
5432         if (fm->def_policy) {
5433                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
5434                 if (!tag_id)
5435                         return rte_flow_error_set(error, ENOMEM,
5436                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5437                                         "Failed to allocate meter flow id.");
5438                 flow_id = tag_id - 1;
5439                 flow_id_bits = (!flow_id) ? 1 :
5440                                 (MLX5_REG_BITS - __builtin_clz(flow_id));
5441                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
5442                     mtr_reg_bits) {
5443                         mlx5_ipool_free(fm->flow_ipool, tag_id);
5444                         return rte_flow_error_set(error, EINVAL,
5445                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
5446                                         "Meter flow id exceeds max limit.");
5447                 }
5448                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
5449                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
5450         }
5451         /* Build tag actions and items for meter_id/meter flow_id. */
5452         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
5453         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
5454         tag_item_mask = tag_item_spec + 1;
5455         /* Both flow_id and meter_id share the same register. */
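        /*
         * Illustrative layout of the shared register (exact widths depend on
         * mtr_reg_share): the low mtr_id_offset bits hold the color, and the
         * mtr_reg_bits wide field above them holds the meter id in its low
         * bits and the bit-reversed flow id in its high bits.
         */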
5456         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5457                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
5458                                                             0, error),
5459                 .offset = mtr_id_offset,
5460                 .length = mtr_reg_bits,
5461                 .data = flow->meter,
5462         };
5463         /*
5464          * The color register bits used by flow_id grow from MSB to LSB,
5465          * so the flow_id value must be bit-reversed before storing it in RegC.
5466          */
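        /*
         * Worked example with hypothetical values: flow_id = 0x6 (binary 110)
         * and flow_id_bits = 3 give flow_id_reversed = 0x3 (binary 011), which
         * is then placed into the top flow_id_bits of that field.
         */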
5467         for (shift = 0; shift < flow_id_bits; shift++)
5468                 flow_id_reversed = (flow_id_reversed << 1) |
5469                                 ((flow_id >> shift) & 0x1);
5470         set_tag->data |=
5471                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
5472         tag_item_spec->id = set_tag->id;
5473         tag_item_spec->data = set_tag->data << mtr_id_offset;
5474         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
5475         tag_action->type = (enum rte_flow_action_type)
5476                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5477         tag_action->conf = set_tag;
5478         tag_item->type = (enum rte_flow_item_type)
5479                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
5480         tag_item->spec = tag_item_spec;
5481         tag_item->last = NULL;
5482         tag_item->mask = tag_item_mask;
5483 exit:
5484         if (mtr_flow_id)
5485                 *mtr_flow_id = tag_id;
5486         return 0;
5487 }
5488
5489 /**
5490  * Split action list having QUEUE/RSS for metadata register copy.
5491  *
5492  * Once a Q/RSS action is detected in the user's action list, the flow actions
5493  * should be split in order to copy metadata registers, which will happen in
5494  * RX_CP_TBL like:
5495  *   - CQE->flow_tag := reg_c[1] (MARK)
5496  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
5497  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
5498  * This is because the last action of each flow must be a terminal action
5499  * (QUEUE, RSS or DROP).
5500  *
5501  * A flow ID must be allocated to identify actions in the RX_ACT_TBL, and it is
5502  * stored and kept in the mlx5_flow structure for each sub_flow.
5503  *
5504  * The Q/RSS action is replaced with,
5505  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
5506  * And the following JUMP action is added at the end,
5507  *   - JUMP, to RX_CP_TBL.
5508  *
5509  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL
5510  * by the flow_create_split_metadata() routine. The flow will look like:
5511  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
5512  *
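 *
 * For example (an illustrative sketch only), an ingress rule with actions
 * [ MARK / RSS / END ] roughly becomes:
 *   - prefix flow: [ MARK / SET_TAG (reg_c[2] := flow_id) / JUMP (RX_CP_TBL) / END ]
 *   - RX_ACT_TBL flow: match TAG (reg_c[2] == flow_id) -> [ RSS / END ]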
5513  * @param dev
5514  *   Pointer to Ethernet device.
5515  * @param[out] split_actions
5516  *   Pointer to store split actions to jump to CP_TBL.
5517  * @param[in] actions
5518  *   Pointer to the list of original flow actions.
5519  * @param[in] qrss
5520  *   Pointer to the Q/RSS action.
5521  * @param[in] actions_n
5522  *   Number of original actions.
5523  * @param[in] mtr_sfx
5524  *   Check if it is in meter suffix table.
5525  * @param[out] error
5526  *   Perform verbose error reporting if not NULL.
5527  *
5528  * @return
5529  *   non-zero unique flow_id on success, otherwise 0 and
5530  *   error/rte_error are set.
5531  *   error/rte_error are set.
 */
5532 static uint32_t
5533 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
5534                           struct rte_flow_action *split_actions,
5535                           const struct rte_flow_action *actions,
5536                           const struct rte_flow_action *qrss,
5537                           int actions_n, int mtr_sfx,
5538                           struct rte_flow_error *error)
5539 {
5540         struct mlx5_priv *priv = dev->data->dev_private;
5541         struct mlx5_rte_flow_action_set_tag *set_tag;
5542         struct rte_flow_action_jump *jump;
5543         const int qrss_idx = qrss - actions;
5544         uint32_t flow_id = 0;
5545         int ret = 0;
5546
5547         /*
5548          * Given actions will be split
5549          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
5550          * - Add jump to mreg CP_TBL.
5551          * As a result, there will be one more action.
5552          */
5553         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
5554         /* Count MLX5_RTE_FLOW_ACTION_TYPE_TAG. */
5555         ++actions_n;
5556         set_tag = (void *)(split_actions + actions_n);
5557         /*
5558          * If this is not the meter suffix flow, add the tag action;
5559          * the meter suffix flow already has the tag added.
5560          */
5561         if (!mtr_sfx) {
5562                 /*
5563                  * Allocate the new subflow ID. This one is unique within
5564                  * device and not shared with representors. Otherwise,
5565                  * we would have to resolve multi-thread access synch
5566                  * issue. Each flow on the shared device is appended
5567                  * with source vport identifier, so the resulting
5568                  * flows will be unique in the shared (by master and
5569                  * representors) domain even if they have coinciding
5570                  * IDs.
5571                  */
5572                 mlx5_ipool_malloc(priv->sh->ipool
5573                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
5574                 if (!flow_id)
5575                         return rte_flow_error_set(error, ENOMEM,
5576                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5577                                                   NULL, "can't allocate id "
5578                                                   "for split Q/RSS subflow");
5579                 /* Internal SET_TAG action to set flow ID. */
5580                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
5581                         .data = flow_id,
5582                 };
5583                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
5584                 if (ret < 0)
5585                         return ret;
5586                 set_tag->id = ret;
5587                 /* Construct new actions array. */
5588                 /* Replace QUEUE/RSS action. */
5589                 split_actions[qrss_idx] = (struct rte_flow_action){
5590                         .type = (enum rte_flow_action_type)
5591                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5592                         .conf = set_tag,
5593                 };
5594         } else {
5595                 /*
5596                  * If this is the meter suffix flow, the tag already exists.
5597                  * Set the QUEUE/RSS action to void.
5598                  */
5599                 split_actions[qrss_idx].type = RTE_FLOW_ACTION_TYPE_VOID;
5600         }
5601         /* JUMP action to jump to mreg copy table (CP_TBL). */
5602         jump = (void *)(set_tag + 1);
5603         *jump = (struct rte_flow_action_jump){
5604                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5605         };
5606         split_actions[actions_n - 2] = (struct rte_flow_action){
5607                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5608                 .conf = jump,
5609         };
5610         split_actions[actions_n - 1] = (struct rte_flow_action){
5611                 .type = RTE_FLOW_ACTION_TYPE_END,
5612         };
5613         return flow_id;
5614 }
5615
5616 /**
5617  * Extend the given action list for Tx metadata copy.
5618  *
5619  * Copy the given action list to the ext_actions and add flow metadata register
5620  * copy action in order to copy reg_a set by WQE to reg_c[0].
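 *
 * For example (an illustrative sketch only), egress actions
 * [ RAW_ENCAP / END ] become
 * [ COPY_MREG (reg_c[0] := reg_a) / RAW_ENCAP / END ];
 * without an encap action the copy is typically appended just before END.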
5621  *
5622  * @param[out] ext_actions
5623  *   Pointer to the extended action list.
5624  * @param[in] actions
5625  *   Pointer to the list of actions.
5626  * @param[in] actions_n
5627  *   Number of actions in the list.
5628  * @param[out] error
5629  *   Perform verbose error reporting if not NULL.
5630  * @param[in] encap_idx
5631  *   The encap action index.
5632  *
5633  * @return
5634  *   0 on success, negative value otherwise
5635  */
5636 static int
5637 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5638                        struct rte_flow_action *ext_actions,
5639                        const struct rte_flow_action *actions,
5640                        int actions_n, struct rte_flow_error *error,
5641                        int encap_idx)
5642 {
5643         struct mlx5_flow_action_copy_mreg *cp_mreg =
5644                 (struct mlx5_flow_action_copy_mreg *)
5645                         (ext_actions + actions_n + 1);
5646         int ret;
5647
5648         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5649         if (ret < 0)
5650                 return ret;
5651         cp_mreg->dst = ret;
5652         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5653         if (ret < 0)
5654                 return ret;
5655         cp_mreg->src = ret;
5656         if (encap_idx != 0)
5657                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5658         if (encap_idx == actions_n - 1) {
5659                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5660                         .type = (enum rte_flow_action_type)
5661                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5662                         .conf = cp_mreg,
5663                 };
5664                 ext_actions[actions_n] = (struct rte_flow_action){
5665                         .type = RTE_FLOW_ACTION_TYPE_END,
5666                 };
5667         } else {
5668                 ext_actions[encap_idx] = (struct rte_flow_action){
5669                         .type = (enum rte_flow_action_type)
5670                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5671                         .conf = cp_mreg,
5672                 };
5673                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5674                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5675         }
5676         return 0;
5677 }
5678
5679 /**
5680  * Check the match action from the action list.
5681  *
5682  * @param[in] actions
5683  *   Pointer to the list of actions.
5684  * @param[in] attr
5685  *   Flow rule attributes.
5686  * @param[in] action
5687  *   The action type to look for in the action list.
5688  * @param[out] match_action_pos
5689  *   Pointer to the position of the matched action if it exists, otherwise -1.
5690  * @param[out] qrss_action_pos
5691  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
5692  * @param[out] modify_after_mirror
5693  *   Pointer to the flag of modify action after FDB mirroring.
5694  *
5695  * @return
5696  *   > 0 the total number of actions.
5697  *   0 if the match action is not found in the action list.
5698  */
5699 static int
5700 flow_check_match_action(const struct rte_flow_action actions[],
5701                         const struct rte_flow_attr *attr,
5702                         enum rte_flow_action_type action,
5703                         int *match_action_pos, int *qrss_action_pos,
5704                         int *modify_after_mirror)
5705 {
5706         const struct rte_flow_action_sample *sample;
5707         const struct rte_flow_action_raw_decap *decap;
5708         int actions_n = 0;
5709         uint32_t ratio = 0;
5710         int sub_type = 0;
5711         int flag = 0;
5712         int fdb_mirror = 0;
5713
5714         *match_action_pos = -1;
5715         *qrss_action_pos = -1;
5716         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5717                 if (actions->type == action) {
5718                         flag = 1;
5719                         *match_action_pos = actions_n;
5720                 }
5721                 switch (actions->type) {
5722                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5723                 case RTE_FLOW_ACTION_TYPE_RSS:
5724                         *qrss_action_pos = actions_n;
5725                         break;
5726                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5727                         sample = actions->conf;
5728                         ratio = sample->ratio;
5729                         sub_type = ((const struct rte_flow_action *)
5730                                         (sample->actions))->type;
5731                         if (ratio == 1 && attr->transfer)
5732                                 fdb_mirror = 1;
5733                         break;
5734                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5735                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5736                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5737                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5738                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5739                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5740                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5741                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5742                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5743                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5744                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5745                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5746                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5747                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5748                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5749                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5750                 case RTE_FLOW_ACTION_TYPE_FLAG:
5751                 case RTE_FLOW_ACTION_TYPE_MARK:
5752                 case RTE_FLOW_ACTION_TYPE_SET_META:
5753                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5754                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5755                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5756                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5757                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5758                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5759                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5760                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5761                 case RTE_FLOW_ACTION_TYPE_METER:
5762                         if (fdb_mirror)
5763                                 *modify_after_mirror = 1;
5764                         break;
5765                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5766                         decap = actions->conf;
5767                         while ((++actions)->type == RTE_FLOW_ACTION_TYPE_VOID)
5768                                 ;
5769                         actions_n++;
5770                         if (actions->type == RTE_FLOW_ACTION_TYPE_RAW_ENCAP) {
5771                                 const struct rte_flow_action_raw_encap *encap =
5772                                                                 actions->conf;
5773                                 if (decap->size <=
5774                                         MLX5_ENCAPSULATION_DECISION_SIZE &&
5775                                     encap->size >
5776                                         MLX5_ENCAPSULATION_DECISION_SIZE)
5777                                         /* L3 encap. */
5778                                         break;
5779                         }
5780                         if (fdb_mirror)
5781                                 *modify_after_mirror = 1;
5782                         break;
5783                 default:
5784                         break;
5785                 }
5786                 actions_n++;
5787         }
5788         if (flag && fdb_mirror && !*modify_after_mirror) {
5789                 /* FDB mirroring is implemented with the destination array
5790                  * instead of the FLOW_SAMPLER object.
5791                  */
5792                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5793                         flag = 0;
5794         }
5795         /* Count RTE_FLOW_ACTION_TYPE_END. */
5796         return flag ? actions_n + 1 : 0;
5797 }
5798
5799 #define SAMPLE_SUFFIX_ITEM 3
5800
5801 /**
5802  * Split the sample flow.
5803  *
5804  * As the sample flow will be split into two sub flows, the sample flow
5805  * keeps the sample action while the other actions move to a new suffix flow.
5806  *
5807  * Also add a unique tag id with a tag action to the sample flow;
5808  * the same tag id will be used as a match in the suffix flow.
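 *
 * For example (an illustrative sketch only), transfer actions
 * [ SAMPLE / PORT_ID / END ] roughly become:
 *   - prefix flow: [ TAG (set unique id) / SAMPLE / END ]
 *   - suffix flow: matching the TAG item -> [ PORT_ID / END ]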
5809  *
5810  * @param dev
5811  *   Pointer to Ethernet device.
5812  * @param[in] add_tag
5813  *   Add extra tag action flag.
5814  * @param[out] sfx_items
5815  *   Suffix flow match items (list terminated by the END pattern item).
5816  * @param[in] actions
5817  *   Associated actions (list terminated by the END action).
5818  * @param[out] actions_sfx
5819  *   Suffix flow actions.
5820  * @param[out] actions_pre
5821  *   Prefix flow actions.
5822  * @param[in] actions_n
5823  *  The total number of actions.
5824  * @param[in] sample_action_pos
5825  *   The sample action position.
5826  * @param[in] qrss_action_pos
5827  *   The Queue/RSS action position.
5828  * @param[in] jump_table
5829  *   Add extra jump action flag.
5830  * @param[out] error
5831  *   Perform verbose error reporting if not NULL.
5832  *
5833  * @return
5834  *   0 or a unique, non-zero tag id on success, a negative errno value
5835  *   otherwise and rte_errno is set.
5836  */
5837 static int
5838 flow_sample_split_prep(struct rte_eth_dev *dev,
5839                        int add_tag,
5840                        const struct rte_flow_item items[],
5841                        struct rte_flow_item sfx_items[],
5842                        const struct rte_flow_action actions[],
5843                        struct rte_flow_action actions_sfx[],
5844                        struct rte_flow_action actions_pre[],
5845                        int actions_n,
5846                        int sample_action_pos,
5847                        int qrss_action_pos,
5848                        int jump_table,
5849                        struct rte_flow_error *error)
5850 {
5851         struct mlx5_priv *priv = dev->data->dev_private;
5852         struct mlx5_rte_flow_action_set_tag *set_tag;
5853         struct mlx5_rte_flow_item_tag *tag_spec;
5854         struct mlx5_rte_flow_item_tag *tag_mask;
5855         struct rte_flow_action_jump *jump_action;
5856         uint32_t tag_id = 0;
5857         int append_index = 0;
5858         int set_tag_idx = -1;
5859         int index;
5860         int ret;
5861
5862         if (sample_action_pos < 0)
5863                 return rte_flow_error_set(error, EINVAL,
5864                                           RTE_FLOW_ERROR_TYPE_ACTION,
5865                                           NULL, "invalid position of sample "
5866                                           "action in list");
5867         /* Prepare the actions for prefix and suffix flow. */
5868         if (add_tag) {
5869                 /* Compute the index of the newly added tag action so that
5870                  * it precedes the PUSH_VLAN or ENCAP action.
5871                  */
5872                 const struct rte_flow_action_raw_encap *raw_encap;
5873                 const struct rte_flow_action *action = actions;
5874                 int encap_idx;
5875                 int action_idx = 0;
5876                 int raw_decap_idx = -1;
5877                 int push_vlan_idx = -1;
5878                 for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
5879                         switch (action->type) {
5880                         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5881                                 raw_decap_idx = action_idx;
5882                                 break;
5883                         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
5884                                 raw_encap = action->conf;
5885                                 if (raw_encap->size >
5886                                         MLX5_ENCAPSULATION_DECISION_SIZE) {
5887                                         encap_idx = raw_decap_idx != -1 ?
5888                                                     raw_decap_idx : action_idx;
5889                                         if (encap_idx < sample_action_pos &&
5890                                             push_vlan_idx == -1)
5891                                                 set_tag_idx = encap_idx;
5892                                 }
5893                                 break;
5894                         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
5895                         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
5896                                 encap_idx = action_idx;
5897                                 if (encap_idx < sample_action_pos &&
5898                                     push_vlan_idx == -1)
5899                                         set_tag_idx = encap_idx;
5900                                 break;
5901                         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5902                         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5903                                 push_vlan_idx = action_idx;
5904                                 if (push_vlan_idx < sample_action_pos)
5905                                         set_tag_idx = action_idx;
5906                                 break;
5907                         default:
5908                                 break;
5909                         }
5910                         action_idx++;
5911                 }
5912         }
5913         /* Prepare the actions for prefix and suffix flow. */
5914         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5915                 index = qrss_action_pos;
5916                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
5917                 if (index != 0)
5918                         memcpy(actions_pre, actions,
5919                                sizeof(struct rte_flow_action) * index);
5920                 /* Put other actions preceding the sample action into the prefix flow. */
5921                 if (sample_action_pos > index + 1)
5922                         memcpy(actions_pre + index, actions + index + 1,
5923                                sizeof(struct rte_flow_action) *
5924                                (sample_action_pos - index - 1));
5925                 index = sample_action_pos - 1;
5926                 /* Put Queue/RSS action into Suffix flow. */
5927                 memcpy(actions_sfx, actions + qrss_action_pos,
5928                        sizeof(struct rte_flow_action));
5929                 actions_sfx++;
5930         } else if (add_tag && set_tag_idx >= 0) {
5931                 if (set_tag_idx > 0)
5932                         memcpy(actions_pre, actions,
5933                                sizeof(struct rte_flow_action) * set_tag_idx);
5934                 memcpy(actions_pre + set_tag_idx + 1, actions + set_tag_idx,
5935                        sizeof(struct rte_flow_action) *
5936                        (sample_action_pos - set_tag_idx));
5937                 index = sample_action_pos;
5938         } else {
5939                 index = sample_action_pos;
5940                 if (index != 0)
5941                         memcpy(actions_pre, actions,
5942                                sizeof(struct rte_flow_action) * index);
5943         }
5944         /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5945          * For CX6DX and above, the metadata registers Cx preserve their value;
5946          * add an extra tag action for NIC-RX and the E-Switch domain.
5947          */
5948         if (add_tag) {
5949                 /* Prepare the prefix tag action. */
5950                 append_index++;
5951                 set_tag = (void *)(actions_pre + actions_n + append_index);
5952                 ret = mlx5_flow_get_reg_id(dev, MLX5_SAMPLE_ID, 0, error);
5953                 /* A trusted VF/SF on CX5 does not support meter, so the
5954                  * reserved metadata regC is REG_NON; fall back to using the
5955                  * application tag index 0.
5956                  */
5957                 if (unlikely(ret == REG_NON))
5958                         ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5959                 if (ret < 0)
5960                         return ret;
5961                 mlx5_ipool_malloc(priv->sh->ipool
5962                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5963                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5964                         .id = ret,
5965                         .data = tag_id,
5966                 };
5967                 /* Prepare the suffix subflow items. */
5968                 for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
5969                         if (items->type == RTE_FLOW_ITEM_TYPE_PORT_ID) {
5970                                 memcpy(sfx_items, items, sizeof(*sfx_items));
5971                                 sfx_items++;
5972                         }
5973                 }
5974                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5975                 tag_spec->data = tag_id;
5976                 tag_spec->id = set_tag->id;
5977                 tag_mask = tag_spec + 1;
5978                 tag_mask->data = UINT32_MAX;
5979                 sfx_items[0] = (struct rte_flow_item){
5980                         .type = (enum rte_flow_item_type)
5981                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5982                         .spec = tag_spec,
5983                         .last = NULL,
5984                         .mask = tag_mask,
5985                 };
5986                 sfx_items[1] = (struct rte_flow_item){
5987                         .type = (enum rte_flow_item_type)
5988                                 RTE_FLOW_ITEM_TYPE_END,
5989                 };
5990                 /* Prepare the tag action in prefix subflow. */
5991                 set_tag_idx = (set_tag_idx == -1) ? index : set_tag_idx;
5992                 actions_pre[set_tag_idx] =
5993                         (struct rte_flow_action){
5994                         .type = (enum rte_flow_action_type)
5995                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5996                         .conf = set_tag,
5997                 };
5998                 /* Update the next sample position since one tag action was added. */
5999                 index += 1;
6000         }
6001         /* Copy the sample action into prefix flow. */
6002         memcpy(actions_pre + index, actions + sample_action_pos,
6003                sizeof(struct rte_flow_action));
6004         index += 1;
6005         /* For a modify action after the sample action in E-Switch mirroring,
6006          * add an extra jump action to the prefix subflow to jump into the next
6007          * table, then do the modify action in the new table.
6008          */
6009         if (jump_table) {
6010                 /* Prepare the prefix jump action. */
6011                 append_index++;
6012                 jump_action = (void *)(actions_pre + actions_n + append_index);
6013                 jump_action->group = jump_table;
6014                 actions_pre[index++] =
6015                         (struct rte_flow_action){
6016                         .type = (enum rte_flow_action_type)
6017                                 RTE_FLOW_ACTION_TYPE_JUMP,
6018                         .conf = jump_action,
6019                 };
6020         }
6021         actions_pre[index] = (struct rte_flow_action){
6022                 .type = (enum rte_flow_action_type)
6023                         RTE_FLOW_ACTION_TYPE_END,
6024         };
6025         /* Put the actions after sample into Suffix flow. */
6026         memcpy(actions_sfx, actions + sample_action_pos + 1,
6027                sizeof(struct rte_flow_action) *
6028                (actions_n - sample_action_pos - 1));
6029         return tag_id;
6030 }
6031
6032 /**
6033  * The splitting for metadata feature.
6034  *
6035  * - Q/RSS action on NIC Rx should be split in order to pass by
6036  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
6037  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
6038  *
6039  * - All the actions on NIC Tx should have a mreg copy action to
6040  *   copy reg_a from WQE to reg_c[0].
6041  *
6042  * @param dev
6043  *   Pointer to Ethernet device.
6044  * @param[in] flow
6045  *   Parent flow structure pointer.
6046  * @param[in] attr
6047  *   Flow rule attributes.
6048  * @param[in] items
6049  *   Pattern specification (list terminated by the END pattern item).
6050  * @param[in] actions
6051  *   Associated actions (list terminated by the END action).
6052  * @param[in] flow_split_info
6053  *   Pointer to flow split info structure.
6054  * @param[out] error
6055  *   Perform verbose error reporting if not NULL.
6056  * @return
6057  *   0 on success, negative value otherwise
6058  */
6059 static int
6060 flow_create_split_metadata(struct rte_eth_dev *dev,
6061                            struct rte_flow *flow,
6062                            const struct rte_flow_attr *attr,
6063                            const struct rte_flow_item items[],
6064                            const struct rte_flow_action actions[],
6065                            struct mlx5_flow_split_info *flow_split_info,
6066                            struct rte_flow_error *error)
6067 {
6068         struct mlx5_priv *priv = dev->data->dev_private;
6069         struct mlx5_sh_config *config = &priv->sh->config;
6070         const struct rte_flow_action *qrss = NULL;
6071         struct rte_flow_action *ext_actions = NULL;
6072         struct mlx5_flow *dev_flow = NULL;
6073         uint32_t qrss_id = 0;
6074         int mtr_sfx = 0;
6075         size_t act_size;
6076         int actions_n;
6077         int encap_idx;
6078         int ret;
6079
6080         /* Check whether extensive metadata feature is engaged. */
6081         if (!config->dv_flow_en ||
6082             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
6083             !mlx5_flow_ext_mreg_supported(dev))
6084                 return flow_create_split_inner(dev, flow, NULL, attr, items,
6085                                                actions, flow_split_info, error);
6086         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
6087                                                            &encap_idx);
6088         if (qrss) {
6089                 /* Exclude hairpin flows from splitting. */
6090                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
6091                         const struct rte_flow_action_queue *queue;
6092
6093                         queue = qrss->conf;
6094                         if (mlx5_rxq_is_hairpin(dev, queue->index))
6095                                 qrss = NULL;
6096                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
6097                         const struct rte_flow_action_rss *rss;
6098
6099                         rss = qrss->conf;
6100                         if (mlx5_rxq_is_hairpin(dev, rss->queue[0]))
6101                                 qrss = NULL;
6102                 }
6103         }
6104         if (qrss) {
6105                 /* Check if it is in meter suffix table. */
6106                 mtr_sfx = attr->group == (attr->transfer ?
6107                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6108                           MLX5_FLOW_TABLE_LEVEL_METER);
6109                 /*
6110                  * Q/RSS action on NIC Rx should be split in order to pass by
6111                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
6112                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
6113                  */
6114                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6115                            sizeof(struct rte_flow_action_set_tag) +
6116                            sizeof(struct rte_flow_action_jump);
6117                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6118                                           SOCKET_ID_ANY);
6119                 if (!ext_actions)
6120                         return rte_flow_error_set(error, ENOMEM,
6121                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6122                                                   NULL, "no memory to split "
6123                                                   "metadata flow");
6124                 /*
6125                  * Create the new action list with the Q/RSS action removed and
6126                  * a set tag plus a jump to the register copy table (RX_CP_TBL)
6127                  * appended. Preallocate the unique tag ID here in advance,
6128                  * because it is needed for the set tag action.
6129                  */
6130                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
6131                                                     qrss, actions_n,
6132                                                     mtr_sfx, error);
6133                 if (!mtr_sfx && !qrss_id) {
6134                         ret = -rte_errno;
6135                         goto exit;
6136                 }
6137         } else if (attr->egress && !attr->transfer) {
6138                 /*
6139                  * All the flows created on NIC Tx should append a metadata
6140                  * register copy action to copy reg_a from the WQE to reg_c[meta].
6141                  */
6142                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
6143                            sizeof(struct mlx5_flow_action_copy_mreg);
6144                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
6145                                           SOCKET_ID_ANY);
6146                 if (!ext_actions)
6147                         return rte_flow_error_set(error, ENOMEM,
6148                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6149                                                   NULL, "no memory to split "
6150                                                   "metadata flow");
6151                 /* Create the action list appended with copy register. */
6152                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
6153                                              actions_n, error, encap_idx);
6154                 if (ret < 0)
6155                         goto exit;
6156         }
6157         /* Add the unmodified original or prefix subflow. */
6158         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6159                                       items, ext_actions ? ext_actions :
6160                                       actions, flow_split_info, error);
6161         if (ret < 0)
6162                 goto exit;
6163         MLX5_ASSERT(dev_flow);
6164         if (qrss) {
6165                 const struct rte_flow_attr q_attr = {
6166                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6167                         .ingress = 1,
6168                 };
6169                 /* Internal PMD action to set register. */
6170                 struct mlx5_rte_flow_item_tag q_tag_spec = {
6171                         .data = qrss_id,
6172                         .id = REG_NON,
6173                 };
6174                 struct rte_flow_item q_items[] = {
6175                         {
6176                                 .type = (enum rte_flow_item_type)
6177                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
6178                                 .spec = &q_tag_spec,
6179                                 .last = NULL,
6180                                 .mask = NULL,
6181                         },
6182                         {
6183                                 .type = RTE_FLOW_ITEM_TYPE_END,
6184                         },
6185                 };
6186                 struct rte_flow_action q_actions[] = {
6187                         {
6188                                 .type = qrss->type,
6189                                 .conf = qrss->conf,
6190                         },
6191                         {
6192                                 .type = RTE_FLOW_ACTION_TYPE_END,
6193                         },
6194                 };
6195                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
6196
6197                 /*
6198                  * Configure the tag item only if there is no meter subflow.
6199                  * Since the tag is already set in the meter suffix subflow,
6200                  * the meter suffix items can be used as is.
6201                  */
6202                 if (qrss_id) {
6203                         /* Not meter subflow. */
6204                         MLX5_ASSERT(!mtr_sfx);
6205                         /*
6206                          * Put the unique id in the prefix flow because it is
6207                          * destroyed after the suffix flow. The id is freed
6208                          * only when no actual flow uses it anymore, at which
6209                          * point identifier reallocation becomes possible
6210                          * (for example, for other flows in other threads).
6211                          */
6212                         dev_flow->handle->split_flow_id = qrss_id;
6213                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
6214                                                    error);
6215                         if (ret < 0)
6216                                 goto exit;
6217                         q_tag_spec.id = ret;
6218                 }
6219                 dev_flow = NULL;
6220                 /* Add suffix subflow to execute Q/RSS. */
6221                 flow_split_info->prefix_layers = layers;
6222                 flow_split_info->prefix_mark = 0;
6223                 flow_split_info->table_id = 0;
6224                 ret = flow_create_split_inner(dev, flow, &dev_flow,
6225                                               &q_attr, mtr_sfx ? items :
6226                                               q_items, q_actions,
6227                                               flow_split_info, error);
6228                 if (ret < 0)
6229                         goto exit;
6230                 /* qrss ID is freed only on failure, clear it after success. */
6231                 qrss_id = 0;
6232                 MLX5_ASSERT(dev_flow);
6233         }
6234
6235 exit:
6236         /*
6237          * We do not destroy the partially created sub_flows in case of error.
6238          * They are included in the parent flow list and will be destroyed
6239          * by flow_drv_destroy().
6240          */
6241         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
6242                         qrss_id);
6243         mlx5_free(ext_actions);
6244         return ret;
6245 }
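
/*
 * A rough illustration of what the metadata split above produces for a
 * simple "pattern X -> QUEUE 3" rule when extensive metadata is enabled
 * (the reg_c index is illustrative; the real one is resolved through
 * mlx5_flow_get_reg_id(MLX5_COPY_MARK)):
 *
 *   prefix subflow, original group:
 *     match: X
 *     actions: set_tag(reg_c[x] = qrss_id), jump -> RX_CP_TBL
 *   suffix subflow, group MLX5_FLOW_MREG_ACT_TABLE_GROUP:
 *     match: tag(reg_c[x] == qrss_id)
 *     actions: QUEUE 3
 */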
6246
6247 /**
6248  * Create meter internal drop flow with the original pattern.
6249  *
6250  * @param dev
6251  *   Pointer to Ethernet device.
6252  * @param[in] flow
6253  *   Parent flow structure pointer.
6254  * @param[in] attr
6255  *   Flow rule attributes.
6256  * @param[in] items
6257  *   Pattern specification (list terminated by the END pattern item).
6258  * @param[in] flow_split_info
6259  *   Pointer to flow split info structure.
6260  * @param[in] fm
6261  *   Pointer to flow meter structure.
6262  * @param[out] error
6263  *   Perform verbose error reporting if not NULL.
6264  * @return
6265  *   0 on success, negative value otherwise
6266  */
6267 static uint32_t
6268 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
6269                         struct rte_flow *flow,
6270                         const struct rte_flow_attr *attr,
6271                         const struct rte_flow_item items[],
6272                         struct mlx5_flow_split_info *flow_split_info,
6273                         struct mlx5_flow_meter_info *fm,
6274                         struct rte_flow_error *error)
6275 {
6276         struct mlx5_flow *dev_flow = NULL;
6277         struct rte_flow_attr drop_attr = *attr;
6278         struct rte_flow_action drop_actions[3];
6279         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
6280
6281         MLX5_ASSERT(fm->drop_cnt);
6282         drop_actions[0].type =
6283                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
6284         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
6285         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
6286         drop_actions[1].conf = NULL;
6287         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
6288         drop_actions[2].conf = NULL;
6289         drop_split_info.external = false;
6290         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6291         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
6292         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
6293         return flow_create_split_inner(dev, flow, &dev_flow,
6294                                 &drop_attr, items, drop_actions,
6295                                 &drop_split_info, error);
6296 }
6297
6298 /**
6299  * The splitting for meter feature.
6300  *
6301  * - The meter flow will be split into two flows, a prefix and a
6302  *   suffix flow. Packets proceed to the suffix flow only if they
6303  *   pass the prefix meter action.
6304  *
6305  * - Reg_C_5 is used to match the packet between the prefix and
6306  *   suffix flows.
6307  *
6308  * @param dev
6309  *   Pointer to Ethernet device.
6310  * @param[in] flow
6311  *   Parent flow structure pointer.
6312  * @param[in] attr
6313  *   Flow rule attributes.
6314  * @param[in] items
6315  *   Pattern specification (list terminated by the END pattern item).
6316  * @param[in] actions
6317  *   Associated actions (list terminated by the END action).
6318  * @param[in] flow_split_info
6319  *   Pointer to flow split info structure.
6320  * @param[out] error
6321  *   Perform verbose error reporting if not NULL.
6322  * @return
6323  *   0 on success, negative value otherwise
6324  */
6325 static int
6326 flow_create_split_meter(struct rte_eth_dev *dev,
6327                         struct rte_flow *flow,
6328                         const struct rte_flow_attr *attr,
6329                         const struct rte_flow_item items[],
6330                         const struct rte_flow_action actions[],
6331                         struct mlx5_flow_split_info *flow_split_info,
6332                         struct rte_flow_error *error)
6333 {
6334         struct mlx5_priv *priv = dev->data->dev_private;
6335         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6336         struct rte_flow_action *sfx_actions = NULL;
6337         struct rte_flow_action *pre_actions = NULL;
6338         struct rte_flow_item *sfx_items = NULL;
6339         struct mlx5_flow *dev_flow = NULL;
6340         struct rte_flow_attr sfx_attr = *attr;
6341         struct mlx5_flow_meter_info *fm = NULL;
6342         uint8_t skip_scale_restore;
6343         bool has_mtr = false;
6344         bool has_modify = false;
6345         bool set_mtr_reg = true;
6346         bool is_mtr_hierarchy = false;
6347         uint32_t meter_id = 0;
6348         uint32_t mtr_idx = 0;
6349         uint32_t mtr_flow_id = 0;
6350         size_t act_size;
6351         size_t item_size;
6352         int actions_n = 0;
6353         int ret = 0;
6354
6355         if (priv->mtr_en)
6356                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
6357                                                     &has_modify, &meter_id);
6358         if (has_mtr) {
6359                 if (flow->meter) {
6360                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
6361                         if (!fm)
6362                                 return rte_flow_error_set(error, EINVAL,
6363                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6364                                                 NULL, "Meter not found.");
6365                 } else {
6366                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
6367                         if (!fm)
6368                                 return rte_flow_error_set(error, EINVAL,
6369                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6370                                                 NULL, "Meter not found.");
6371                         ret = mlx5_flow_meter_attach(priv, fm,
6372                                                      &sfx_attr, error);
6373                         if (ret)
6374                                 return -rte_errno;
6375                         flow->meter = mtr_idx;
6376                 }
6377                 MLX5_ASSERT(wks);
6378                 wks->fm = fm;
6379                 if (!fm->def_policy) {
6380                         wks->policy = mlx5_flow_meter_policy_find(dev,
6381                                                                   fm->policy_id,
6382                                                                   NULL);
6383                         MLX5_ASSERT(wks->policy);
6384                         if (wks->policy->mark)
6385                                 wks->mark = 1;
6386                         if (wks->policy->is_hierarchy) {
6387                                 wks->final_policy =
6388                                 mlx5_flow_meter_hierarchy_get_final_policy(dev,
6389                                                                 wks->policy);
6390                                 if (!wks->final_policy)
6391                                         return rte_flow_error_set(error,
6392                                         EINVAL,
6393                                         RTE_FLOW_ERROR_TYPE_ACTION, NULL,
6394                                 "Failed to find terminal policy of hierarchy.");
6395                                 is_mtr_hierarchy = true;
6396                         }
6397                 }
6398                 /*
6399                  * If it isn't a default-policy meter, it is not a meter
6400                  * hierarchy, and either
6401                  * 1. there is no action in the flow that modifies the
6402                  *    packet (modify/encap/decap etc.), or
6403                  * 2. no drop count is needed for this meter,
6404                  * then there is no need to use regC to save the meter id.
6405                  */
6406                 if (!fm->def_policy && !is_mtr_hierarchy &&
6407                     (!has_modify || !fm->drop_cnt))
6408                         set_mtr_reg = false;
6409                 /* Prefix actions: meter, decap, encap, tag, jump, end, cnt. */
6410 #define METER_PREFIX_ACTION 7
6411                 act_size = (sizeof(struct rte_flow_action) *
6412                             (actions_n + METER_PREFIX_ACTION)) +
6413                            sizeof(struct mlx5_rte_flow_action_set_tag);
6414                 /* Suffix items: tag, vlan, port id, end. */
6415 #define METER_SUFFIX_ITEM 4
6416                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
6417                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6418                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
6419                                           0, SOCKET_ID_ANY);
6420                 if (!sfx_actions)
6421                         return rte_flow_error_set(error, ENOMEM,
6422                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6423                                                   NULL, "no memory to split "
6424                                                   "meter flow");
6425                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
6426                              act_size);
6427                 /* There's no suffix flow for a meter with a non-default policy. */
6428                 if (!fm->def_policy)
6429                         pre_actions = sfx_actions + 1;
6430                 else
6431                         pre_actions = sfx_actions + actions_n;
6432                 ret = flow_meter_split_prep(dev, flow, wks, &sfx_attr,
6433                                             items, sfx_items, actions,
6434                                             sfx_actions, pre_actions,
6435                                             (set_mtr_reg ? &mtr_flow_id : NULL),
6436                                             error);
6437                 if (ret) {
6438                         ret = -rte_errno;
6439                         goto exit;
6440                 }
6441                 /* Add the prefix subflow. */
6442                 skip_scale_restore = flow_split_info->skip_scale;
6443                 flow_split_info->skip_scale |=
6444                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6445                 ret = flow_create_split_inner(dev, flow, &dev_flow,
6446                                               attr, items, pre_actions,
6447                                               flow_split_info, error);
6448                 flow_split_info->skip_scale = skip_scale_restore;
6449                 if (ret) {
6450                         if (mtr_flow_id)
6451                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
6452                         ret = -rte_errno;
6453                         goto exit;
6454                 }
6455                 if (mtr_flow_id) {
6456                         dev_flow->handle->split_flow_id = mtr_flow_id;
6457                         dev_flow->handle->is_meter_flow_id = 1;
6458                 }
6459                 if (!fm->def_policy) {
6460                         if (!set_mtr_reg && fm->drop_cnt)
6461                                 ret =
6462                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
6463                                                         &sfx_attr, items,
6464                                                         flow_split_info,
6465                                                         fm, error);
6466                         goto exit;
6467                 }
6468                 /* Set the suffix group attribute. */
6469                 sfx_attr.group = sfx_attr.transfer ?
6470                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
6471                                  MLX5_FLOW_TABLE_LEVEL_METER;
6472                 flow_split_info->prefix_layers =
6473                                 flow_get_prefix_layer_flags(dev_flow);
6474                 flow_split_info->prefix_mark |= wks->mark;
6475                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
6476         }
6477         /* Add the suffix subflow, or the original flow if no meter split was done. */
6478         ret = flow_create_split_metadata(dev, flow,
6479                                          &sfx_attr, sfx_items ?
6480                                          sfx_items : items,
6481                                          sfx_actions ? sfx_actions : actions,
6482                                          flow_split_info, error);
6483 exit:
6484         if (sfx_actions)
6485                 mlx5_free(sfx_actions);
6486         return ret;
6487 }
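
/*
 * A rough sketch of the meter split performed above; the exact prefix and
 * suffix contents are produced by flow_meter_split_prep() and may differ
 * in detail (e.g. for a meter with a non-default policy):
 *
 *   prefix subflow, original group:
 *     match: original items
 *     actions: meter, set_tag(reg_c[5] = mtr_flow_id), jump -> meter suffix
 *   suffix subflow, group MLX5_FLOW_TABLE_LEVEL_METER:
 *     match: tag(reg_c[5] == mtr_flow_id) [+ optional VLAN/port id items]
 *     actions: original actions without the meter
 *
 * The suffix subflow is then passed to flow_create_split_metadata() and may
 * be split further.
 */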
6488
6489 /**
6490  * The splitting for sample feature.
6491  *
6492  * Once a sample action is detected in the action list, the flow actions are
6493  * split into a prefix sub flow and a suffix sub flow.
6494  *
6495  * The original items remain in the prefix sub flow. All actions preceding the
6496  * sample action, and the sample action itself, are copied to the prefix
6497  * sub flow; the actions following the sample action are copied to the
6498  * suffix sub flow. The Queue action is always located in the suffix sub flow.
6499  *
6500  * In order to match the packet from the prefix sub flow in the suffix sub
6501  * flow, an extra tag action is added to the prefix sub flow, and the suffix
6502  * sub flow uses a tag item with the unique flow id.
6503  *
6504  * @param dev
6505  *   Pointer to Ethernet device.
6506  * @param[in] flow
6507  *   Parent flow structure pointer.
6508  * @param[in] attr
6509  *   Flow rule attributes.
6510  * @param[in] items
6511  *   Pattern specification (list terminated by the END pattern item).
6512  * @param[in] actions
6513  *   Associated actions (list terminated by the END action).
6514  * @param[in] flow_split_info
6515  *   Pointer to flow split info structure.
6516  * @param[out] error
6517  *   Perform verbose error reporting if not NULL.
6518  * @return
6519  *   0 on success, negative value otherwise
6520  */
6521 static int
6522 flow_create_split_sample(struct rte_eth_dev *dev,
6523                          struct rte_flow *flow,
6524                          const struct rte_flow_attr *attr,
6525                          const struct rte_flow_item items[],
6526                          const struct rte_flow_action actions[],
6527                          struct mlx5_flow_split_info *flow_split_info,
6528                          struct rte_flow_error *error)
6529 {
6530         struct mlx5_priv *priv = dev->data->dev_private;
6531         struct rte_flow_action *sfx_actions = NULL;
6532         struct rte_flow_action *pre_actions = NULL;
6533         struct rte_flow_item *sfx_items = NULL;
6534         struct mlx5_flow *dev_flow = NULL;
6535         struct rte_flow_attr sfx_attr = *attr;
6536 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6537         struct mlx5_flow_dv_sample_resource *sample_res;
6538         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
6539         struct mlx5_flow_tbl_resource *sfx_tbl;
6540         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
6541 #endif
6542         size_t act_size;
6543         size_t item_size;
6544         uint32_t fdb_tx = 0;
6545         int32_t tag_id = 0;
6546         int actions_n = 0;
6547         int sample_action_pos;
6548         int qrss_action_pos;
6549         int add_tag = 0;
6550         int modify_after_mirror = 0;
6551         uint16_t jump_table = 0;
6552         const uint32_t next_ft_step = 1;
6553         int ret = 0;
6554
6555         if (priv->sampler_en)
6556                 actions_n = flow_check_match_action(actions, attr,
6557                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
6558                                         &sample_action_pos, &qrss_action_pos,
6559                                         &modify_after_mirror);
6560         if (actions_n) {
6561                 /* The prefix actions must include sample, tag, end. */
6562                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
6563                            + sizeof(struct mlx5_rte_flow_action_set_tag);
6564                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
6565                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
6566                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
6567                                           item_size), 0, SOCKET_ID_ANY);
6568                 if (!sfx_actions)
6569                         return rte_flow_error_set(error, ENOMEM,
6570                                                   RTE_FLOW_ERROR_TYPE_ACTION,
6571                                                   NULL, "no memory to split "
6572                                                   "sample flow");
6573                 /* The representor_id is UINT16_MAX for uplink. */
6574                 fdb_tx = (attr->transfer && priv->representor_id != UINT16_MAX);
6575                 /*
6576                  * When reg_c_preserve is set, metadata registers Cx preserve
6577                  * their value even through packet duplication.
6578                  */
6579                 add_tag = (!fdb_tx ||
6580                            priv->sh->cdev->config.hca_attr.reg_c_preserve);
6581                 if (add_tag)
6582                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
6583                                         + act_size);
6584                 if (modify_after_mirror)
6585                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
6586                                      next_ft_step;
6587                 pre_actions = sfx_actions + actions_n;
6588                 tag_id = flow_sample_split_prep(dev, add_tag, items, sfx_items,
6589                                                 actions, sfx_actions,
6590                                                 pre_actions, actions_n,
6591                                                 sample_action_pos,
6592                                                 qrss_action_pos, jump_table,
6593                                                 error);
6594                 if (tag_id < 0 || (add_tag && !tag_id)) {
6595                         ret = -rte_errno;
6596                         goto exit;
6597                 }
6598                 if (modify_after_mirror)
6599                         flow_split_info->skip_scale =
6600                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
6601                 /* Add the prefix subflow. */
6602                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
6603                                               items, pre_actions,
6604                                               flow_split_info, error);
6605                 if (ret) {
6606                         ret = -rte_errno;
6607                         goto exit;
6608                 }
6609                 dev_flow->handle->split_flow_id = tag_id;
6610 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
6611                 if (!modify_after_mirror) {
6612                         /* Set the sfx group attr. */
6613                         sample_res = (struct mlx5_flow_dv_sample_resource *)
6614                                                 dev_flow->dv.sample_res;
6615                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
6616                                                 sample_res->normal_path_tbl;
6617                         sfx_tbl_data = container_of(sfx_tbl,
6618                                                 struct mlx5_flow_tbl_data_entry,
6619                                                 tbl);
6620                         sfx_attr.group = sfx_attr.transfer ?
6621                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
6622                 } else {
6623                         MLX5_ASSERT(attr->transfer);
6624                         sfx_attr.group = jump_table;
6625                 }
6626                 flow_split_info->prefix_layers =
6627                                 flow_get_prefix_layer_flags(dev_flow);
6628                 MLX5_ASSERT(wks);
6629                 flow_split_info->prefix_mark |= wks->mark;
6630                 /* The suffix group level has already been scaled with the
6631                  * factor, set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
6632                  * to avoid scaling it again in translation.
6633                  */
6634                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
6635 #endif
6636         }
6637         /* Add the suffix subflow. */
6638         ret = flow_create_split_meter(dev, flow, &sfx_attr,
6639                                       sfx_items ? sfx_items : items,
6640                                       sfx_actions ? sfx_actions : actions,
6641                                       flow_split_info, error);
6642 exit:
6643         if (sfx_actions)
6644                 mlx5_free(sfx_actions);
6645         return ret;
6646 }
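
/*
 * A rough sketch of the sample split performed above, assuming a unique tag
 * id is inserted (add_tag != 0); the suffix group is taken from the sample
 * normal path table, or from jump_table when a modify action follows the
 * mirror:
 *
 *   prefix subflow, original group:
 *     match: original items
 *     actions: actions up to and including SAMPLE, set_tag(tag_id)
 *   suffix subflow:
 *     match: tag(tag_id)
 *     actions: actions after SAMPLE (Queue/RSS always land here)
 *
 * The suffix subflow then continues through flow_create_split_meter().
 */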
6647
6648 /**
6649  * Split the flow into a set of subflows. The splitters might be linked
6650  * in a chain, like this:
6651  * flow_create_split_outer() calls:
6652  *   flow_create_split_meter() calls:
6653  *     flow_create_split_metadata(meter_subflow_0) calls:
6654  *       flow_create_split_inner(metadata_subflow_0)
6655  *       flow_create_split_inner(metadata_subflow_1)
6656  *       flow_create_split_inner(metadata_subflow_2)
6657  *     flow_create_split_metadata(meter_subflow_1) calls:
6658  *       flow_create_split_inner(metadata_subflow_0)
6659  *       flow_create_split_inner(metadata_subflow_1)
6660  *       flow_create_split_inner(metadata_subflow_2)
6661  *
6662  * This provides a flexible way to add new levels of flow splitting.
6663  * All successfully created subflows are included in the parent
6664  * flow dev_flow list.
6665  *
6666  * @param dev
6667  *   Pointer to Ethernet device.
6668  * @param[in] flow
6669  *   Parent flow structure pointer.
6670  * @param[in] attr
6671  *   Flow rule attributes.
6672  * @param[in] items
6673  *   Pattern specification (list terminated by the END pattern item).
6674  * @param[in] actions
6675  *   Associated actions (list terminated by the END action).
6676  * @param[in] flow_split_info
6677  *   Pointer to flow split info structure.
6678  * @param[out] error
6679  *   Perform verbose error reporting if not NULL.
6680  * @return
6681  *   0 on success, negative value otherwise
6682  */
6683 static int
6684 flow_create_split_outer(struct rte_eth_dev *dev,
6685                         struct rte_flow *flow,
6686                         const struct rte_flow_attr *attr,
6687                         const struct rte_flow_item items[],
6688                         const struct rte_flow_action actions[],
6689                         struct mlx5_flow_split_info *flow_split_info,
6690                         struct rte_flow_error *error)
6691 {
6692         int ret;
6693
6694         ret = flow_create_split_sample(dev, flow, attr, items,
6695                                        actions, flow_split_info, error);
6696         MLX5_ASSERT(ret <= 0);
6697         return ret;
6698 }
6699
6700 static inline struct mlx5_flow_tunnel *
6701 flow_tunnel_from_rule(const struct mlx5_flow *flow)
6702 {
6703         struct mlx5_flow_tunnel *tunnel;
6704
6705 #pragma GCC diagnostic push
6706 #pragma GCC diagnostic ignored "-Wcast-qual"
6707         tunnel = (typeof(tunnel))flow->tunnel;
6708 #pragma GCC diagnostic pop
6709
6710         return tunnel;
6711 }
6712
6713 /**
6714  * Adjust flow RSS workspace if needed.
6715  *
6716  * @param wks
6717  *   Pointer to thread flow work space.
6718  * @param rss_desc
6719  *   Pointer to RSS descriptor.
6720  * @param[in] nrssq_num
6721  *   New RSS queue number.
6722  *
6723  * @return
6724  *   0 on success, -1 otherwise and rte_errno is set.
6725  */
6726 static int
6727 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6728                           struct mlx5_flow_rss_desc *rss_desc,
6729                           uint32_t nrssq_num)
6730 {
6731         if (likely(nrssq_num <= wks->rssq_num))
6732                 return 0;
6733         rss_desc->queue = realloc(rss_desc->queue,
6734                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6735         if (!rss_desc->queue) {
6736                 rte_errno = ENOMEM;
6737                 return -1;
6738         }
6739         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6740         return 0;
6741 }
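
/*
 * For example, with wks->rssq_num == 4 a request for 5 RSS queues grows
 * rss_desc->queue to RTE_ALIGN(5, 2) == 6 entries and sets wks->rssq_num
 * to 6, while a request for 3 queues is a no-op.
 */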
6742
6743 /**
6744  * Create a flow and store it in the flow list of the given type.
6745  *
6746  * @param dev
6747  *   Pointer to Ethernet device.
6748  * @param type
6749  *   Flow type (MLX5_FLOW_TYPE_*); selects the per-type flow list in
6750  *   which the created flow is stored. The returned index is only
6751  *   valid together with this type (see flow_list_destroy() and
6752  *   mlx5_flow_list_flush()).
6753  * @param[in] attr
6754  *   Flow rule attributes.
6755  * @param[in] items
6756  *   Pattern specification (list terminated by the END pattern item).
6757  * @param[in] original_actions
6758  *   Associated actions (list terminated by the END action).
6759  * @param[in] external
6760  *   This flow rule is created by a request external to the PMD.
6761  * @param[out] error
6762  *   Perform verbose error reporting if not NULL.
6763  *
6764  * @return
6765  *   A flow index on success, 0 otherwise and rte_errno is set.
6766  */
6767 static uint32_t
6768 flow_list_create(struct rte_eth_dev *dev, enum mlx5_flow_type type,
6769                  const struct rte_flow_attr *attr,
6770                  const struct rte_flow_item items[],
6771                  const struct rte_flow_action original_actions[],
6772                  bool external, struct rte_flow_error *error)
6773 {
6774         struct mlx5_priv *priv = dev->data->dev_private;
6775         struct rte_flow *flow = NULL;
6776         struct mlx5_flow *dev_flow;
6777         const struct rte_flow_action_rss *rss = NULL;
6778         struct mlx5_translated_action_handle
6779                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6780         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6781         union {
6782                 struct mlx5_flow_expand_rss buf;
6783                 uint8_t buffer[4096];
6784         } expand_buffer;
6785         union {
6786                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6787                 uint8_t buffer[2048];
6788         } actions_rx;
6789         union {
6790                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6791                 uint8_t buffer[2048];
6792         } actions_hairpin_tx;
6793         union {
6794                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6795                 uint8_t buffer[2048];
6796         } items_tx;
6797         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6798         struct mlx5_flow_rss_desc *rss_desc;
6799         const struct rte_flow_action *p_actions_rx;
6800         uint32_t i;
6801         uint32_t idx = 0;
6802         int hairpin_flow;
6803         struct rte_flow_attr attr_tx = { .priority = 0 };
6804         const struct rte_flow_action *actions;
6805         struct rte_flow_action *translated_actions = NULL;
6806         struct mlx5_flow_tunnel *tunnel;
6807         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6808         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6809         struct mlx5_flow_split_info flow_split_info = {
6810                 .external = !!external,
6811                 .skip_scale = 0,
6812                 .flow_idx = 0,
6813                 .prefix_mark = 0,
6814                 .prefix_layers = 0,
6815                 .table_id = 0
6816         };
6817         int ret;
6818
6819         MLX5_ASSERT(wks);
6820         rss_desc = &wks->rss_desc;
6821         ret = flow_action_handles_translate(dev, original_actions,
6822                                             indir_actions,
6823                                             &indir_actions_n,
6824                                             &translated_actions, error);
6825         if (ret < 0) {
6826                 MLX5_ASSERT(translated_actions == NULL);
6827                 return 0;
6828         }
6829         actions = translated_actions ? translated_actions : original_actions;
6830         p_actions_rx = actions;
6831         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6832         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6833                                 external, hairpin_flow, error);
6834         if (ret < 0)
6835                 goto error_before_hairpin_split;
6836         flow = mlx5_ipool_zmalloc(priv->flows[type], &idx);
6837         if (!flow) {
6838                 rte_errno = ENOMEM;
6839                 goto error_before_hairpin_split;
6840         }
6841         if (hairpin_flow > 0) {
6842                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6843                         rte_errno = EINVAL;
6844                         goto error_before_hairpin_split;
6845                 }
6846                 flow_hairpin_split(dev, actions, actions_rx.actions,
6847                                    actions_hairpin_tx.actions, items_tx.items,
6848                                    idx);
6849                 p_actions_rx = actions_rx.actions;
6850         }
6851         flow_split_info.flow_idx = idx;
6852         flow->drv_type = flow_get_drv_type(dev, attr);
6853         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6854                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6855         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6856         /* RSS Action only works on NIC RX domain */
6857         if (attr->ingress && !attr->transfer)
6858                 rss = flow_get_rss_action(dev, p_actions_rx);
6859         if (rss) {
6860                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6861                         return 0;
6862                 /*
6863                  * The following information is required by
6864                  * mlx5_flow_hashfields_adjust() in advance.
6865                  */
6866                 rss_desc->level = rss->level;
6867                 /* RSS type 0 indicates default RSS type (RTE_ETH_RSS_IP). */
6868                 rss_desc->types = !rss->types ? RTE_ETH_RSS_IP : rss->types;
6869         }
6870         flow->dev_handles = 0;
6871         if (rss && rss->types) {
6872                 unsigned int graph_root;
6873
6874                 graph_root = find_graph_root(rss->level);
6875                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6876                                            items, rss->types,
6877                                            mlx5_support_expansion, graph_root);
6878                 MLX5_ASSERT(ret > 0 &&
6879                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6880                 if (rte_log_can_log(mlx5_logtype, RTE_LOG_DEBUG)) {
6881                         for (i = 0; i < buf->entries; ++i)
6882                                 mlx5_dbg__print_pattern(buf->entry[i].pattern);
6883                 }
6884         } else {
6885                 buf->entries = 1;
6886                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6887         }
6888         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6889                                                       indir_actions_n);
6890         for (i = 0; i < buf->entries; ++i) {
6891                 /* Initialize flow split data. */
6892                 flow_split_info.prefix_layers = 0;
6893                 flow_split_info.prefix_mark = 0;
6894                 flow_split_info.skip_scale = 0;
6895                 /*
6896                  * The splitter may create multiple dev_flows,
6897                  * depending on configuration. In the simplest
6898                  * case it just creates unmodified original flow.
6899                  */
6900                 ret = flow_create_split_outer(dev, flow, attr,
6901                                               buf->entry[i].pattern,
6902                                               p_actions_rx, &flow_split_info,
6903                                               error);
6904                 if (ret < 0)
6905                         goto error;
6906                 if (is_flow_tunnel_steer_rule(wks->flows[0].tof_type)) {
6907                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6908                                                            p_actions_rx,
6909                                                            idx,
6910                                                            wks->flows[0].tunnel,
6911                                                            &default_miss_ctx,
6912                                                            error);
6913                         if (ret < 0) {
6914                                 mlx5_free(default_miss_ctx.queue);
6915                                 goto error;
6916                         }
6917                 }
6918         }
6919         /* Create the tx flow. */
6920         if (hairpin_flow) {
6921                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6922                 attr_tx.ingress = 0;
6923                 attr_tx.egress = 1;
6924                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6925                                          actions_hairpin_tx.actions,
6926                                          idx, error);
6927                 if (!dev_flow)
6928                         goto error;
6929                 dev_flow->flow = flow;
6930                 dev_flow->external = 0;
6931                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6932                               dev_flow->handle, next);
6933                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6934                                          items_tx.items,
6935                                          actions_hairpin_tx.actions, error);
6936                 if (ret < 0)
6937                         goto error;
6938         }
6939         /*
6940          * Update the metadata register copy table. If extensive
6941          * metadata feature is enabled and registers are supported
6942          * we might create the extra rte_flow for each unique
6943          * MARK/FLAG action ID.
6944          *
6945          * The table is updated for ingress Flows only, because
6946          * the egress Flows belong to the different device and
6947          * copy table should be updated in peer NIC Rx domain.
6948          */
6949         if (attr->ingress &&
6950             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6951                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6952                 if (ret)
6953                         goto error;
6954         }
6955         /*
6956          * If the flow is external (from application) OR device is started,
6957          * OR mreg discover, then apply immediately.
6958          */
6959         if (external || dev->data->dev_started ||
6960             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6961              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6962                 ret = flow_drv_apply(dev, flow, error);
6963                 if (ret < 0)
6964                         goto error;
6965         }
6966         flow->type = type;
6967         flow_rxq_flags_set(dev, flow);
6968         rte_free(translated_actions);
6969         tunnel = flow_tunnel_from_rule(wks->flows);
6970         if (tunnel) {
6971                 flow->tunnel = 1;
6972                 flow->tunnel_id = tunnel->tunnel_id;
6973                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6974                 mlx5_free(default_miss_ctx.queue);
6975         }
6976         mlx5_flow_pop_thread_workspace();
6977         return idx;
6978 error:
6979         MLX5_ASSERT(flow);
6980         ret = rte_errno; /* Save rte_errno before cleanup. */
6981         flow_mreg_del_copy_action(dev, flow);
6982         flow_drv_destroy(dev, flow);
6983         if (rss_desc->shared_rss)
6984                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6985                         mlx5_ipool_get
6986                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6987                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6988         mlx5_ipool_free(priv->flows[type], idx);
6989         rte_errno = ret; /* Restore rte_errno. */
6990         ret = rte_errno;
6991         rte_errno = ret;
6992         mlx5_flow_pop_thread_workspace();
6993 error_before_hairpin_split:
6994         rte_free(translated_actions);
6995         return 0;
6996 }
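
/*
 * In short, the creation sequence implemented above is:
 *   1. translate indirect (shared) action handles into regular actions;
 *   2. validate, and split hairpin flows into Rx and Tx action lists;
 *   3. expand the pattern according to the RSS types (one entry per
 *      expanded pattern);
 *   4. run every expanded entry through flow_create_split_outer();
 *   5. create the hairpin Tx subflow, update the mreg copy table and,
 *      if the flow is external or the port is started, apply it to HW.
 */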
6997
6998 /**
6999  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
7000  * incoming packets to table 1.
7001  *
7002  * Other flow rules, requested for group n, will be created in
7003  * e-switch table n+1.
7004  * A jump action targeting e-switch group n will actually jump to table n+1.
7005  *
7006  * Used when working in switchdev mode, to utilise advantages of table 1
7007  * and above.
7008  *
7009  * @param dev
7010  *   Pointer to Ethernet device.
7011  *
7012  * @return
7013  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
7014  */
7015 struct rte_flow *
7016 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
7017 {
7018         const struct rte_flow_attr attr = {
7019                 .group = 0,
7020                 .priority = 0,
7021                 .ingress = 1,
7022                 .egress = 0,
7023                 .transfer = 1,
7024         };
7025         const struct rte_flow_item pattern = {
7026                 .type = RTE_FLOW_ITEM_TYPE_END,
7027         };
7028         struct rte_flow_action_jump jump = {
7029                 .group = 1,
7030         };
7031         const struct rte_flow_action actions[] = {
7032                 {
7033                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
7034                         .conf = &jump,
7035                 },
7036                 {
7037                         .type = RTE_FLOW_ACTION_TYPE_END,
7038                 },
7039         };
7040         struct rte_flow_error error;
7041
7042         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7043                                                    &attr, &pattern,
7044                                                    actions, false, &error);
7045 }
7046
7047 /**
7048  * Create a dedicated flow rule on e-switch table 1 that matches the ESW
7049  * manager and SQ number and directs all packets to the peer vport.
7050  *
7051  * @param dev
7052  *   Pointer to Ethernet device.
7053  * @param txq
7054  *   Txq index.
7055  *
7056  * @return
7057  *   Flow ID on success, 0 otherwise and rte_errno is set.
7058  */
7059 uint32_t
7060 mlx5_flow_create_devx_sq_miss_flow(struct rte_eth_dev *dev, uint32_t txq)
7061 {
7062         struct rte_flow_attr attr = {
7063                 .group = 0,
7064                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7065                 .ingress = 1,
7066                 .egress = 0,
7067                 .transfer = 1,
7068         };
7069         struct rte_flow_item_port_id port_spec = {
7070                 .id = MLX5_PORT_ESW_MGR,
7071         };
7072         struct mlx5_rte_flow_item_tx_queue txq_spec = {
7073                 .queue = txq,
7074         };
7075         struct rte_flow_item pattern[] = {
7076                 {
7077                         .type = RTE_FLOW_ITEM_TYPE_PORT_ID,
7078                         .spec = &port_spec,
7079                 },
7080                 {
7081                         .type = (enum rte_flow_item_type)
7082                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7083                         .spec = &txq_spec,
7084                 },
7085                 {
7086                         .type = RTE_FLOW_ITEM_TYPE_END,
7087                 },
7088         };
7089         struct rte_flow_action_jump jump = {
7090                 .group = 1,
7091         };
7092         struct rte_flow_action_port_id port = {
7093                 .id = dev->data->port_id,
7094         };
7095         struct rte_flow_action actions[] = {
7096                 {
7097                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
7098                         .conf = &jump,
7099                 },
7100                 {
7101                         .type = RTE_FLOW_ACTION_TYPE_END,
7102                 },
7103         };
7104         struct rte_flow_error error;
7105
7106         /*
7107          * Creates group 0, highest priority jump flow.
7108          * Matches txq to bypass kernel packets.
7109          */
7110         if (flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern, actions,
7111                              false, &error) == 0)
7112                 return 0;
7113         /* Create group 1, lowest priority redirect flow for txq. */
7114         attr.group = 1;
7115         actions[0].conf = &port;
7116         actions[0].type = RTE_FLOW_ACTION_TYPE_PORT_ID;
7117         return flow_list_create(dev, MLX5_FLOW_TYPE_CTL, &attr, pattern,
7118                                 actions, false, &error);
7119 }
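
/*
 * Two control rules result from the calls above, reusing the same pattern
 * and action arrays with only .group and actions[0] patched:
 *   group 0: ESW manager port + Tx queue -> jump to group 1
 *   group 1: ESW manager port + Tx queue -> port_id(dev->data->port_id)
 */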
7120
7121 /**
7122  * Validate a flow supported by the NIC.
7123  *
7124  * @see rte_flow_validate()
7125  * @see rte_flow_ops
7126  */
7127 int
7128 mlx5_flow_validate(struct rte_eth_dev *dev,
7129                    const struct rte_flow_attr *attr,
7130                    const struct rte_flow_item items[],
7131                    const struct rte_flow_action original_actions[],
7132                    struct rte_flow_error *error)
7133 {
7134         int hairpin_flow;
7135         struct mlx5_translated_action_handle
7136                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
7137         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
7138         const struct rte_flow_action *actions;
7139         struct rte_flow_action *translated_actions = NULL;
7140         int ret = flow_action_handles_translate(dev, original_actions,
7141                                                 indir_actions,
7142                                                 &indir_actions_n,
7143                                                 &translated_actions, error);
7144
7145         if (ret)
7146                 return ret;
7147         actions = translated_actions ? translated_actions : original_actions;
7148         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
7149         ret = flow_drv_validate(dev, attr, items, actions,
7150                                 true, hairpin_flow, error);
7151         rte_free(translated_actions);
7152         return ret;
7153 }
7154
7155 /**
7156  * Create a flow.
7157  *
7158  * @see rte_flow_create()
7159  * @see rte_flow_ops
7160  */
7161 struct rte_flow *
7162 mlx5_flow_create(struct rte_eth_dev *dev,
7163                  const struct rte_flow_attr *attr,
7164                  const struct rte_flow_item items[],
7165                  const struct rte_flow_action actions[],
7166                  struct rte_flow_error *error)
7167 {
7168         struct mlx5_priv *priv = dev->data->dev_private;
7169
7170         if (priv->sh->config.dv_flow_en == 2) {
7171                 rte_flow_error_set(error, ENOTSUP,
7172                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7173                           NULL,
7174                           "Flow non-Q creation not supported");
7175                 return NULL;
7176         }
7177         /*
7178          * If the device is not started yet, it is not allowed to create a
7179          * flow from the application. PMD default flows and traffic control
7180          * flows are not affected.
7181          */
7182         if (unlikely(!dev->data->dev_started)) {
7183                 DRV_LOG(DEBUG, "port %u is not started when "
7184                         "inserting a flow", dev->data->port_id);
7185                 rte_flow_error_set(error, ENODEV,
7186                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7187                                    NULL,
7188                                    "port not started");
7189                 return NULL;
7190         }
7191
7192         return (void *)(uintptr_t)flow_list_create(dev, MLX5_FLOW_TYPE_GEN,
7193                                                    attr, items, actions,
7194                                                    true, error);
7195 }
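
/*
 * A minimal application-side sketch of the path above; the queue index is
 * illustrative. rte_flow_create() reaches this callback through
 * rte_flow_ops and ends up in flow_list_create() with MLX5_FLOW_TYPE_GEN:
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                        actions, &err);
 */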
7196
7197 /**
7198  * Destroy a flow in a list.
7199  *
7200  * @param dev
7201  *   Pointer to Ethernet device.
7202  * @param[in] flow_idx
7203  *   Index of flow to destroy.
7204  */
7205 static void
7206 flow_list_destroy(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7207                   uint32_t flow_idx)
7208 {
7209         struct mlx5_priv *priv = dev->data->dev_private;
7210         struct rte_flow *flow = mlx5_ipool_get(priv->flows[type], flow_idx);
7211
7212         if (!flow)
7213                 return;
7214         MLX5_ASSERT(flow->type == type);
7215         /*
7216          * Update RX queue flags only if port is started, otherwise it is
7217          * already clean.
7218          */
7219         if (dev->data->dev_started)
7220                 flow_rxq_flags_trim(dev, flow);
7221         flow_drv_destroy(dev, flow);
7222         if (flow->tunnel) {
7223                 struct mlx5_flow_tunnel *tunnel;
7224
7225                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
7226                 RTE_VERIFY(tunnel);
7227                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
7228                         mlx5_flow_tunnel_free(dev, tunnel);
7229         }
7230         flow_mreg_del_copy_action(dev, flow);
7231         mlx5_ipool_free(priv->flows[type], flow_idx);
7232 }
7233
7234 /**
7235  * Destroy all flows.
7236  *
7237  * @param dev
7238  *   Pointer to Ethernet device.
7239  * @param type
7240  *   Flow type to be flushed.
7241  * @param active
7242  *   If flushing is called actively.
7243  */
7244 void
7245 mlx5_flow_list_flush(struct rte_eth_dev *dev, enum mlx5_flow_type type,
7246                      bool active)
7247 {
7248         struct mlx5_priv *priv = dev->data->dev_private;
7249         uint32_t num_flushed = 0, fidx = 1;
7250         struct rte_flow *flow;
7251
7252 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7253         if (priv->sh->config.dv_flow_en == 2 &&
7254             type == MLX5_FLOW_TYPE_GEN) {
7255                 flow_hw_q_flow_flush(dev, NULL);
7256                 return;
7257         }
7258 #endif
7259
7260         MLX5_IPOOL_FOREACH(priv->flows[type], fidx, flow) {
7261                 flow_list_destroy(dev, type, fidx);
7262                 num_flushed++;
7263         }
7264         if (active) {
7265                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
7266                         dev->data->port_id, num_flushed);
7267         }
7268 }
7269
7270 /**
7271  * Stop all default actions for flows.
7272  *
7273  * @param dev
7274  *   Pointer to Ethernet device.
7275  */
7276 void
7277 mlx5_flow_stop_default(struct rte_eth_dev *dev)
7278 {
7279         flow_mreg_del_default_copy_action(dev);
7280         flow_rxq_flags_clear(dev);
7281 }
7282
7283 /**
7284  * Start all default actions for flows.
7285  *
7286  * @param dev
7287  *   Pointer to Ethernet device.
7288  * @return
7289  *   0 on success, a negative errno value otherwise and rte_errno is set.
7290  */
7291 int
7292 mlx5_flow_start_default(struct rte_eth_dev *dev)
7293 {
7294         struct rte_flow_error error;
7295
7296         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
7297         return flow_mreg_add_default_copy_action(dev, &error);
7298 }
7299
7300 /**
7301  * Release key of thread specific flow workspace data.
7302  */
7303 void
7304 flow_release_workspace(void *data)
7305 {
7306         struct mlx5_flow_workspace *wks = data;
7307         struct mlx5_flow_workspace *next;
7308
7309         while (wks) {
7310                 next = wks->next;
7311                 free(wks->rss_desc.queue);
7312                 free(wks);
7313                 wks = next;
7314         }
7315 }
7316
7317 /**
7318  * Get thread specific current flow workspace.
7319  *
7320  * @return pointer to thread specific flow workspace data, NULL on error.
7321  */
7322 struct mlx5_flow_workspace*
7323 mlx5_flow_get_thread_workspace(void)
7324 {
7325         struct mlx5_flow_workspace *data;
7326
7327         data = mlx5_flow_os_get_specific_workspace();
7328         MLX5_ASSERT(data && data->inuse);
7329         if (!data || !data->inuse)
7330                 DRV_LOG(ERR, "flow workspace not initialized.");
7331         return data;
7332 }
7333
7334 /**
7335  * Allocate and init new flow workspace.
7336  *
7337  * @return pointer to flow workspace data, NULL on error.
7338  */
7339 static struct mlx5_flow_workspace*
7340 flow_alloc_thread_workspace(void)
7341 {
7342         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
7343
7344         if (!data) {
7345                 DRV_LOG(ERR, "Failed to allocate flow workspace "
7346                         "memory.");
7347                 return NULL;
7348         }
7349         data->rss_desc.queue = calloc(1,
7350                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
7351         if (!data->rss_desc.queue)
7352                 goto err;
7353         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
7354         return data;
7355 err:
7356         free(data->rss_desc.queue);
7357         free(data);
7358         return NULL;
7359 }
7360
7361 /**
7362  * Get new thread specific flow workspace.
7363  *
7364  * If the current workspace is in use, create a new one and set it as current.
7365  *
7366  * @return pointer to thread specific flow workspace data, NULL on error.
7367  */
7368 static struct mlx5_flow_workspace*
7369 mlx5_flow_push_thread_workspace(void)
7370 {
7371         struct mlx5_flow_workspace *curr;
7372         struct mlx5_flow_workspace *data;
7373
7374         curr = mlx5_flow_os_get_specific_workspace();
7375         if (!curr) {
7376                 data = flow_alloc_thread_workspace();
7377                 if (!data)
7378                         return NULL;
7379         } else if (!curr->inuse) {
7380                 data = curr;
7381         } else if (curr->next) {
7382                 data = curr->next;
7383         } else {
7384                 data = flow_alloc_thread_workspace();
7385                 if (!data)
7386                         return NULL;
7387                 curr->next = data;
7388                 data->prev = curr;
7389         }
7390         data->inuse = 1;
7391         data->flow_idx = 0;
7392         /* Set as current workspace */
7393         if (mlx5_flow_os_set_specific_workspace(data))
7394                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7395         return data;
7396 }
7397
7398 /**
7399  * Close current thread specific flow workspace.
7400  *
7401  * If a previous workspace is available, set it as current.
7402  *
7403  * The popped workspace is only marked as unused, it is not freed.
7404  */
7405 static void
7406 mlx5_flow_pop_thread_workspace(void)
7407 {
7408         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
7409
7410         if (!data)
7411                 return;
7412         if (!data->inuse) {
7413                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
7414                 return;
7415         }
7416         data->inuse = 0;
7417         if (!data->prev)
7418                 return;
7419         if (mlx5_flow_os_set_specific_workspace(data->prev))
7420                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
7421 }
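
/*
 * Usage sketch (illustrative only, not part of the driver): a flow creation
 * path is expected to bracket its work with the push/pop helpers above, so
 * that re-entrant calls on the same thread each get their own workspace.
 * The wrapper name flow_workspace_example() is hypothetical.
 *
 * @code
 * static int
 * flow_workspace_example(void)
 * {
 *	struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 *	if (!wks)
 *		return -ENOMEM;
 *	MLX5_ASSERT(wks == mlx5_flow_get_thread_workspace());
 *	mlx5_flow_pop_thread_workspace();
 *	return 0;
 * }
 * @endcode
 */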
7422
7423 /**
7424  * Verify that the flow list is empty.
7425  *
7426  * @param dev
7427  *  Pointer to Ethernet device.
7428  *
7429  * @return the number of flows not released.
7430  */
7431 int
7432 mlx5_flow_verify(struct rte_eth_dev *dev)
7433 {
7434         struct mlx5_priv *priv = dev->data->dev_private;
7435         struct rte_flow *flow;
7436         uint32_t idx = 0;
7437         int ret = 0, i;
7438
7439         for (i = 0; i < MLX5_FLOW_TYPE_MAXI; i++) {
7440                 MLX5_IPOOL_FOREACH(priv->flows[i], idx, flow) {
7441                         DRV_LOG(DEBUG, "port %u flow %p still referenced",
7442                                 dev->data->port_id, (void *)flow);
7443                         ret++;
7444                 }
7445         }
7446         return ret;
7447 }
7448
7449 /**
7450  * Enable default hairpin egress flow.
7451  *
7452  * @param dev
7453  *   Pointer to Ethernet device.
7454  * @param queue
7455  *   The queue index.
7456  *
7457  * @return
7458  *   0 on success, a negative errno value otherwise and rte_errno is set.
7459  */
7460 int
7461 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
7462                             uint32_t queue)
7463 {
7464         const struct rte_flow_attr attr = {
7465                 .egress = 1,
7466                 .priority = 0,
7467         };
7468         struct mlx5_rte_flow_item_tx_queue queue_spec = {
7469                 .queue = queue,
7470         };
7471         struct mlx5_rte_flow_item_tx_queue queue_mask = {
7472                 .queue = UINT32_MAX,
7473         };
7474         struct rte_flow_item items[] = {
7475                 {
7476                         .type = (enum rte_flow_item_type)
7477                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
7478                         .spec = &queue_spec,
7479                         .last = NULL,
7480                         .mask = &queue_mask,
7481                 },
7482                 {
7483                         .type = RTE_FLOW_ITEM_TYPE_END,
7484                 },
7485         };
7486         struct rte_flow_action_jump jump = {
7487                 .group = MLX5_HAIRPIN_TX_TABLE,
7488         };
7489         struct rte_flow_action actions[2];
7490         uint32_t flow_idx;
7491         struct rte_flow_error error;
7492
7493         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
7494         actions[0].conf = &jump;
7495         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
7496         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7497                                     &attr, items, actions, false, &error);
7498         if (!flow_idx) {
7499                 DRV_LOG(DEBUG,
7500                         "Failed to create ctrl flow: rte_errno(%d),"
7501                         " type(%d), message(%s)",
7502                         rte_errno, error.type,
7503                         error.message ? error.message : " (no stated reason)");
7504                 return -rte_errno;
7505         }
7506         return 0;
7507 }
7508
7509 /**
7510  * Enable a control flow configured from the control plane.
7511  *
7512  * @param dev
7513  *   Pointer to Ethernet device.
7514  * @param eth_spec
7515  *   An Ethernet flow spec to apply.
7516  * @param eth_mask
7517  *   An Ethernet flow mask to apply.
7518  * @param vlan_spec
7519  *   A VLAN flow spec to apply.
7520  * @param vlan_mask
7521  *   A VLAN flow mask to apply.
7522  *
7523  * @return
7524  *   0 on success, a negative errno value otherwise and rte_errno is set.
7525  */
7526 int
7527 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
7528                     struct rte_flow_item_eth *eth_spec,
7529                     struct rte_flow_item_eth *eth_mask,
7530                     struct rte_flow_item_vlan *vlan_spec,
7531                     struct rte_flow_item_vlan *vlan_mask)
7532 {
7533         struct mlx5_priv *priv = dev->data->dev_private;
7534         const struct rte_flow_attr attr = {
7535                 .ingress = 1,
7536                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7537         };
7538         struct rte_flow_item items[] = {
7539                 {
7540                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7541                         .spec = eth_spec,
7542                         .last = NULL,
7543                         .mask = eth_mask,
7544                 },
7545                 {
7546                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
7547                                               RTE_FLOW_ITEM_TYPE_END,
7548                         .spec = vlan_spec,
7549                         .last = NULL,
7550                         .mask = vlan_mask,
7551                 },
7552                 {
7553                         .type = RTE_FLOW_ITEM_TYPE_END,
7554                 },
7555         };
7556         uint16_t queue[priv->reta_idx_n];
7557         struct rte_flow_action_rss action_rss = {
7558                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
7559                 .level = 0,
7560                 .types = priv->rss_conf.rss_hf,
7561                 .key_len = priv->rss_conf.rss_key_len,
7562                 .queue_num = priv->reta_idx_n,
7563                 .key = priv->rss_conf.rss_key,
7564                 .queue = queue,
7565         };
7566         struct rte_flow_action actions[] = {
7567                 {
7568                         .type = RTE_FLOW_ACTION_TYPE_RSS,
7569                         .conf = &action_rss,
7570                 },
7571                 {
7572                         .type = RTE_FLOW_ACTION_TYPE_END,
7573                 },
7574         };
7575         uint32_t flow_idx;
7576         struct rte_flow_error error;
7577         unsigned int i;
7578
7579         if (!priv->reta_idx_n || !priv->rxqs_n) {
7580                 return 0;
7581         }
7582         if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
7583                 action_rss.types = 0;
7584         for (i = 0; i != priv->reta_idx_n; ++i)
7585                 queue[i] = (*priv->reta_idx)[i];
7586         flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7587                                     &attr, items, actions, false, &error);
7588         if (!flow_idx)
7589                 return -rte_errno;
7590         return 0;
7591 }
7592
7593 /**
7594  * Enable a control flow configured from the control plane.
7595  *
7596  * @param dev
7597  *   Pointer to Ethernet device.
7598  * @param eth_spec
7599  *   An Ethernet flow spec to apply.
7600  * @param eth_mask
7601  *   An Ethernet flow mask to apply.
7602  *
7603  * @return
7604  *   0 on success, a negative errno value otherwise and rte_errno is set.
7605  */
7606 int
7607 mlx5_ctrl_flow(struct rte_eth_dev *dev,
7608                struct rte_flow_item_eth *eth_spec,
7609                struct rte_flow_item_eth *eth_mask)
7610 {
7611         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
7612 }
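
/*
 * Usage sketch (illustrative only): enabling a broadcast control flow with
 * the wrapper above. Spec and mask may point to the same item when every
 * specified field is to be matched exactly; the helper name
 * ctrl_flow_broadcast_example() is hypothetical.
 *
 * @code
 * static int
 * ctrl_flow_broadcast_example(struct rte_eth_dev *dev)
 * {
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	return mlx5_ctrl_flow(dev, &bcast, &bcast);
 * }
 * @endcode
 */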
7613
7614 /**
7615  * Create a default miss flow rule matching LACP traffic.
7616  *
7617  * @param dev
7618  *   Pointer to Ethernet device.
7621  *
7622  * @return
7623  *   0 on success, a negative errno value otherwise and rte_errno is set.
7624  */
7625 int
7626 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
7627 {
7628         /*
7629          * The LACP matching is done by only using ether type since using
7630          * a multicast dst mac causes kernel to give low priority to this flow.
7631          */
7632         static const struct rte_flow_item_eth lacp_spec = {
7633                 .type = RTE_BE16(0x8809),
7634         };
7635         static const struct rte_flow_item_eth lacp_mask = {
7636                 .type = 0xffff,
7637         };
7638         const struct rte_flow_attr attr = {
7639                 .ingress = 1,
7640         };
7641         struct rte_flow_item items[] = {
7642                 {
7643                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7644                         .spec = &lacp_spec,
7645                         .mask = &lacp_mask,
7646                 },
7647                 {
7648                         .type = RTE_FLOW_ITEM_TYPE_END,
7649                 },
7650         };
7651         struct rte_flow_action actions[] = {
7652                 {
7653                         .type = (enum rte_flow_action_type)
7654                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
7655                 },
7656                 {
7657                         .type = RTE_FLOW_ACTION_TYPE_END,
7658                 },
7659         };
7660         struct rte_flow_error error;
7661         uint32_t flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_CTL,
7662                                         &attr, items, actions,
7663                                         false, &error);
7664
7665         if (!flow_idx)
7666                 return -rte_errno;
7667         return 0;
7668 }
7669
7670 /**
7671  * Destroy a flow.
7672  *
7673  * @see rte_flow_destroy()
7674  * @see rte_flow_ops
7675  */
7676 int
7677 mlx5_flow_destroy(struct rte_eth_dev *dev,
7678                   struct rte_flow *flow,
7679                   struct rte_flow_error *error __rte_unused)
7680 {
7681         struct mlx5_priv *priv = dev->data->dev_private;
7682
7683         if (priv->sh->config.dv_flow_en == 2)
7684                 return rte_flow_error_set(error, ENOTSUP,
7685                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7686                           NULL,
7687                           "Flow non-Q destruction not supported");
7688         flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN,
7689                                 (uintptr_t)(void *)flow);
7690         return 0;
7691 }
7692
7693 /**
7694  * Destroy all flows.
7695  *
7696  * @see rte_flow_flush()
7697  * @see rte_flow_ops
7698  */
7699 int
7700 mlx5_flow_flush(struct rte_eth_dev *dev,
7701                 struct rte_flow_error *error __rte_unused)
7702 {
7703         mlx5_flow_list_flush(dev, MLX5_FLOW_TYPE_GEN, false);
7704         return 0;
7705 }
7706
7707 /**
7708  * Isolated mode.
7709  *
7710  * @see rte_flow_isolate()
7711  * @see rte_flow_ops
7712  */
7713 int
7714 mlx5_flow_isolate(struct rte_eth_dev *dev,
7715                   int enable,
7716                   struct rte_flow_error *error)
7717 {
7718         struct mlx5_priv *priv = dev->data->dev_private;
7719
7720         if (dev->data->dev_started) {
7721                 rte_flow_error_set(error, EBUSY,
7722                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7723                                    NULL,
7724                                    "port must be stopped first");
7725                 return -rte_errno;
7726         }
7727         priv->isolated = !!enable;
7728         if (enable)
7729                 dev->dev_ops = &mlx5_dev_ops_isolate;
7730         else
7731                 dev->dev_ops = &mlx5_dev_ops;
7732
7733         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
7734         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
7735
7736         return 0;
7737 }
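
/*
 * Usage sketch (illustrative only): entering isolated mode through the
 * generic rte_flow API before the port is started, as required by the check
 * above. The helper name isolate_example() is hypothetical.
 *
 * @code
 * static int
 * isolate_example(uint16_t port_id, struct rte_flow_error *error)
 * {
 *	int ret = rte_flow_isolate(port_id, 1, error);
 *
 *	if (ret)
 *		return ret;
 *	return rte_eth_dev_start(port_id);
 * }
 * @endcode
 */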
7738
7739 /**
7740  * Query a flow.
7741  *
7742  * @see rte_flow_query()
7743  * @see rte_flow_ops
7744  */
7745 static int
7746 flow_drv_query(struct rte_eth_dev *dev,
7747                uint32_t flow_idx,
7748                const struct rte_flow_action *actions,
7749                void *data,
7750                struct rte_flow_error *error)
7751 {
7752         struct mlx5_priv *priv = dev->data->dev_private;
7753         const struct mlx5_flow_driver_ops *fops;
7754         struct rte_flow *flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
7755                                                flow_idx);
7756         enum mlx5_flow_drv_type ftype;
7757
7758         if (!flow) {
7759                 return rte_flow_error_set(error, ENOENT,
7760                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7761                           NULL,
7762                           "invalid flow handle");
7763         }
7764         ftype = flow->drv_type;
7765         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
7766         fops = flow_get_drv_ops(ftype);
7767
7768         return fops->query(dev, flow, actions, data, error);
7769 }
7770
7771 /**
7772  * Query a flow.
7773  *
7774  * @see rte_flow_query()
7775  * @see rte_flow_ops
7776  */
7777 int
7778 mlx5_flow_query(struct rte_eth_dev *dev,
7779                 struct rte_flow *flow,
7780                 const struct rte_flow_action *actions,
7781                 void *data,
7782                 struct rte_flow_error *error)
7783 {
7784         int ret;
7785         struct mlx5_priv *priv = dev->data->dev_private;
7786
7787         if (priv->sh->config.dv_flow_en == 2)
7788                 return rte_flow_error_set(error, ENOTSUP,
7789                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
7790                           NULL,
7791                           "Flow non-Q query not supported");
7792         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
7793                              error);
7794         if (ret < 0)
7795                 return ret;
7796         return 0;
7797 }
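
/*
 * Usage sketch (illustrative only): querying the COUNT action of an existing
 * flow through the generic rte_flow API, which ends up in the dispatcher
 * above. The helper name flow_count_query_example() is hypothetical.
 *
 * @code
 * static int
 * flow_count_query_example(uint16_t port_id, struct rte_flow *flow,
 *			    struct rte_flow_error *error)
 * {
 *	const struct rte_flow_action count_action = {
 *		.type = RTE_FLOW_ACTION_TYPE_COUNT,
 *	};
 *	struct rte_flow_query_count count = { .reset = 0 };
 *	int ret;
 *
 *	ret = rte_flow_query(port_id, flow, &count_action, &count, error);
 *	if (!ret && count.hits_set)
 *		DRV_LOG(DEBUG, "hits=%" PRIu64 " bytes=%" PRIu64,
 *			count.hits, count.bytes);
 *	return ret;
 * }
 * @endcode
 */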
7798
7799 /**
7800  * Get rte_flow callbacks.
7801  *
7802  * @param dev
7803  *   Pointer to Ethernet device structure.
7804  * @param ops
7805  *   Pointer to operation-specific structure.
7806  *
7807  * @return 0
7808  */
7809 int
7810 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7811                   const struct rte_flow_ops **ops)
7812 {
7813         *ops = &mlx5_flow_ops;
7814         return 0;
7815 }
7816
7817 /**
7818  * Validate meter policy actions.
7819  * Dispatcher for action type specific validation.
7820  *
7821  * @param[in] dev
7822  *   Pointer to the Ethernet device structure.
7823  * @param[in] actions
7824  *   The meter policy actions (one per color) to validate.
7825  * @param[in] attr
7826  *   Attributes of flow to determine steering domain.
7827  * @param[out] is_rss
7828  *   Is RSS or not.
7829  * @param[out] domain_bitmap
7830  *   Domain bitmap.
7831  * @param[out] policy_mode
7832  *   Meter policy mode.
7833  * @param[out] error
7834  *   Perform verbose error reporting if not NULL. Initialized in case of
7835  *   error only.
7836  *
7837  * @return
7838  *   0 on success, otherwise negative errno value.
7839  */
7840 int
7841 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7842                         const struct rte_flow_action *actions[RTE_COLORS],
7843                         struct rte_flow_attr *attr,
7844                         bool *is_rss,
7845                         uint8_t *domain_bitmap,
7846                         uint8_t *policy_mode,
7847                         struct rte_mtr_error *error)
7848 {
7849         const struct mlx5_flow_driver_ops *fops;
7850
7851         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7852         return fops->validate_mtr_acts(dev, actions, attr, is_rss,
7853                                        domain_bitmap, policy_mode, error);
7854 }
7855
7856 /**
7857  * Destroy the meter policy actions.
7858  *
7859  * @param[in] dev
7860  *   Pointer to Ethernet device.
7861  * @param[in] mtr_policy
7862  *   Meter policy struct.
7863  */
7864 void
7865 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7866                       struct mlx5_flow_meter_policy *mtr_policy)
7867 {
7868         const struct mlx5_flow_driver_ops *fops;
7869
7870         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7871         fops->destroy_mtr_acts(dev, mtr_policy);
7872 }
7873
7874 /**
7875  * Create policy actions, lock free
7876  * (mutex should be acquired by the caller).
7877  * Dispatcher for action type specific call.
7878  *
7879  * @param[in] dev
7880  *   Pointer to the Ethernet device structure.
7881  * @param[in] mtr_policy
7882  *   Meter policy struct.
7883  * @param[in] action
7884  *   Action specification used to create meter actions.
7885  * @param[out] error
7886  *   Perform verbose error reporting if not NULL. Initialized in case of
7887  *   error only.
7888  *
7889  * @return
7890  *   0 on success, otherwise negative errno value.
7891  */
7892 int
7893 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7894                       struct mlx5_flow_meter_policy *mtr_policy,
7895                       const struct rte_flow_action *actions[RTE_COLORS],
7896                       struct rte_mtr_error *error)
7897 {
7898         const struct mlx5_flow_driver_ops *fops;
7899
7900         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7901         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7902 }
7903
7904 /**
7905  * Create policy rules, lock free
7906  * (mutex should be acquired by the caller).
7907  * Dispatcher for action type specific call.
7908  *
7909  * @param[in] dev
7910  *   Pointer to the Ethernet device structure.
7911  * @param[in] mtr_policy
7912  *   Meter policy struct.
7913  *
7914  * @return
7915  *   0 on success, -1 otherwise.
7916  */
7917 int
7918 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7919                              struct mlx5_flow_meter_policy *mtr_policy)
7920 {
7921         const struct mlx5_flow_driver_ops *fops;
7922
7923         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7924         return fops->create_policy_rules(dev, mtr_policy);
7925 }
7926
7927 /**
7928  * Destroy policy rules, lock free
7929  * (mutex should be acquired by the caller).
7930  * Dispatcher for action type specific call.
7931  *
7932  * @param[in] dev
7933  *   Pointer to the Ethernet device structure.
7934  * @param[in] mtr_policy
7935  *   Meter policy struct.
7936  */
7937 void
7938 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7939                              struct mlx5_flow_meter_policy *mtr_policy)
7940 {
7941         const struct mlx5_flow_driver_ops *fops;
7942
7943         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7944         fops->destroy_policy_rules(dev, mtr_policy);
7945 }
7946
7947 /**
7948  * Destroy the default policy table set.
7949  *
7950  * @param[in] dev
7951  *   Pointer to Ethernet device.
7952  */
7953 void
7954 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7955 {
7956         const struct mlx5_flow_driver_ops *fops;
7957
7958         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7959         fops->destroy_def_policy(dev);
7960 }
7961
7962 /**
7963  * Create the default policy table set.
7964  *
7965  * @param[in] dev
7966  *   Pointer to Ethernet device.
7967  *
7968  * @return
7969  *   0 on success, -1 otherwise.
7970  */
7971 int
7972 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7973 {
7974         const struct mlx5_flow_driver_ops *fops;
7975
7976         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7977         return fops->create_def_policy(dev);
7978 }
7979
7980 /**
7981  * Create the needed meter and suffix tables.
7982  *
7983  * @param[in] dev
7984  *   Pointer to Ethernet device.
7985  *
7986  * @return
7987  *   0 on success, -1 otherwise.
7988  */
7989 int
7990 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7991                         struct mlx5_flow_meter_info *fm,
7992                         uint32_t mtr_idx,
7993                         uint8_t domain_bitmap)
7994 {
7995         const struct mlx5_flow_driver_ops *fops;
7996
7997         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7998         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7999 }
8000
8001 /**
8002  * Destroy the meter table set.
8003  *
8004  * @param[in] dev
8005  *   Pointer to Ethernet device.
8006  * @param[in] fm
8007  *   Pointer to the flow meter info.
8008  */
8009 void
8010 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
8011                            struct mlx5_flow_meter_info *fm)
8012 {
8013         const struct mlx5_flow_driver_ops *fops;
8014
8015         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8016         fops->destroy_mtr_tbls(dev, fm);
8017 }
8018
8019 /**
8020  * Destroy the global meter drop table.
8021  *
8022  * @param[in] dev
8023  *   Pointer to Ethernet device.
8024  */
8025 void
8026 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
8027 {
8028         const struct mlx5_flow_driver_ops *fops;
8029
8030         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8031         fops->destroy_mtr_drop_tbls(dev);
8032 }
8033
8034 /**
8035  * Destroy the sub-policy tables created for the RX queues.
8036  *
8037  * @param[in] dev
8038  *   Pointer to Ethernet device.
8039  * @param[in] mtr_policy
8040  *   Pointer to meter policy table.
8041  */
8042 void
8043 mlx5_flow_destroy_sub_policy_with_rxq(struct rte_eth_dev *dev,
8044                 struct mlx5_flow_meter_policy *mtr_policy)
8045 {
8046         const struct mlx5_flow_driver_ops *fops;
8047
8048         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8049         fops->destroy_sub_policy_with_rxq(dev, mtr_policy);
8050 }
8051
8052 /**
8053  * Allocate the needed ASO flow meter id.
8054  *
8055  * @param[in] dev
8056  *   Pointer to Ethernet device.
8057  *
8058  * @return
8059  *   Index to the ASO flow meter on success, 0 otherwise.
8060  */
8061 uint32_t
8062 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
8063 {
8064         const struct mlx5_flow_driver_ops *fops;
8065
8066         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8067         return fops->create_meter(dev);
8068 }
8069
8070 /**
8071  * Free the ASO flow meter id.
8072  *
8073  * @param[in] dev
8074  *   Pointer to Ethernet device.
8075  * @param[in] mtr_idx
8076  *  Index to the ASO flow meter to be freed.
8080  */
8081 void
8082 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
8083 {
8084         const struct mlx5_flow_driver_ops *fops;
8085
8086         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8087         fops->free_meter(dev, mtr_idx);
8088 }
8089
8090 /**
8091  * Allocate a counter.
8092  *
8093  * @param[in] dev
8094  *   Pointer to Ethernet device structure.
8095  *
8096  * @return
8097  *   Index to the allocated counter on success, 0 otherwise.
8098  */
8099 uint32_t
8100 mlx5_counter_alloc(struct rte_eth_dev *dev)
8101 {
8102         const struct mlx5_flow_driver_ops *fops;
8103         struct rte_flow_attr attr = { .transfer = 0 };
8104
8105         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8106                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8107                 return fops->counter_alloc(dev);
8108         }
8109         DRV_LOG(ERR,
8110                 "port %u counter allocate is not supported.",
8111                  dev->data->port_id);
8112         return 0;
8113 }
8114
8115 /**
8116  * Free a counter.
8117  *
8118  * @param[in] dev
8119  *   Pointer to Ethernet device structure.
8120  * @param[in] cnt
8121  *   Index to the counter to be freed.
8122  */
8123 void
8124 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
8125 {
8126         const struct mlx5_flow_driver_ops *fops;
8127         struct rte_flow_attr attr = { .transfer = 0 };
8128
8129         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8130                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8131                 fops->counter_free(dev, cnt);
8132                 return;
8133         }
8134         DRV_LOG(ERR,
8135                 "port %u counter free is not supported.",
8136                  dev->data->port_id);
8137 }
8138
8139 /**
8140  * Query counter statistics.
8141  *
8142  * @param[in] dev
8143  *   Pointer to Ethernet device structure.
8144  * @param[in] cnt
8145  *   Index to counter to query.
8146  * @param[in] clear
8147  *   Set to clear counter statistics.
8148  * @param[out] pkts
8149  *   Where to store the number of packets hit by the counter.
8150  * @param[out] bytes
8151  *   Where to store the number of bytes hit by the counter.
8152  *
8153  * @return
8154  *   0 on success, a negative errno value otherwise.
8155  */
8156 int
8157 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
8158                    bool clear, uint64_t *pkts, uint64_t *bytes, void **action)
8159 {
8160         const struct mlx5_flow_driver_ops *fops;
8161         struct rte_flow_attr attr = { .transfer = 0 };
8162
8163         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
8164                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
8165                 return fops->counter_query(dev, cnt, clear, pkts,
8166                                         bytes, action);
8167         }
8168         DRV_LOG(ERR,
8169                 "port %u counter query is not supported.",
8170                  dev->data->port_id);
8171         return -ENOTSUP;
8172 }
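
/*
 * Usage sketch (illustrative only): the expected life cycle of a counter
 * through the three wrappers above. The helper name counter_example() is
 * hypothetical and error handling is reduced to a minimum.
 *
 * @code
 * static void
 * counter_example(struct rte_eth_dev *dev)
 * {
 *	uint64_t pkts = 0;
 *	uint64_t bytes = 0;
 *	void *action = NULL;
 *	uint32_t cnt = mlx5_counter_alloc(dev);
 *
 *	if (!cnt)
 *		return;
 *	if (!mlx5_counter_query(dev, cnt, false, &pkts, &bytes, &action))
 *		DRV_LOG(DEBUG, "counter %u: %" PRIu64 " packets, %" PRIu64
 *			" bytes", cnt, pkts, bytes);
 *	mlx5_counter_free(dev, cnt);
 * }
 * @endcode
 */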
8173
8174 /**
8175  * Get information about HWS pre-configurable resources.
8176  *
8177  * @param[in] dev
8178  *   Pointer to the rte_eth_dev structure.
8179  * @param[out] port_info
8180  *   Pointer to port information.
8181  * @param[out] queue_info
8182  *   Pointer to queue information.
8183  * @param[out] error
8184  *   Pointer to error structure.
8185  *
8186  * @return
8187  *   0 on success, a negative errno value otherwise and rte_errno is set.
8188  */
8189 static int
8190 mlx5_flow_info_get(struct rte_eth_dev *dev,
8191                    struct rte_flow_port_info *port_info,
8192                    struct rte_flow_queue_info *queue_info,
8193                    struct rte_flow_error *error)
8194 {
8195         const struct mlx5_flow_driver_ops *fops;
8196
8197         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8198                 return rte_flow_error_set(error, ENOTSUP,
8199                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8200                                 NULL,
8201                                 "info get with incorrect steering mode");
8202         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8203         return fops->info_get(dev, port_info, queue_info, error);
8204 }
8205
8206 /**
8207  * Configure port HWS resources.
8208  *
8209  * @param[in] dev
8210  *   Pointer to the rte_eth_dev structure.
8211  * @param[in] port_attr
8212  *   Port configuration attributes.
8213  * @param[in] nb_queue
8214  *   Number of queues.
8215  * @param[in] queue_attr
8216  *   Array that holds attributes for each flow queue.
8217  * @param[out] error
8218  *   Pointer to error structure.
8219  *
8220  * @return
8221  *   0 on success, a negative errno value otherwise and rte_errno is set.
8222  */
8223 static int
8224 mlx5_flow_port_configure(struct rte_eth_dev *dev,
8225                          const struct rte_flow_port_attr *port_attr,
8226                          uint16_t nb_queue,
8227                          const struct rte_flow_queue_attr *queue_attr[],
8228                          struct rte_flow_error *error)
8229 {
8230         const struct mlx5_flow_driver_ops *fops;
8231
8232         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8233                 return rte_flow_error_set(error, ENOTSUP,
8234                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8235                                 NULL,
8236                                 "port configure with incorrect steering mode");
8237         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8238         return fops->configure(dev, port_attr, nb_queue, queue_attr, error);
8239 }
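
/*
 * Usage sketch (illustrative only): the generic rte_flow calls that land in
 * the two HWS callbacks above. The resource sizes are arbitrary example
 * values and the helper name hws_configure_example() is hypothetical.
 *
 * @code
 * static int
 * hws_configure_example(uint16_t port_id, struct rte_flow_error *error)
 * {
 *	struct rte_flow_port_info port_info;
 *	struct rte_flow_queue_info queue_info;
 *	const struct rte_flow_port_attr port_attr = {
 *		.nb_counters = 1 << 10,
 *	};
 *	const struct rte_flow_queue_attr queue_attr = { .size = 64 };
 *	const struct rte_flow_queue_attr *queue_attr_list[] = { &queue_attr };
 *	int ret;
 *
 *	ret = rte_flow_info_get(port_id, &port_info, &queue_info, error);
 *	if (ret)
 *		return ret;
 *	return rte_flow_configure(port_id, &port_attr, 1, queue_attr_list,
 *				  error);
 * }
 * @endcode
 */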
8240
8241 /**
8242  * Create flow pattern template.
8243  *
8244  * @param[in] dev
8245  *   Pointer to the rte_eth_dev structure.
8246  * @param[in] attr
8247  *   Pointer to the item template attributes.
8248  * @param[in] items
8249  *   The template item pattern.
8250  * @param[out] error
8251  *   Pointer to error structure.
8252  *
8253  * @return
8254  *   The created template on success, NULL otherwise and rte_errno is set.
8255  */
8256 static struct rte_flow_pattern_template *
8257 mlx5_flow_pattern_template_create(struct rte_eth_dev *dev,
8258                 const struct rte_flow_pattern_template_attr *attr,
8259                 const struct rte_flow_item items[],
8260                 struct rte_flow_error *error)
8261 {
8262         const struct mlx5_flow_driver_ops *fops;
8263
8264         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8265                 rte_flow_error_set(error, ENOTSUP,
8266                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8267                                 NULL,
8268                                 "pattern create with incorrect steering mode");
8269                 return NULL;
8270         }
8271         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8272         return fops->pattern_template_create(dev, attr, items, error);
8273 }
8274
8275 /**
8276  * Destroy flow item template.
8277  *
8278  * @param[in] dev
8279  *   Pointer to the rte_eth_dev structure.
8280  * @param[in] template
8281  *   Pointer to the item template to be destroyed.
8282  * @param[out] error
8283  *   Pointer to error structure.
8284  *
8285  * @return
8286  *   0 on success, a negative errno value otherwise and rte_errno is set.
8287  */
8288 static int
8289 mlx5_flow_pattern_template_destroy(struct rte_eth_dev *dev,
8290                                    struct rte_flow_pattern_template *template,
8291                                    struct rte_flow_error *error)
8292 {
8293         const struct mlx5_flow_driver_ops *fops;
8294
8295         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8296                 return rte_flow_error_set(error, ENOTSUP,
8297                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8298                                 NULL,
8299                                 "pattern destroy with incorrect steering mode");
8300         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8301         return fops->pattern_template_destroy(dev, template, error);
8302 }
8303
8304 /**
8305  * Create flow actions template.
8306  *
8307  * @param[in] dev
8308  *   Pointer to the rte_eth_dev structure.
8309  * @param[in] attr
8310  *   Pointer to the action template attributes.
8311  * @param[in] actions
8312  *   Associated actions (list terminated by the END action).
8313  * @param[in] masks
8314  *   List of actions that marks which of each action's members are constant.
8315  * @param[out] error
8316  *   Pointer to error structure.
8317  *
8318  * @return
8319  *   The created template on success, NULL otherwise and rte_errno is set.
8320  */
8321 static struct rte_flow_actions_template *
8322 mlx5_flow_actions_template_create(struct rte_eth_dev *dev,
8323                         const struct rte_flow_actions_template_attr *attr,
8324                         const struct rte_flow_action actions[],
8325                         const struct rte_flow_action masks[],
8326                         struct rte_flow_error *error)
8327 {
8328         const struct mlx5_flow_driver_ops *fops;
8329
8330         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8331                 rte_flow_error_set(error, ENOTSUP,
8332                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8333                                 NULL,
8334                                 "action create with incorrect steering mode");
8335                 return NULL;
8336         }
8337         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8338         return fops->actions_template_create(dev, attr, actions, masks, error);
8339 }
8340
8341 /**
8342  * Destroy flow action template.
8343  *
8344  * @param[in] dev
8345  *   Pointer to the rte_eth_dev structure.
8346  * @param[in] template
8347  *   Pointer to the action template to be destroyed.
8348  * @param[out] error
8349  *   Pointer to error structure.
8350  *
8351  * @return
8352  *   0 on success, a negative errno value otherwise and rte_errno is set.
8353  */
8354 static int
8355 mlx5_flow_actions_template_destroy(struct rte_eth_dev *dev,
8356                                    struct rte_flow_actions_template *template,
8357                                    struct rte_flow_error *error)
8358 {
8359         const struct mlx5_flow_driver_ops *fops;
8360
8361         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8362                 return rte_flow_error_set(error, ENOTSUP,
8363                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8364                                 NULL,
8365                                 "action destroy with incorrect steering mode");
8366         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8367         return fops->actions_template_destroy(dev, template, error);
8368 }
8369
8370 /**
8371  * Create flow table.
8372  *
8373  * @param[in] dev
8374  *   Pointer to the rte_eth_dev structure.
8375  * @param[in] attr
8376  *   Pointer to the table attributes.
8377  * @param[in] item_templates
8378  *   Item template array to be bound to the table.
8379  * @param[in] nb_item_templates
8380  *   Number of item templates.
8381  * @param[in] action_templates
8382  *   Action template array to be bound to the table.
8383  * @param[in] nb_action_templates
8384  *   Number of action templates.
8385  * @param[out] error
8386  *   Pointer to error structure.
8387  *
8388  * @return
8389  *    Table on success, NULL otherwise and rte_errno is set.
8390  */
8391 static struct rte_flow_template_table *
8392 mlx5_flow_table_create(struct rte_eth_dev *dev,
8393                        const struct rte_flow_template_table_attr *attr,
8394                        struct rte_flow_pattern_template *item_templates[],
8395                        uint8_t nb_item_templates,
8396                        struct rte_flow_actions_template *action_templates[],
8397                        uint8_t nb_action_templates,
8398                        struct rte_flow_error *error)
8399 {
8400         const struct mlx5_flow_driver_ops *fops;
8401
8402         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8403                 rte_flow_error_set(error, ENOTSUP,
8404                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8405                                 NULL,
8406                                 "table create with incorrect steering mode");
8407                 return NULL;
8408         }
8409         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8410         return fops->template_table_create(dev,
8411                                            attr,
8412                                            item_templates,
8413                                            nb_item_templates,
8414                                            action_templates,
8415                                            nb_action_templates,
8416                                            error);
8417 }
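
/*
 * Usage sketch (illustrative only): building a minimal template table through
 * the generic rte_flow API, which reaches the callbacks above. Matching a
 * single ETH item and steering to queue 0 are arbitrary choices; the helper
 * name table_create_example() is hypothetical.
 *
 * @code
 * static struct rte_flow_template_table *
 * table_create_example(uint16_t port_id, struct rte_flow_error *error)
 * {
 *	const struct rte_flow_pattern_template_attr pt_attr = {
 *		.ingress = 1,
 *	};
 *	const struct rte_flow_item pattern[] = {
 *		{
 *			.type = RTE_FLOW_ITEM_TYPE_ETH,
 *			.mask = &rte_flow_item_eth_mask,
 *		},
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	const struct rte_flow_actions_template_attr at_attr = {
 *		.ingress = 1,
 *	};
 *	const struct rte_flow_action_queue queue = { .index = 0 };
 *	const struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	const struct rte_flow_action masks[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	const struct rte_flow_template_table_attr tbl_attr = {
 *		.flow_attr = { .ingress = 1 },
 *		.nb_flows = 64,
 *	};
 *	struct rte_flow_pattern_template *pt;
 *	struct rte_flow_actions_template *at;
 *
 *	pt = rte_flow_pattern_template_create(port_id, &pt_attr, pattern,
 *					      error);
 *	at = rte_flow_actions_template_create(port_id, &at_attr, actions,
 *					      masks, error);
 *	if (!pt || !at)
 *		return NULL;
 *	return rte_flow_template_table_create(port_id, &tbl_attr,
 *					      &pt, 1, &at, 1, error);
 * }
 * @endcode
 */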
8418
8419 /**
8420  * PMD destroy flow table.
8421  *
8422  * @param[in] dev
8423  *   Pointer to the rte_eth_dev structure.
8424  * @param[in] table
8425  *   Pointer to the table to be destroyed.
8426  * @param[out] error
8427  *   Pointer to error structure.
8428  *
8429  * @return
8430  *   0 on success, a negative errno value otherwise and rte_errno is set.
8431  */
8432 static int
8433 mlx5_flow_table_destroy(struct rte_eth_dev *dev,
8434                         struct rte_flow_template_table *table,
8435                         struct rte_flow_error *error)
8436 {
8437         const struct mlx5_flow_driver_ops *fops;
8438
8439         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8440                 return rte_flow_error_set(error, ENOTSUP,
8441                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8442                                 NULL,
8443                                 "table destroy with incorrect steering mode");
8444         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8445         return fops->template_table_destroy(dev, table, error);
8446 }
8447
8448 /**
8449  * Enqueue flow creation.
8450  *
8451  * @param[in] dev
8452  *   Pointer to the rte_eth_dev structure.
8453  * @param[in] queue_id
8454  *   The queue to create the flow.
8455  * @param[in] attr
8456  *   Pointer to the flow operation attributes.
8457  * @param[in] items
8458  *   Items with flow spec values.
8459  * @param[in] pattern_template_index
8460  *   Index of the item pattern template to use from the table.
8461  * @param[in] actions
8462  *   Actions with flow spec values.
8463  * @param[in] action_template_index
8464  *   Index of the action template to use from the table.
8465  * @param[in] user_data
8466  *   Pointer to the user_data.
8467  * @param[out] error
8468  *   Pointer to error structure.
8469  *
8470  * @return
8471  *    Flow pointer on success, NULL otherwise and rte_errno is set.
8472  */
8473 static struct rte_flow *
8474 mlx5_flow_async_flow_create(struct rte_eth_dev *dev,
8475                             uint32_t queue_id,
8476                             const struct rte_flow_op_attr *attr,
8477                             struct rte_flow_template_table *table,
8478                             const struct rte_flow_item items[],
8479                             uint8_t pattern_template_index,
8480                             const struct rte_flow_action actions[],
8481                             uint8_t action_template_index,
8482                             void *user_data,
8483                             struct rte_flow_error *error)
8484 {
8485         const struct mlx5_flow_driver_ops *fops;
8486
8487         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW) {
8488                 rte_flow_error_set(error, ENOTSUP,
8489                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8490                                 NULL,
8491                                 "flow_q create with incorrect steering mode");
8492                 return NULL;
8493         }
8494         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8495         return fops->async_flow_create(dev, queue_id, attr, table,
8496                                        items, pattern_template_index,
8497                                        actions, action_template_index,
8498                                        user_data, error);
8499 }
8500
8501 /**
8502  * Enqueue flow destruction.
8503  *
8504  * @param[in] dev
8505  *   Pointer to the rte_eth_dev structure.
8506  * @param[in] queue
8507  *   The queue to destroy the flow.
8508  * @param[in] attr
8509  *   Pointer to the flow operation attributes.
8510  * @param[in] flow
8511  *   Pointer to the flow to be destroyed.
8512  * @param[in] user_data
8513  *   Pointer to the user_data.
8514  * @param[out] error
8515  *   Pointer to error structure.
8516  *
8517  * @return
8518  *    0 on success, negative value otherwise and rte_errno is set.
8519  */
8520 static int
8521 mlx5_flow_async_flow_destroy(struct rte_eth_dev *dev,
8522                              uint32_t queue,
8523                              const struct rte_flow_op_attr *attr,
8524                              struct rte_flow *flow,
8525                              void *user_data,
8526                              struct rte_flow_error *error)
8527 {
8528         const struct mlx5_flow_driver_ops *fops;
8529
8530         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8531                 return rte_flow_error_set(error, ENOTSUP,
8532                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8533                                 NULL,
8534                                 "flow_q destroy with incorrect steering mode");
8535         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8536         return fops->async_flow_destroy(dev, queue, attr, flow,
8537                                         user_data, error);
8538 }
8539
8540 /**
8541  * Pull the enqueued flows.
8542  *
8543  * @param[in] dev
8544  *   Pointer to the rte_eth_dev structure.
8545  * @param[in] queue
8546  *   The queue to pull the result.
8547  * @param[in/out] res
8548  *   Array to save the results.
8549  * @param[in] n_res
8550  *   Number of entries available in the result array.
8551  * @param[out] error
8552  *   Pointer to error structure.
8553  *
8554  * @return
8555  *    Result number on success, negative value otherwise and rte_errno is set.
8556  */
8557 static int
8558 mlx5_flow_pull(struct rte_eth_dev *dev,
8559                uint32_t queue,
8560                struct rte_flow_op_result res[],
8561                uint16_t n_res,
8562                struct rte_flow_error *error)
8563 {
8564         const struct mlx5_flow_driver_ops *fops;
8565
8566         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8567                 return rte_flow_error_set(error, ENOTSUP,
8568                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8569                                 NULL,
8570                                 "flow_q pull with incorrect steering mode");
8571         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8572         return fops->pull(dev, queue, res, n_res, error);
8573 }
8574
8575 /**
8576  * Push the enqueued flows.
8577  *
8578  * @param[in] dev
8579  *   Pointer to the rte_eth_dev structure.
8580  * @param[in] queue
8581  *   The queue to push the flows.
8582  * @param[out] error
8583  *   Pointer to error structure.
8584  *
8585  * @return
8586  *    0 on success, negative value otherwise and rte_errno is set.
8587  */
8588 static int
8589 mlx5_flow_push(struct rte_eth_dev *dev,
8590                uint32_t queue,
8591                struct rte_flow_error *error)
8592 {
8593         const struct mlx5_flow_driver_ops *fops;
8594
8595         if (flow_get_drv_type(dev, NULL) != MLX5_FLOW_TYPE_HW)
8596                 return rte_flow_error_set(error, ENOTSUP,
8597                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
8598                                 NULL,
8599                                 "flow_q push with incorrect steering mode");
8600         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8601         return fops->push(dev, queue, error);
8602 }
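
/*
 * Usage sketch (illustrative only): enqueueing one flow rule on a table
 * created as above, then pushing the queue and polling for completion. The
 * helper name async_flow_example() is hypothetical and the result statuses
 * are not inspected.
 *
 * @code
 * static int
 * async_flow_example(uint16_t port_id, uint32_t queue,
 *		      struct rte_flow_template_table *table,
 *		      const struct rte_flow_item pattern[],
 *		      const struct rte_flow_action actions[],
 *		      struct rte_flow_error *error)
 * {
 *	const struct rte_flow_op_attr op_attr = { .postpone = 1 };
 *	struct rte_flow_op_result res[8];
 *	struct rte_flow *flow;
 *	int ret;
 *
 *	flow = rte_flow_async_create(port_id, queue, &op_attr, table,
 *				     pattern, 0, actions, 0, NULL, error);
 *	if (!flow)
 *		return -rte_errno;
 *	ret = rte_flow_push(port_id, queue, error);
 *	if (ret)
 *		return ret;
 *	do {
 *		ret = rte_flow_pull(port_id, queue, res, RTE_DIM(res), error);
 *	} while (ret == 0);
 *	return ret < 0 ? ret : 0;
 * }
 * @endcode
 */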
8603
8604 /**
8605  * Create shared action.
8606  *
8607  * @param[in] dev
8608  *   Pointer to the rte_eth_dev structure.
8609  * @param[in] queue
8610  *   The queue to be used.
8611  * @param[in] attr
8612  *   Operation attribute.
8613  * @param[in] conf
8614  *   Indirect action configuration.
8615  * @param[in] action
8616  *   rte_flow action detail.
8617  * @param[in] user_data
8618  *   Pointer to the user_data.
8619  * @param[out] error
8620  *   Pointer to error structure.
8621  *
8622  * @return
8623  *   Action handle on success, NULL otherwise and rte_errno is set.
8624  */
8625 static struct rte_flow_action_handle *
8626 mlx5_flow_async_action_handle_create(struct rte_eth_dev *dev, uint32_t queue,
8627                                  const struct rte_flow_op_attr *attr,
8628                                  const struct rte_flow_indir_action_conf *conf,
8629                                  const struct rte_flow_action *action,
8630                                  void *user_data,
8631                                  struct rte_flow_error *error)
8632 {
8633         const struct mlx5_flow_driver_ops *fops =
8634                         flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8635
8636         return fops->async_action_create(dev, queue, attr, conf, action,
8637                                          user_data, error);
8638 }
8639
8640 /**
8641  * Update shared action.
8642  *
8643  * @param[in] dev
8644  *   Pointer to the rte_eth_dev structure.
8645  * @param[in] queue
8646  *   The queue to be used.
8647  * @param[in] attr
8648  *   Operation attribute.
8649  * @param[in] handle
8650  *   Action handle to be updated.
8651  * @param[in] update
8652  *   Update value.
8653  * @param[in] user_data
8654  *   Pointer to the user_data.
8655  * @param[out] error
8656  *   Pointer to error structure.
8657  *
8658  * @return
8659  *   0 on success, negative value otherwise and rte_errno is set.
8660  */
8661 static int
8662 mlx5_flow_async_action_handle_update(struct rte_eth_dev *dev, uint32_t queue,
8663                                      const struct rte_flow_op_attr *attr,
8664                                      struct rte_flow_action_handle *handle,
8665                                      const void *update,
8666                                      void *user_data,
8667                                      struct rte_flow_error *error)
8668 {
8669         const struct mlx5_flow_driver_ops *fops =
8670                         flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8671
8672         return fops->async_action_update(dev, queue, attr, handle,
8673                                          update, user_data, error);
8674 }
8675
8676 /**
8677  * Destroy shared action.
8678  *
8679  * @param[in] dev
8680  *   Pointer to the rte_eth_dev structure.
8681  * @param[in] queue
8682  *   The queue to be used.
8683  * @param[in] attr
8684  *   Operation attribute.
8685  * @param[in] handle
8686  *   Action handle to be destroyed.
8687  * @param[in] user_data
8688  *   Pointer to the user_data.
8689  * @param[out] error
8690  *   Pointer to error structure.
8691  *
8692  * @return
8693  *   0 on success, negative value otherwise and rte_errno is set.
8694  */
8695 static int
8696 mlx5_flow_async_action_handle_destroy(struct rte_eth_dev *dev, uint32_t queue,
8697                                       const struct rte_flow_op_attr *attr,
8698                                       struct rte_flow_action_handle *handle,
8699                                       void *user_data,
8700                                       struct rte_flow_error *error)
8701 {
8702         const struct mlx5_flow_driver_ops *fops =
8703                         flow_get_drv_ops(MLX5_FLOW_TYPE_HW);
8704
8705         return fops->async_action_destroy(dev, queue, attr, handle,
8706                                           user_data, error);
8707 }
8708
8709 /**
8710  * Allocate new memory for the counter values, wrapped by all the needed
8711  * management structures.
8712  *
8713  * @param[in] sh
8714  *   Pointer to mlx5_dev_ctx_shared object.
8715  *
8716  * @return
8717  *   0 on success, a negative errno value otherwise.
8718  */
8719 static int
8720 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
8721 {
8722         struct mlx5_counter_stats_mem_mng *mem_mng;
8723         volatile struct flow_counter_stats *raw_data;
8724         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
8725         int size = (sizeof(struct flow_counter_stats) *
8726                         MLX5_COUNTERS_PER_POOL +
8727                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
8728                         sizeof(struct mlx5_counter_stats_mem_mng);
8729         size_t pgsize = rte_mem_page_size();
8730         uint8_t *mem;
8731         int ret;
8732         int i;
8733
8734         if (pgsize == (size_t)-1) {
8735                 DRV_LOG(ERR, "Failed to get mem page size");
8736                 rte_errno = ENOMEM;
8737                 return -ENOMEM;
8738         }
8739         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
8740         if (!mem) {
8741                 rte_errno = ENOMEM;
8742                 return -ENOMEM;
8743         }
8744         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
8745         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
8746         ret = mlx5_os_wrapped_mkey_create(sh->cdev->ctx, sh->cdev->pd,
8747                                           sh->cdev->pdn, mem, size,
8748                                           &mem_mng->wm);
8749         if (ret) {
8750                 rte_errno = errno;
8751                 mlx5_free(mem);
8752                 return -rte_errno;
8753         }
8754         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
8755         raw_data = (volatile struct flow_counter_stats *)mem;
8756         for (i = 0; i < raws_n; ++i) {
8757                 mem_mng->raws[i].mem_mng = mem_mng;
8758                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
8759         }
8760         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
8761                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
8762                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
8763                                  next);
8764         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
8765         sh->cmng.mem_mng = mem_mng;
8766         return 0;
8767 }
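
/*
 * Layout note (added for clarity): the single allocation above is carved up as
 *   [raws_n * MLX5_COUNTERS_PER_POOL counter value entries]
 *   [raws_n   struct mlx5_counter_stats_raw descriptors]
 *   [one      struct mlx5_counter_stats_mem_mng]
 * The management structure sits at the very end of the buffer and each raw
 * descriptor points back into the value area, MLX5_COUNTERS_PER_POOL entries
 * apart. The last MLX5_MAX_PENDING_QUERIES descriptors are kept on the free
 * list for in-flight queries.
 */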
8768
8769 /**
8770  * Set the statistic memory to the new counter pool.
8771  *
8772  * @param[in] sh
8773  *   Pointer to mlx5_dev_ctx_shared object.
8774  * @param[in] pool
8775  *   Pointer to the pool to set the statistic memory.
8776  *
8777  * @return
8778  *   0 on success, a negative errno value otherwise.
8779  */
8780 static int
8781 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
8782                                struct mlx5_flow_counter_pool *pool)
8783 {
8784         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8785         /* Resize the statistics memory once it is used up. */
8786         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
8787             mlx5_flow_create_counter_stat_mem_mng(sh)) {
8788                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
8789                 return -1;
8790         }
8791         rte_spinlock_lock(&pool->sl);
8792         pool->raw = cmng->mem_mng->raws + pool->index %
8793                     MLX5_CNT_CONTAINER_RESIZE;
8794         rte_spinlock_unlock(&pool->sl);
8795         pool->raw_hw = NULL;
8796         return 0;
8797 }
8798
8799 #define MLX5_POOL_QUERY_FREQ_US 1000000
8800
8801 /**
8802  * Set the periodic procedure for triggering asynchronous batch queries for all
8803  * the counter pools.
8804  *
8805  * @param[in] sh
8806  *   Pointer to mlx5_dev_ctx_shared object.
8807  */
8808 void
8809 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
8810 {
8811         uint32_t pools_n, us;
8812
8813         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
8814         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
8815         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
8816         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
8817                 sh->cmng.query_thread_on = 0;
8818                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
8819         } else {
8820                 sh->cmng.query_thread_on = 1;
8821         }
8822 }
8823
8824 /**
8825  * The periodic procedure for triggering asynchronous batch queries for all the
8826  * counter pools. This function is expected to be called from the host thread.
8827  *
8828  * @param[in] arg
8829  *   The parameter for the alarm process.
8830  */
8831 void
8832 mlx5_flow_query_alarm(void *arg)
8833 {
8834         struct mlx5_dev_ctx_shared *sh = arg;
8835         int ret;
8836         uint16_t pool_index = sh->cmng.pool_index;
8837         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8838         struct mlx5_flow_counter_pool *pool;
8839         uint16_t n_valid;
8840
8841         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
8842                 goto set_alarm;
8843         rte_spinlock_lock(&cmng->pool_update_sl);
8844         pool = cmng->pools[pool_index];
8845         n_valid = cmng->n_valid;
8846         rte_spinlock_unlock(&cmng->pool_update_sl);
8847         /* Set the statistics memory to the newly created pool. */
8848         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
8849                 goto set_alarm;
8850         if (pool->raw_hw)
8851                 /* There is a pool query in progress. */
8852                 goto set_alarm;
8853         pool->raw_hw =
8854                 LIST_FIRST(&sh->cmng.free_stat_raws);
8855         if (!pool->raw_hw)
8856                 /* No free counter statistics raw memory. */
8857                 goto set_alarm;
8858         /*
8859          * Identify the counters released between the query trigger and the
8860          * query handling more efficiently. A counter released in this gap
8861          * period must wait for a new query round, since the newly arrived
8862          * packets are not yet taken into account.
8863          */
8864         pool->query_gen++;
8865         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
8866                                                MLX5_COUNTERS_PER_POOL,
8867                                                NULL, NULL,
8868                                                pool->raw_hw->mem_mng->wm.lkey,
8869                                                (void *)(uintptr_t)
8870                                                pool->raw_hw->data,
8871                                                sh->devx_comp,
8872                                                (uint64_t)(uintptr_t)pool);
8873         if (ret) {
8874                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
8875                         " %d", pool->min_dcs->id);
8876                 pool->raw_hw = NULL;
8877                 goto set_alarm;
8878         }
8879         LIST_REMOVE(pool->raw_hw, next);
8880         sh->cmng.pending_queries++;
8881         pool_index++;
8882         if (pool_index >= n_valid)
8883                 pool_index = 0;
8884 set_alarm:
8885         sh->cmng.pool_index = pool_index;
8886         mlx5_set_query_alarm(sh);
8887 }
8888
8889 /**
8890  * Check for newly aged flows in the counter pool and raise the aging event.
8891  *
8892  * @param[in] sh
8893  *   Pointer to mlx5_dev_ctx_shared object.
8894  * @param[in] pool
8895  *   Pointer to the current counter pool.
8896  */
8897 static void
8898 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
8899                    struct mlx5_flow_counter_pool *pool)
8900 {
8901         struct mlx5_priv *priv;
8902         struct mlx5_flow_counter *cnt;
8903         struct mlx5_age_info *age_info;
8904         struct mlx5_age_param *age_param;
8905         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
8906         struct mlx5_counter_stats_raw *prev = pool->raw;
8907         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
8908         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
8909         uint16_t expected = AGE_CANDIDATE;
8910         uint32_t i;
8911
8912         pool->time_of_last_age_check = curr_time;
8913         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
8914                 cnt = MLX5_POOL_GET_CNT(pool, i);
8915                 age_param = MLX5_CNT_TO_AGE(cnt);
8916                 if (__atomic_load_n(&age_param->state,
8917                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
8918                         continue;
8919                 if (cur->data[i].hits != prev->data[i].hits) {
8920                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
8921                                          __ATOMIC_RELAXED);
8922                         continue;
8923                 }
8924                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
8925                                        time_delta,
8926                                        __ATOMIC_RELAXED) <= age_param->timeout)
8927                         continue;
8928                 /*
8929                  * Hold the lock first; otherwise, if the release
8930                  * happens between setting the state to AGE_TMOUT
8931                  * and the tailq operation, the release procedure
8932                  * may delete a non-existent tailq node.
8933                  */
8934                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
8935                 age_info = GET_PORT_AGE_INFO(priv);
8936                 rte_spinlock_lock(&age_info->aged_sl);
8937                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
8938                                                 AGE_TMOUT, false,
8939                                                 __ATOMIC_RELAXED,
8940                                                 __ATOMIC_RELAXED)) {
8941                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
8942                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
8943                 }
8944                 rte_spinlock_unlock(&age_info->aged_sl);
8945         }
8946         mlx5_age_event_prepare(sh);
8947 }
8948
8949 /**
8950  * Handler for the HW response with ready values from an asynchronous batch
8951  * query. This function is expected to be called from the host thread.
8952  *
8953  * @param[in] sh
8954  *   The pointer to the shared device context.
8955  * @param[in] async_id
8956  *   The Devx async ID.
8957  * @param[in] status
8958  *   The status of the completion.
8959  */
8960 void
8961 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
8962                                   uint64_t async_id, int status)
8963 {
8964         struct mlx5_flow_counter_pool *pool =
8965                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
8966         struct mlx5_counter_stats_raw *raw_to_free;
8967         uint8_t query_gen = pool->query_gen ^ 1;
8968         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
8969         enum mlx5_counter_type cnt_type =
8970                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
8971                                 MLX5_COUNTER_TYPE_ORIGIN;
8972
8973         if (unlikely(status)) {
8974                 raw_to_free = pool->raw_hw;
8975         } else {
8976                 raw_to_free = pool->raw;
8977                 if (pool->is_aged)
8978                         mlx5_flow_aging_check(sh, pool);
8979                 rte_spinlock_lock(&pool->sl);
8980                 pool->raw = pool->raw_hw;
8981                 rte_spinlock_unlock(&pool->sl);
8982                 /* Be sure the new raw counters data is updated in memory. */
8983                 rte_io_wmb();
8984                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
8985                         rte_spinlock_lock(&cmng->csl[cnt_type]);
8986                         TAILQ_CONCAT(&cmng->counters[cnt_type],
8987                                      &pool->counters[query_gen], next);
8988                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
8989                 }
8990         }
8991         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
8992         pool->raw_hw = NULL;
8993         sh->cmng.pending_queries--;
8994 }
8995
8996 static int
8997 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
8998                     const struct flow_grp_info *grp_info,
8999                     struct rte_flow_error *error)
9000 {
9001         if (grp_info->transfer && grp_info->external &&
9002             grp_info->fdb_def_rule) {
9003                 if (group == UINT32_MAX)
9004                         return rte_flow_error_set
9005                                                 (error, EINVAL,
9006                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
9007                                                  NULL,
9008                                                  "group index not supported");
9009                 *table = group + 1;
9010         } else {
9011                 *table = group;
9012         }
9013         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
9014         return 0;
9015 }
9016
9017 /**
9018  * Translate the rte_flow group index to HW table value.
9019  *
9020  * If tunnel offload is disabled, all group IDs are converted to flow
9021  * table IDs using the standard method.
9022  * If tunnel offload is enabled, a group ID can be converted using either
9023  * the standard or the tunnel conversion method. The conversion method is
9024  * selected according to the flags in the `grp_info` parameter:
9025  * - Internal groups (grp_info.external == 0) are converted using the
9026  *   standard method.
9027  * - Group IDs in a JUMP action are converted with the tunnel method.
9028  * - For the group ID in the rule attributes, the conversion depends on
9029  *   the rule type and the group ID value:
9030  *   ** non-zero group attributes are converted with the tunnel method
9031  *   ** a zero group attribute in a non-tunnel rule is converted using the
9032  *      standard method - there is only one root table
9033  *   ** a zero group attribute in a tunnel set (steer) rule is converted
9034  *      with the standard method - single root table
9035  *   ** a zero group attribute in a tunnel match rule is a special OvS
9036  *      case: that value is used for portability reasons. That group
9037  *      ID is converted with the tunnel conversion method.
9038  *
9039  * @param[in] dev
9040  *   Port device
9041  * @param[in] tunnel
9042  *   PMD tunnel offload object
9043  * @param[in] group
9044  *   rte_flow group index value.
9045  * @param[out] table
9046  *   HW table value.
9047  * @param[in] grp_info
9048  *   flags used for conversion
9049  * @param[out] error
9050  *   Pointer to error structure.
9051  *
9052  * @return
9053  *   0 on success, a negative errno value otherwise and rte_errno is set.
9054  */
9055 int
9056 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
9057                          const struct mlx5_flow_tunnel *tunnel,
9058                          uint32_t group, uint32_t *table,
9059                          const struct flow_grp_info *grp_info,
9060                          struct rte_flow_error *error)
9061 {
9062         int ret;
9063         bool standard_translation;
9064
9065         if (!grp_info->skip_scale && grp_info->external &&
9066             group < MLX5_MAX_TABLES_EXTERNAL)
9067                 group *= MLX5_FLOW_TABLE_FACTOR;
9068         if (is_tunnel_offload_active(dev)) {
9069                 standard_translation = !grp_info->external ||
9070                                         grp_info->std_tbl_fix;
9071         } else {
9072                 standard_translation = true;
9073         }
9074         DRV_LOG(DEBUG,
9075                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
9076                 dev->data->port_id, group, grp_info->transfer,
9077                 grp_info->external, grp_info->fdb_def_rule,
9078                 standard_translation ? "STANDARD" : "TUNNEL");
9079         if (standard_translation)
9080                 ret = flow_group_to_table(dev->data->port_id, group, table,
9081                                           grp_info, error);
9082         else
9083                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
9084                                                       table, error);
9085
9086         return ret;
9087 }
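
/*
 * Worked example of the translation above (values are symbolic, only the
 * relations are taken from the code): an external group G below
 * MLX5_MAX_TABLES_EXTERNAL is first scaled to G * MLX5_FLOW_TABLE_FACTOR
 * unless skip_scale is set. With the standard method the HW table then is:
 *
 *   transfer && external && fdb_def_rule : table = scaled_group + 1
 *   otherwise                            : table = scaled_group
 *
 * Group UINT32_MAX is rejected in the first case because the +1 offset
 * would wrap around.
 */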
9088
9089 /**
9090  * Discover availability of metadata reg_c's.
9091  *
9092  * Iteratively use test flows to check availability.
9093  *
9094  * @param[in] dev
9095  *   Pointer to the Ethernet device structure.
9096  *
9097  * @return
9098  *   0 on success, a negative errno value otherwise and rte_errno is set.
9099  */
9100 int
9101 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
9102 {
9103         struct mlx5_priv *priv = dev->data->dev_private;
9104         enum modify_reg idx;
9105         int n = 0;
9106
9107         /* reg_c[0] and reg_c[1] are reserved. */
9108         priv->sh->flow_mreg_c[n++] = REG_C_0;
9109         priv->sh->flow_mreg_c[n++] = REG_C_1;
9110         /* Discover availability of other reg_c's. */
9111         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
9112                 struct rte_flow_attr attr = {
9113                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
9114                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
9115                         .ingress = 1,
9116                 };
9117                 struct rte_flow_item items[] = {
9118                         [0] = {
9119                                 .type = RTE_FLOW_ITEM_TYPE_END,
9120                         },
9121                 };
9122                 struct rte_flow_action actions[] = {
9123                         [0] = {
9124                                 .type = (enum rte_flow_action_type)
9125                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
9126                                 .conf = &(struct mlx5_flow_action_copy_mreg){
9127                                         .src = REG_C_1,
9128                                         .dst = idx,
9129                                 },
9130                         },
9131                         [1] = {
9132                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
9133                                 .conf = &(struct rte_flow_action_jump){
9134                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
9135                                 },
9136                         },
9137                         [2] = {
9138                                 .type = RTE_FLOW_ACTION_TYPE_END,
9139                         },
9140                 };
9141                 uint32_t flow_idx;
9142                 struct rte_flow *flow;
9143                 struct rte_flow_error error;
9144
9145                 if (!priv->sh->config.dv_flow_en)
9146                         break;
9147                 /* Create internal flow, validation skips copy action. */
9148                 flow_idx = flow_list_create(dev, MLX5_FLOW_TYPE_GEN, &attr,
9149                                         items, actions, false, &error);
9150                 flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9151                                       flow_idx);
9152                 if (!flow)
9153                         continue;
9154                 priv->sh->flow_mreg_c[n++] = idx;
9155                 flow_list_destroy(dev, MLX5_FLOW_TYPE_GEN, flow_idx);
9156         }
9157         for (; n < MLX5_MREG_C_NUM; ++n)
9158                 priv->sh->flow_mreg_c[n] = REG_NON;
9159         priv->sh->metadata_regc_check_flag = 1;
9160         return 0;
9161 }
9162
9163 int
9164 save_dump_file(const uint8_t *data, uint32_t size,
9165         uint32_t type, uint64_t id, void *arg, FILE *file)
9166 {
9167         char line[BUF_SIZE];
9168         uint32_t out = 0;
9169         uint32_t k;
9170         uint32_t actions_num;
9171         struct rte_flow_query_count *count;
9172
9173         memset(line, 0, BUF_SIZE);
9174         switch (type) {
9175         case DR_DUMP_REC_TYPE_PMD_MODIFY_HDR:
9176                 actions_num = *(uint32_t *)(arg);
9177                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",%d,",
9178                                 type, id, actions_num);
9179                 break;
9180         case DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT:
9181                 out += snprintf(line + out, BUF_SIZE - out, "%d,0x%" PRIx64 ",",
9182                                 type, id);
9183                 break;
9184         case DR_DUMP_REC_TYPE_PMD_COUNTER:
9185                 count = (struct rte_flow_query_count *)arg;
9186                 fprintf(file,
9187                         "%d,0x%" PRIx64 ",%" PRIu64 ",%" PRIu64 "\n",
9188                         type, id, count->hits, count->bytes);
9189                 return 0;
9190         default:
9191                 return -1;
9192         }
9193
9194         for (k = 0; k < size; k++) {
9195                 /* Make sure we do not overrun the line buffer length. */
9196                 if (out >= BUF_SIZE - 4) {
9197                         line[out] = '\0';
9198                         break;
9199                 }
9200                 out += snprintf(line + out, BUF_SIZE - out, "%02x",
9201                                 (data[k]) & 0xff);
9202         }
9203         fprintf(file, "%s\n", line);
9204         return 0;
9205 }
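
/*
 * The records emitted by save_dump_file() are single CSV-like lines; the
 * values below are placeholders, only the field layout is taken from the
 * code above:
 *
 *   modify header : <type>,0x<id>,<actions_num>,<hex dump of actions>
 *   pkt reformat  : <type>,0x<id>,<hex dump of reformat buffer>
 *   counter       : <type>,0x<id>,<hits>,<bytes>
 */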
9206
9207 int
9208 mlx5_flow_query_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
9209         struct rte_flow_query_count *count, struct rte_flow_error *error)
9210 {
9211         struct rte_flow_action action[2];
9212         enum mlx5_flow_drv_type ftype;
9213         const struct mlx5_flow_driver_ops *fops;
9214
9215         if (!flow) {
9216                 return rte_flow_error_set(error, ENOENT,
9217                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9218                                 NULL,
9219                                 "invalid flow handle");
9220         }
9221         action[0].type = RTE_FLOW_ACTION_TYPE_COUNT;
9222         action[1].type = RTE_FLOW_ACTION_TYPE_END;
9223         if (flow->counter) {
9224                 memset(count, 0, sizeof(struct rte_flow_query_count));
9225                 ftype = (enum mlx5_flow_drv_type)(flow->drv_type);
9226                 MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN &&
9227                                                 ftype < MLX5_FLOW_TYPE_MAX);
9228                 fops = flow_get_drv_ops(ftype);
9229                 return fops->query(dev, flow, action, count, error);
9230         }
9231         return -1;
9232 }
9233
9234 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9235 /**
9236  * Dump flow ipool data to file
9237  *
9238  * @param[in] dev
9239  *   The pointer to Ethernet device.
9240  * @param[in] file
9241  *   A pointer to a file for output.
9242  * @param[out] error
9243  *   Perform verbose error reporting if not NULL. PMDs initialize this
9244  *   structure in case of error only.
9245  * @return
9246  *   0 on success, a negative value otherwise.
9247  */
9248 int
9249 mlx5_flow_dev_dump_ipool(struct rte_eth_dev *dev,
9250         struct rte_flow *flow, FILE *file,
9251         struct rte_flow_error *error)
9252 {
9253         struct mlx5_priv *priv = dev->data->dev_private;
9254         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9255         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9256         uint32_t handle_idx;
9257         struct mlx5_flow_handle *dh;
9258         struct rte_flow_query_count count;
9259         uint32_t actions_num;
9260         const uint8_t *data;
9261         size_t size;
9262         uint64_t id;
9263         uint32_t type;
9264         void *action = NULL;
9265
9266         if (!flow) {
9267                 return rte_flow_error_set(error, ENOENT,
9268                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
9269                                 NULL,
9270                                 "invalid flow handle");
9271         }
9272         handle_idx = flow->dev_handles;
9273         /* query counter */
9274         if (flow->counter &&
9275         (!mlx5_counter_query(dev, flow->counter, false,
9276         &count.hits, &count.bytes, &action)) && action) {
9277                 id = (uint64_t)(uintptr_t)action;
9278                 type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9279                 save_dump_file(NULL, 0, type,
9280                         id, (void *)&count, file);
9281         }
9282
9283         while (handle_idx) {
9284                 dh = mlx5_ipool_get(priv->sh->ipool
9285                                 [MLX5_IPOOL_MLX5_FLOW], handle_idx);
9286                 if (!dh)
9287                         break; /* Avoid an endless loop on a missing handle. */
9288                 handle_idx = dh->next.next;
9289
9290                 /* Get modify_hdr and encap_decap buf from ipools. */
9291                 encap_decap = NULL;
9292                 modify_hdr = dh->dvh.modify_hdr;
9293
9294                 if (dh->dvh.rix_encap_decap) {
9295                         encap_decap = mlx5_ipool_get(priv->sh->ipool
9296                                                 [MLX5_IPOOL_DECAP_ENCAP],
9297                                                 dh->dvh.rix_encap_decap);
9298                 }
9299                 if (modify_hdr) {
9300                         data = (const uint8_t *)modify_hdr->actions;
9301                         size = (size_t)(modify_hdr->actions_num) * 8;
9302                         id = (uint64_t)(uintptr_t)modify_hdr->action;
9303                         actions_num = modify_hdr->actions_num;
9304                         type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9305                         save_dump_file(data, size, type, id,
9306                                                 (void *)(&actions_num), file);
9307                 }
9308                 if (encap_decap) {
9309                         data = encap_decap->buf;
9310                         size = encap_decap->size;
9311                         id = (uint64_t)(uintptr_t)encap_decap->action;
9312                         type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9313                         save_dump_file(data, size, type,
9314                                                 id, NULL, file);
9315                 }
9316         }
9317         return 0;
9318 }
9319
9320 /**
9321  * Dump all flow's encap_decap/modify_hdr/counter data to file
9322  *
9323  * @param[in] dev
9324  *   The pointer to Ethernet device.
9325  * @param[in] file
9326  *   A pointer to a file for output.
9327  * @param[out] error
9328  *   Perform verbose error reporting if not NULL. PMDs initialize this
9329  *   structure in case of error only.
9330  * @return
9331  *   0 on success, a negative value otherwise.
9332  */
9333 static int
9334 mlx5_flow_dev_dump_sh_all(struct rte_eth_dev *dev,
9335         FILE *file, struct rte_flow_error *error __rte_unused)
9336 {
9337         struct mlx5_priv *priv = dev->data->dev_private;
9338         struct mlx5_dev_ctx_shared *sh = priv->sh;
9339         struct mlx5_hlist *h;
9340         struct mlx5_flow_dv_modify_hdr_resource  *modify_hdr;
9341         struct mlx5_flow_dv_encap_decap_resource *encap_decap;
9342         struct rte_flow_query_count count;
9343         uint32_t actions_num;
9344         const uint8_t *data;
9345         size_t size;
9346         uint64_t id;
9347         uint32_t type;
9348         uint32_t i;
9349         uint32_t j;
9350         struct mlx5_list_inconst *l_inconst;
9351         struct mlx5_list_entry *e;
9352         int lcore_index;
9353         struct mlx5_flow_counter_mng *cmng = &priv->sh->cmng;
9354         uint32_t max;
9355         void *action;
9356
9357         /* The encap_decap hlist is lcore-shared, use the global core cache. */
9358         i = MLX5_LIST_GLOBAL;
9359         h = sh->encaps_decaps;
9360         if (h) {
9361                 for (j = 0; j <= h->mask; j++) {
9362                         l_inconst = &h->buckets[j].l;
9363                         if (!l_inconst || !l_inconst->cache[i])
9364                                 continue;
9365
9366                         e = LIST_FIRST(&l_inconst->cache[i]->h);
9367                         while (e) {
9368                                 encap_decap =
9369                                 (struct mlx5_flow_dv_encap_decap_resource *)e;
9370                                 data = encap_decap->buf;
9371                                 size = encap_decap->size;
9372                                 id = (uint64_t)(uintptr_t)encap_decap->action;
9373                                 type = DR_DUMP_REC_TYPE_PMD_PKT_REFORMAT;
9374                                 save_dump_file(data, size, type,
9375                                         id, NULL, file);
9376                                 e = LIST_NEXT(e, next);
9377                         }
9378                 }
9379         }
9380
9381         /* get modify_hdr */
9382         h = sh->modify_cmds;
9383         if (h) {
9384                 lcore_index = rte_lcore_index(rte_lcore_id());
9385                 if (unlikely(lcore_index == -1)) {
9386                         lcore_index = MLX5_LIST_NLCORE;
9387                         rte_spinlock_lock(&h->l_const.lcore_lock);
9388                 }
9389                 i = lcore_index;
9390
9391                 for (j = 0; j <= h->mask; j++) {
9392                         l_inconst = &h->buckets[j].l;
9393                         if (!l_inconst || !l_inconst->cache[i])
9394                                 continue;
9395
9396                         e = LIST_FIRST(&l_inconst->cache[i]->h);
9397                         while (e) {
9398                                 modify_hdr =
9399                                 (struct mlx5_flow_dv_modify_hdr_resource *)e;
9400                                 data = (const uint8_t *)modify_hdr->actions;
9401                                 size = (size_t)(modify_hdr->actions_num) * 8;
9402                                 actions_num = modify_hdr->actions_num;
9403                                 id = (uint64_t)(uintptr_t)modify_hdr->action;
9404                                 type = DR_DUMP_REC_TYPE_PMD_MODIFY_HDR;
9405                                 save_dump_file(data, size, type, id,
9406                                                 (void *)(&actions_num), file);
9407                                 e = LIST_NEXT(e, next);
9408                         }
9409                 }
9410
9411                 if (unlikely(lcore_index == MLX5_LIST_NLCORE))
9412                         rte_spinlock_unlock(&h->l_const.lcore_lock);
9413         }
9414
9415         /* get counter */
9416         MLX5_ASSERT(cmng->n_valid <= cmng->n);
9417         max = MLX5_COUNTERS_PER_POOL * cmng->n_valid;
9418         for (j = 1; j <= max; j++) {
9419                 action = NULL;
9420                 if ((!mlx5_counter_query(dev, j, false, &count.hits,
9421                 &count.bytes, &action)) && action) {
9422                         id = (uint64_t)(uintptr_t)action;
9423                         type = DR_DUMP_REC_TYPE_PMD_COUNTER;
9424                         save_dump_file(NULL, 0, type,
9425                                         id, (void *)&count, file);
9426                 }
9427         }
9428         return 0;
9429 }
9430 #endif
9431
9432 /**
9433  * Dump flow raw hw data to file
9434  *
9435  * @param[in] dev
9436  *   The pointer to Ethernet device.
9437  * @param[in] file
9438  *   A pointer to a file for output.
9439  * @param[out] error
9440  *   Perform verbose error reporting if not NULL. PMDs initialize this
9441  *   structure in case of error only.
9442  * @return
9443  *   0 on success, a negative value otherwise.
9444  */
9445 int
9446 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
9447                    FILE *file,
9448                    struct rte_flow_error *error __rte_unused)
9449 {
9450         struct mlx5_priv *priv = dev->data->dev_private;
9451         struct mlx5_dev_ctx_shared *sh = priv->sh;
9452         uint32_t handle_idx;
9453         int ret;
9454         struct mlx5_flow_handle *dh;
9455         struct rte_flow *flow;
9456
9457         if (!sh->config.dv_flow_en) {
9458                 if (fputs("device dv flow disabled\n", file) <= 0)
9459                         return -errno;
9460                 return -ENOTSUP;
9461         }
9462
9463         /* dump all */
9464         if (!flow_idx) {
9465 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9466                 if (mlx5_flow_dev_dump_sh_all(dev, file, error))
9467                         return -EINVAL;
9468 #endif
9469                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
9470                                         sh->rx_domain,
9471                                         sh->tx_domain, file);
9472         }
9473         /* dump one */
9474         flow = mlx5_ipool_get(priv->flows[MLX5_FLOW_TYPE_GEN],
9475                         (uintptr_t)(void *)flow_idx);
9476         if (!flow)
9477                 return -EINVAL;
9478
9479 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9480         mlx5_flow_dev_dump_ipool(dev, flow, file, error);
9481 #endif
9482         handle_idx = flow->dev_handles;
9483         while (handle_idx) {
9484                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
9485                                 handle_idx);
9486                 if (!dh)
9487                         return -ENOENT;
9488                 if (dh->drv_flow) {
9489                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
9490                                         file);
9491                         if (ret)
9492                                 return -ENOENT;
9493                 }
9494                 handle_idx = dh->next.next;
9495         }
9496         return 0;
9497 }
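
/*
 * A minimal application-side sketch of triggering this dump through the
 * generic rte_flow API (port_id and the output path are assumptions for
 * illustration):
 *
 *   struct rte_flow_error err;
 *   FILE *f = fopen("/tmp/mlx5_flow_dump.txt", "w");
 *
 *   if (f != NULL) {
 *           // A NULL flow pointer requests a dump of all flows on the port.
 *           rte_flow_dev_dump(port_id, NULL, f, &err);
 *           fclose(f);
 *   }
 */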
9498
9499 /**
9500  * Get aged-out flows.
9501  *
9502  * @param[in] dev
9503  *   Pointer to the Ethernet device structure.
9504  * @param[in] contexts
9505  *   The address of an array of pointers to the aged-out flow contexts.
9506  * @param[in] nb_contexts
9507  *   The length of the context array.
9508  * @param[out] error
9509  *   Perform verbose error reporting if not NULL. Initialized in case of
9510  *   error only.
9511  *
9512  * @return
9513  *   the number of aged-out contexts reported on success, otherwise a
9514  *   negative errno value.
9515  *   If nb_contexts is 0, return the total number of aged-out contexts.
9516  *   If nb_contexts is not 0, return the number of aged-out flows reported
9517  *   in the context array.
9517  */
9518 int
9519 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
9520                         uint32_t nb_contexts, struct rte_flow_error *error)
9521 {
9522         const struct mlx5_flow_driver_ops *fops;
9523         struct rte_flow_attr attr = { .transfer = 0 };
9524
9525         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
9526                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
9527                 return fops->get_aged_flows(dev, contexts, nb_contexts,
9528                                                     error);
9529         }
9530         DRV_LOG(ERR,
9531                 "port %u get aged flows is not supported.",
9532                  dev->data->port_id);
9533         return -ENOTSUP;
9534 }
9535
9536 /* Wrapper for driver action_validate op callback */
9537 static int
9538 flow_drv_action_validate(struct rte_eth_dev *dev,
9539                          const struct rte_flow_indir_action_conf *conf,
9540                          const struct rte_flow_action *action,
9541                          const struct mlx5_flow_driver_ops *fops,
9542                          struct rte_flow_error *error)
9543 {
9544         static const char err_msg[] = "indirect action validation unsupported";
9545
9546         if (!fops->action_validate) {
9547                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9548                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9549                                    NULL, err_msg);
9550                 return -rte_errno;
9551         }
9552         return fops->action_validate(dev, conf, action, error);
9553 }
9554
9555 /**
9556  * Destroys the shared action by handle.
9557  *
9558  * @param dev
9559  *   Pointer to Ethernet device structure.
9560  * @param[in] handle
9561  *   Handle for the indirect action object to be destroyed.
9562  * @param[out] error
9563  *   Perform verbose error reporting if not NULL. PMDs initialize this
9564  *   structure in case of error only.
9565  *
9566  * @return
9567  *   0 on success, a negative errno value otherwise and rte_errno is set.
9568  *
9569  * @note: wrapper for driver action_destroy op callback.
9570  */
9571 static int
9572 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
9573                            struct rte_flow_action_handle *handle,
9574                            struct rte_flow_error *error)
9575 {
9576         static const char err_msg[] = "indirect action destruction unsupported";
9577         struct rte_flow_attr attr = { .transfer = 0 };
9578         const struct mlx5_flow_driver_ops *fops =
9579                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9580
9581         if (!fops->action_destroy) {
9582                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9583                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9584                                    NULL, err_msg);
9585                 return -rte_errno;
9586         }
9587         return fops->action_destroy(dev, handle, error);
9588 }
9589
9590 /* Wrapper for driver action_update op callback */
9591 static int
9592 flow_drv_action_update(struct rte_eth_dev *dev,
9593                        struct rte_flow_action_handle *handle,
9594                        const void *update,
9595                        const struct mlx5_flow_driver_ops *fops,
9596                        struct rte_flow_error *error)
9597 {
9598         static const char err_msg[] = "indirect action update unsupported";
9599
9600         if (!fops->action_update) {
9601                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9602                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9603                                    NULL, err_msg);
9604                 return -rte_errno;
9605         }
9606         return fops->action_update(dev, handle, update, error);
9607 }
9608
9609 /* Wrapper for driver action_query op callback */
9610 static int
9611 flow_drv_action_query(struct rte_eth_dev *dev,
9612                       const struct rte_flow_action_handle *handle,
9613                       void *data,
9614                       const struct mlx5_flow_driver_ops *fops,
9615                       struct rte_flow_error *error)
9616 {
9617         static const char err_msg[] = "indirect action query unsupported";
9618
9619         if (!fops->action_query) {
9620                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9621                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9622                                    NULL, err_msg);
9623                 return -rte_errno;
9624         }
9625         return fops->action_query(dev, handle, data, error);
9626 }
9627
9628 /**
9629  * Create indirect action for reuse in multiple flow rules.
9630  *
9631  * @param dev
9632  *   Pointer to Ethernet device structure.
9633  * @param conf
9634  *   Pointer to indirect action object configuration.
9635  * @param[in] action
9636  *   Action configuration for indirect action object creation.
9637  * @param[out] error
9638  *   Perform verbose error reporting if not NULL. PMDs initialize this
9639  *   structure in case of error only.
9640  * @return
9641  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
9642  */
9643 static struct rte_flow_action_handle *
9644 mlx5_action_handle_create(struct rte_eth_dev *dev,
9645                           const struct rte_flow_indir_action_conf *conf,
9646                           const struct rte_flow_action *action,
9647                           struct rte_flow_error *error)
9648 {
9649         static const char err_msg[] = "indirect action creation unsupported";
9650         struct rte_flow_attr attr = { .transfer = 0 };
9651         const struct mlx5_flow_driver_ops *fops =
9652                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9653
9654         if (flow_drv_action_validate(dev, conf, action, fops, error))
9655                 return NULL;
9656         if (!fops->action_create) {
9657                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
9658                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
9659                                    NULL, err_msg);
9660                 return NULL;
9661         }
9662         return fops->action_create(dev, conf, action, error);
9663 }
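
/*
 * A minimal application-side sketch of creating an indirect action that
 * reaches this wrapper via the generic rte_flow API (port_id is an
 * assumption for illustration):
 *
 *   struct rte_flow_error err;
 *   const struct rte_flow_indir_action_conf conf = { .ingress = 1 };
 *   const struct rte_flow_action count_action = {
 *           .type = RTE_FLOW_ACTION_TYPE_COUNT,
 *   };
 *   struct rte_flow_action_handle *handle =
 *           rte_flow_action_handle_create(port_id, &conf, &count_action, &err);
 *
 * The returned handle can then be referenced from multiple flow rules with
 * RTE_FLOW_ACTION_TYPE_INDIRECT.
 */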
9664
9665 /**
9666  * Updates in place the indirect action configuration pointed to by *handle*
9667  * with the configuration provided as the *update* argument.
9668  * Updating the indirect action configuration affects all flow rules reusing
9669  * the action via its handle.
9670  *
9671  * @param dev
9672  *   Pointer to Ethernet device structure.
9673  * @param[in] handle
9674  *   Handle for the indirect action to be updated.
9675  * @param[in] update
9676  *   Action specification used to modify the action pointed by handle.
9677  *   *update* can be of the same type as the action pointed to by the
9678  *   *handle* argument, or some other structure such as a wrapper, depending
9679  *   on the indirect action type.
9680  * @param[out] error
9681  *   Perform verbose error reporting if not NULL. PMDs initialize this
9682  *   structure in case of error only.
9683  *
9684  * @return
9685  *   0 on success, a negative errno value otherwise and rte_errno is set.
9686  */
9687 static int
9688 mlx5_action_handle_update(struct rte_eth_dev *dev,
9689                 struct rte_flow_action_handle *handle,
9690                 const void *update,
9691                 struct rte_flow_error *error)
9692 {
9693         struct rte_flow_attr attr = { .transfer = 0 };
9694         const struct mlx5_flow_driver_ops *fops =
9695                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9696         int ret;
9697
9698         ret = flow_drv_action_validate(dev, NULL,
9699                         (const struct rte_flow_action *)update, fops, error);
9700         if (ret)
9701                 return ret;
9702         return flow_drv_action_update(dev, handle, update, fops,
9703                                       error);
9704 }
9705
9706 /**
9707  * Query the indirect action by handle.
9708  *
9709  * This function allows retrieving action-specific data such as counters.
9710  * Data is gathered by special action which may be present/referenced in
9711  * more than one flow rule definition.
9712  *
9713  * see @RTE_FLOW_ACTION_TYPE_COUNT
9714  *
9715  * @param dev
9716  *   Pointer to Ethernet device structure.
9717  * @param[in] handle
9718  *   Handle for the indirect action to query.
9719  * @param[in, out] data
9720  *   Pointer to storage for the associated query data type.
9721  * @param[out] error
9722  *   Perform verbose error reporting if not NULL. PMDs initialize this
9723  *   structure in case of error only.
9724  *
9725  * @return
9726  *   0 on success, a negative errno value otherwise and rte_errno is set.
9727  */
9728 static int
9729 mlx5_action_handle_query(struct rte_eth_dev *dev,
9730                          const struct rte_flow_action_handle *handle,
9731                          void *data,
9732                          struct rte_flow_error *error)
9733 {
9734         struct rte_flow_attr attr = { .transfer = 0 };
9735         const struct mlx5_flow_driver_ops *fops =
9736                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9737
9738         return flow_drv_action_query(dev, handle, data, fops, error);
9739 }
9740
9741 /**
9742  * Destroy all indirect actions (shared RSS).
9743  *
9744  * @param dev
9745  *   Pointer to Ethernet device.
9746  *
9747  * @return
9748  *   0 on success, a negative errno value otherwise and rte_errno is set.
9749  */
9750 int
9751 mlx5_action_handle_flush(struct rte_eth_dev *dev)
9752 {
9753         struct rte_flow_error error;
9754         struct mlx5_priv *priv = dev->data->dev_private;
9755         struct mlx5_shared_action_rss *shared_rss;
9756         int ret = 0;
9757         uint32_t idx;
9758
9759         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
9760                       priv->rss_shared_actions, idx, shared_rss, next) {
9761                 ret |= mlx5_action_handle_destroy(dev,
9762                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
9763         }
9764         return ret;
9765 }
9766
9767 /**
9768  * Validate existing indirect actions against current device configuration
9769  * and attach them to device resources.
9770  *
9771  * @param dev
9772  *   Pointer to Ethernet device.
9773  *
9774  * @return
9775  *   0 on success, a negative errno value otherwise and rte_errno is set.
9776  */
9777 int
9778 mlx5_action_handle_attach(struct rte_eth_dev *dev)
9779 {
9780         struct mlx5_priv *priv = dev->data->dev_private;
9781         int ret = 0;
9782         struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9783
9784         LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9785                 const char *message;
9786                 uint32_t queue_idx;
9787
9788                 ret = mlx5_validate_rss_queues(dev, ind_tbl->queues,
9789                                                ind_tbl->queues_n,
9790                                                &message, &queue_idx);
9791                 if (ret != 0) {
9792                         DRV_LOG(ERR, "Port %u cannot use queue %u in RSS: %s",
9793                                 dev->data->port_id, ind_tbl->queues[queue_idx],
9794                                 message);
9795                         break;
9796                 }
9797         }
9798         if (ret != 0)
9799                 return ret;
9800         LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9801                 ret = mlx5_ind_table_obj_attach(dev, ind_tbl);
9802                 if (ret != 0) {
9803                         DRV_LOG(ERR, "Port %u could not attach "
9804                                 "indirection table obj %p",
9805                                 dev->data->port_id, (void *)ind_tbl);
9806                         goto error;
9807                 }
9808         }
9809
9810         return 0;
9811 error:
9812         ind_tbl_last = ind_tbl;
9813         LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9814                 if (ind_tbl == ind_tbl_last)
9815                         break;
9816                 if (mlx5_ind_table_obj_detach(dev, ind_tbl) != 0)
9817                         DRV_LOG(CRIT, "Port %u could not detach "
9818                                 "indirection table obj %p on rollback",
9819                                 dev->data->port_id, (void *)ind_tbl);
9820         }
9821         return ret;
9822 }
9823
9824 /**
9825  * Detach indirect actions of the device from its resources.
9826  *
9827  * @param dev
9828  *   Pointer to Ethernet device.
9829  *
9830  * @return
9831  *   0 on success, a negative errno value otherwise and rte_errno is set.
9832  */
9833 int
9834 mlx5_action_handle_detach(struct rte_eth_dev *dev)
9835 {
9836         struct mlx5_priv *priv = dev->data->dev_private;
9837         int ret = 0;
9838         struct mlx5_ind_table_obj *ind_tbl, *ind_tbl_last;
9839
9840         LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9841                 ret = mlx5_ind_table_obj_detach(dev, ind_tbl);
9842                 if (ret != 0) {
9843                         DRV_LOG(ERR, "Port %u could not detach "
9844                                 "indirection table obj %p",
9845                                 dev->data->port_id, (void *)ind_tbl);
9846                         goto error;
9847                 }
9848         }
9849         return 0;
9850 error:
9851         ind_tbl_last = ind_tbl;
9852         LIST_FOREACH(ind_tbl, &priv->standalone_ind_tbls, next) {
9853                 if (ind_tbl == ind_tbl_last)
9854                         break;
9855                 if (mlx5_ind_table_obj_attach(dev, ind_tbl) != 0)
9856                         DRV_LOG(CRIT, "Port %u could not attach "
9857                                 "indirection table obj %p on rollback",
9858                                 dev->data->port_id, (void *)ind_tbl);
9859         }
9860         return ret;
9861 }
9862
9863 #ifndef HAVE_MLX5DV_DR
9864 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
9865 #else
9866 #define MLX5_DOMAIN_SYNC_FLOW \
9867         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
9868 #endif
9869
9870 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
9871 {
9872         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
9873         const struct mlx5_flow_driver_ops *fops;
9874         int ret;
9875         struct rte_flow_attr attr = { .transfer = 0 };
9876
9877         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
9878         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
9879         if (ret > 0)
9880                 ret = -ret;
9881         return ret;
9882 }
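
/*
 * A minimal usage sketch, assuming the MLX5_DOMAIN_BIT_* masks exposed by
 * rte_pmd_mlx5.h (port_id is an assumption for illustration):
 *
 *   int ret = rte_pmd_mlx5_sync_flow(port_id,
 *                                    MLX5_DOMAIN_BIT_NIC_RX |
 *                                    MLX5_DOMAIN_BIT_NIC_TX);
 *
 * A negative return value indicates the synchronization failed.
 */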
9883
9884 const struct mlx5_flow_tunnel *
9885 mlx5_get_tof(const struct rte_flow_item *item,
9886              const struct rte_flow_action *action,
9887              enum mlx5_tof_rule_type *rule_type)
9888 {
9889         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
9890                 if (item->type == (typeof(item->type))
9891                                   MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL) {
9892                         *rule_type = MLX5_TUNNEL_OFFLOAD_MATCH_RULE;
9893                         return flow_items_to_tunnel(item);
9894                 }
9895         }
9896         for (; action->type != RTE_FLOW_ACTION_TYPE_END; action++) {
9897                 if (action->type == (typeof(action->type))
9898                                     MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET) {
9899                         *rule_type = MLX5_TUNNEL_OFFLOAD_SET_RULE;
9900                         return flow_actions_to_tunnel(action);
9901                 }
9902         }
9903         return NULL;
9904 }
9905
9906 /**
9907  * Tunnel offload functionality is defined for the DV environment only.
9908  */
9909 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
9910 __extension__
9911 union tunnel_offload_mark {
9912         uint32_t val;
9913         struct {
9914                 uint32_t app_reserve:8;
9915                 uint32_t table_id:15;
9916                 uint32_t transfer:1;
9917                 uint32_t _unused_:8;
9918         };
9919 };
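
/*
 * Sketch of the mark value layout implied by the bit-fields above (the
 * exact bit positions depend on the compiler's bit-field ordering): 8 bits
 * reserved for the application, 15 bits carrying the tunnel flow table id,
 * 1 transfer flag and 8 unused bits. tunnel_mark_decode() below uses
 * table_id and transfer to locate the matching flow table entry.
 */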
9920
9921 static bool
9922 mlx5_access_tunnel_offload_db
9923         (struct rte_eth_dev *dev,
9924          bool (*match)(struct rte_eth_dev *,
9925                        struct mlx5_flow_tunnel *, const void *),
9926          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
9927          void (*miss)(struct rte_eth_dev *, void *),
9928          void *ctx, bool lock_op);
9929
9930 static int
9931 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
9932                              struct rte_flow *flow,
9933                              const struct rte_flow_attr *attr,
9934                              const struct rte_flow_action *app_actions,
9935                              uint32_t flow_idx,
9936                              const struct mlx5_flow_tunnel *tunnel,
9937                              struct tunnel_default_miss_ctx *ctx,
9938                              struct rte_flow_error *error)
9939 {
9940         struct mlx5_priv *priv = dev->data->dev_private;
9941         struct mlx5_flow *dev_flow;
9942         struct rte_flow_attr miss_attr = *attr;
9943         const struct rte_flow_item miss_items[2] = {
9944                 {
9945                         .type = RTE_FLOW_ITEM_TYPE_ETH,
9946                         .spec = NULL,
9947                         .last = NULL,
9948                         .mask = NULL
9949                 },
9950                 {
9951                         .type = RTE_FLOW_ITEM_TYPE_END,
9952                         .spec = NULL,
9953                         .last = NULL,
9954                         .mask = NULL
9955                 }
9956         };
9957         union tunnel_offload_mark mark_id;
9958         struct rte_flow_action_mark miss_mark;
9959         struct rte_flow_action miss_actions[3] = {
9960                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
9961                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
9962         };
9963         const struct rte_flow_action_jump *jump_data;
9964         uint32_t i, flow_table = 0; /* prevent compilation warning */
9965         struct flow_grp_info grp_info = {
9966                 .external = 1,
9967                 .transfer = attr->transfer,
9968                 .fdb_def_rule = !!priv->fdb_def_rule,
9969                 .std_tbl_fix = 0,
9970         };
9971         int ret;
9972
9973         if (!attr->transfer) {
9974                 uint32_t q_size;
9975
9976                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
9977                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
9978                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
9979                                          0, SOCKET_ID_ANY);
9980                 if (!ctx->queue)
9981                         return rte_flow_error_set
9982                                 (error, ENOMEM,
9983                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9984                                 NULL, "invalid default miss RSS");
9985                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
9986                 ctx->action_rss.level = 0;
9987                 ctx->action_rss.types = priv->rss_conf.rss_hf;
9988                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
9989                 ctx->action_rss.queue_num = priv->reta_idx_n;
9990                 ctx->action_rss.key = priv->rss_conf.rss_key;
9991                 ctx->action_rss.queue = ctx->queue;
9992                 if (!priv->reta_idx_n || !priv->rxqs_n)
9993                         return rte_flow_error_set
9994                                 (error, EINVAL,
9995                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
9996                                 NULL, "invalid port configuration");
9997                 if (!(dev->data->dev_conf.rxmode.mq_mode & RTE_ETH_MQ_RX_RSS_FLAG))
9998                         ctx->action_rss.types = 0;
9999                 for (i = 0; i != priv->reta_idx_n; ++i)
10000                         ctx->queue[i] = (*priv->reta_idx)[i];
10001         } else {
10002                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
10003                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
10004         }
10005         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
10006         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
10007         jump_data = app_actions->conf;
10008         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
10009         miss_attr.group = jump_data->group;
10010         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
10011                                        &flow_table, &grp_info, error);
10012         if (ret)
10013                 return rte_flow_error_set(error, EINVAL,
10014                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
10015                                           NULL, "invalid tunnel id");
10016         mark_id.app_reserve = 0;
10017         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
10018         mark_id.transfer = !!attr->transfer;
10019         mark_id._unused_ = 0;
10020         miss_mark.id = mark_id.val;
10021         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
10022                                     miss_items, miss_actions, flow_idx, error);
10023         if (!dev_flow)
10024                 return -rte_errno;
10025         dev_flow->flow = flow;
10026         dev_flow->external = true;
10027         dev_flow->tunnel = tunnel;
10028         dev_flow->tof_type = MLX5_TUNNEL_OFFLOAD_MISS_RULE;
10029         /* Subflow object was created, we must include one in the list. */
10030         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
10031                       dev_flow->handle, next);
10032         DRV_LOG(DEBUG,
10033                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
10034                 dev->data->port_id, tunnel->app_tunnel.type,
10035                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
10036         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
10037                                   miss_actions, error);
10038         if (!ret)
10039                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
10040                                                   error);
10041
10042         return ret;
10043 }
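
/*
 * Shape of the default miss rule built above, for reference: the pattern is
 * "eth / end", the actions are "mark / rss / end" on non-transfer rules or
 * "mark / jump (group MLX5_TNL_MISS_FDB_JUMP_GRP) / end" on transfer rules,
 * the MARK id encodes the translated table of the application's JUMP target
 * plus the transfer flag, and the rule is inserted at priority
 * MLX5_TNL_MISS_RULE_PRIORITY in the JUMP target group.
 */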
10044
10045 static const struct mlx5_flow_tbl_data_entry  *
10046 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
10047 {
10048         struct mlx5_priv *priv = dev->data->dev_private;
10049         struct mlx5_dev_ctx_shared *sh = priv->sh;
10050         struct mlx5_list_entry *he;
10051         union tunnel_offload_mark mbits = { .val = mark };
10052         union mlx5_flow_tbl_key table_key = {
10053                 {
10054                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
10055                         .id = 0,
10056                         .reserved = 0,
10057                         .dummy = 0,
10058                         .is_fdb = !!mbits.transfer,
10059                         .is_egress = 0,
10060                 }
10061         };
10062         struct mlx5_flow_cb_ctx ctx = {
10063                 .data = &table_key.v64,
10064         };
10065
10066         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, &ctx);
10067         return he ?
10068                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
10069 }
10070
10071 static void
10072 mlx5_flow_tunnel_grp2tbl_remove_cb(void *tool_ctx,
10073                                    struct mlx5_list_entry *entry)
10074 {
10075         struct mlx5_dev_ctx_shared *sh = tool_ctx;
10076         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10077
10078         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10079                         tunnel_flow_tbl_to_id(tte->flow_table));
10080         mlx5_free(tte);
10081 }
10082
10083 static int
10084 mlx5_flow_tunnel_grp2tbl_match_cb(void *tool_ctx __rte_unused,
10085                                   struct mlx5_list_entry *entry, void *cb_ctx)
10086 {
10087         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10088         union tunnel_tbl_key tbl = {
10089                 .val = *(uint64_t *)(ctx->data),
10090         };
10091         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10092
10093         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
10094 }
10095
10096 static struct mlx5_list_entry *
10097 mlx5_flow_tunnel_grp2tbl_create_cb(void *tool_ctx, void *cb_ctx)
10098 {
10099         struct mlx5_dev_ctx_shared *sh = tool_ctx;
10100         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
10101         struct tunnel_tbl_entry *tte;
10102         union tunnel_tbl_key tbl = {
10103                 .val = *(uint64_t *)(ctx->data),
10104         };
10105
10106         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
10107                           sizeof(*tte), 0,
10108                           SOCKET_ID_ANY);
10109         if (!tte)
10110                 goto err;
10111         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10112                           &tte->flow_table);
10113         if (tte->flow_table >= MLX5_MAX_TABLES) {
10114                 DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
10115                         tte->flow_table);
10116                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
10117                                 tte->flow_table);
10118                 goto err;
10119         } else if (!tte->flow_table) {
10120                 goto err;
10121         }
10122         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
10123         tte->tunnel_id = tbl.tunnel_id;
10124         tte->group = tbl.group;
10125         return &tte->hash;
10126 err:
10127         if (tte)
10128                 mlx5_free(tte);
10129         return NULL;
10130 }
10131
10132 static struct mlx5_list_entry *
10133 mlx5_flow_tunnel_grp2tbl_clone_cb(void *tool_ctx __rte_unused,
10134                                   struct mlx5_list_entry *oentry,
10135                                   void *cb_ctx __rte_unused)
10136 {
10137         struct tunnel_tbl_entry *tte = mlx5_malloc(MLX5_MEM_SYS, sizeof(*tte),
10138                                                    0, SOCKET_ID_ANY);
10139
10140         if (!tte)
10141                 return NULL;
10142         memcpy(tte, oentry, sizeof(*tte));
10143         return &tte->hash;
10144 }
10145
10146 static void
10147 mlx5_flow_tunnel_grp2tbl_clone_free_cb(void *tool_ctx __rte_unused,
10148                                        struct mlx5_list_entry *entry)
10149 {
10150         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
10151
10152         mlx5_free(tte);
10153 }
10154
10155 static uint32_t
10156 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
10157                                 const struct mlx5_flow_tunnel *tunnel,
10158                                 uint32_t group, uint32_t *table,
10159                                 struct rte_flow_error *error)
10160 {
10161         struct mlx5_list_entry *he;
10162         struct tunnel_tbl_entry *tte;
10163         union tunnel_tbl_key key = {
10164                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
10165                 .group = group
10166         };
10167         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10168         struct mlx5_hlist *group_hash;
10169         struct mlx5_flow_cb_ctx ctx = {
10170                 .data = &key.val,
10171         };
10172
10173         group_hash = tunnel ? tunnel->groups : thub->groups;
10174         he = mlx5_hlist_register(group_hash, key.val, &ctx);
10175         if (!he)
10176                 return rte_flow_error_set(error, EINVAL,
10177                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
10178                                           NULL,
10179                                           "tunnel group index not supported");
10180         tte = container_of(he, typeof(*tte), hash);
10181         *table = tte->flow_table;
10182         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
10183                 dev->data->port_id, key.tunnel_id, group, *table);
10184         return 0;
10185 }
10186
10187 static void
10188 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
10189                       struct mlx5_flow_tunnel *tunnel)
10190 {
10191         struct mlx5_priv *priv = dev->data->dev_private;
10192         struct mlx5_indexed_pool *ipool;
10193
10194         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
10195                 dev->data->port_id, tunnel->tunnel_id);
10196         LIST_REMOVE(tunnel, chain);
10197         mlx5_hlist_destroy(tunnel->groups);
10198         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10199         mlx5_ipool_free(ipool, tunnel->tunnel_id);
10200 }
10201
10202 static bool
10203 mlx5_access_tunnel_offload_db
10204         (struct rte_eth_dev *dev,
10205          bool (*match)(struct rte_eth_dev *,
10206                        struct mlx5_flow_tunnel *, const void *),
10207          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
10208          void (*miss)(struct rte_eth_dev *, void *),
10209          void *ctx, bool lock_op)
10210 {
10211         bool verdict = false;
10212         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10213         struct mlx5_flow_tunnel *tunnel;
10214
10215         rte_spinlock_lock(&thub->sl);
10216         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
10217                 verdict = match(dev, tunnel, (const void *)ctx);
10218                 if (verdict)
10219                         break;
10220         }
10221         if (!lock_op)
10222                 rte_spinlock_unlock(&thub->sl);
10223         if (verdict && hit)
10224                 hit(dev, tunnel, ctx);
10225         if (!verdict && miss)
10226                 miss(dev, ctx);
10227         if (lock_op)
10228                 rte_spinlock_unlock(&thub->sl);
10229
10230         return verdict;
10231 }
10232
10233 struct tunnel_db_find_tunnel_id_ctx {
10234         uint32_t tunnel_id;
10235         struct mlx5_flow_tunnel *tunnel;
10236 };
10237
10238 static bool
10239 find_tunnel_id_match(struct rte_eth_dev *dev,
10240                      struct mlx5_flow_tunnel *tunnel, const void *x)
10241 {
10242         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10243
10244         RTE_SET_USED(dev);
10245         return tunnel->tunnel_id == ctx->tunnel_id;
10246 }
10247
10248 static void
10249 find_tunnel_id_hit(struct rte_eth_dev *dev,
10250                    struct mlx5_flow_tunnel *tunnel, void *x)
10251 {
10252         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
10253         RTE_SET_USED(dev);
10254         ctx->tunnel = tunnel;
10255 }
10256
10257 static struct mlx5_flow_tunnel *
10258 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
10259 {
10260         struct tunnel_db_find_tunnel_id_ctx ctx = {
10261                 .tunnel_id = id,
10262         };
10263
10264         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
10265                                       find_tunnel_id_hit, NULL, &ctx, true);
10266
10267         return ctx.tunnel;
10268 }
10269
10270 static struct mlx5_flow_tunnel *
10271 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
10272                           const struct rte_flow_tunnel *app_tunnel)
10273 {
10274         struct mlx5_priv *priv = dev->data->dev_private;
10275         struct mlx5_indexed_pool *ipool;
10276         struct mlx5_flow_tunnel *tunnel;
10277         uint32_t id;
10278
10279         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
10280         tunnel = mlx5_ipool_zmalloc(ipool, &id);
10281         if (!tunnel)
10282                 return NULL;
10283         if (id >= MLX5_MAX_TUNNELS) {
10284                 mlx5_ipool_free(ipool, id);
10285                 DRV_LOG(ERR, "Tunnel ID %u exceeds the maximum limit.", id);
10286                 return NULL;
10287         }
10288         tunnel->groups = mlx5_hlist_create("tunnel groups", 64, false, true,
10289                                            priv->sh,
10290                                            mlx5_flow_tunnel_grp2tbl_create_cb,
10291                                            mlx5_flow_tunnel_grp2tbl_match_cb,
10292                                            mlx5_flow_tunnel_grp2tbl_remove_cb,
10293                                            mlx5_flow_tunnel_grp2tbl_clone_cb,
10294                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10295         if (!tunnel->groups) {
10296                 mlx5_ipool_free(ipool, id);
10297                 return NULL;
10298         }
10299         /* Initialize the new PMD tunnel. */
10300         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
10301         tunnel->tunnel_id = id;
10302         tunnel->action.type = (typeof(tunnel->action.type))
10303                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
10304         tunnel->action.conf = tunnel;
10305         tunnel->item.type = (typeof(tunnel->item.type))
10306                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
10307         tunnel->item.spec = tunnel;
10308         tunnel->item.last = NULL;
10309         tunnel->item.mask = NULL;
10310
10311         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
10312                 dev->data->port_id, tunnel->tunnel_id);
10313
10314         return tunnel;
10315 }
10316
10317 struct tunnel_db_get_tunnel_ctx {
10318         const struct rte_flow_tunnel *app_tunnel;
10319         struct mlx5_flow_tunnel *tunnel;
10320 };
10321
10322 static bool get_tunnel_match(struct rte_eth_dev *dev,
10323                              struct mlx5_flow_tunnel *tunnel, const void *x)
10324 {
10325         const struct tunnel_db_get_tunnel_ctx *ctx = x;
10326
10327         RTE_SET_USED(dev);
10328         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
10329                        sizeof(*ctx->app_tunnel));
10330 }
10331
10332 static void get_tunnel_hit(struct rte_eth_dev *dev,
10333                            struct mlx5_flow_tunnel *tunnel, void *x)
10334 {
10335         /* called under tunnel spinlock protection */
10336         struct tunnel_db_get_tunnel_ctx *ctx = x;
10337
10338         RTE_SET_USED(dev);
10339         tunnel->refctn++;
10340         ctx->tunnel = tunnel;
10341 }
10342
10343 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
10344 {
10345         /* called under tunnel spinlock protection */
10346         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
10347         struct tunnel_db_get_tunnel_ctx *ctx = x;
10348
10349         rte_spinlock_unlock(&thub->sl);
10350         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
10351         rte_spinlock_lock(&thub->sl);
10352         if (ctx->tunnel) {
10353                 ctx->tunnel->refctn = 1;
10354                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
10355         }
10356 }
10357
10358
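      /**
       * Find the PMD tunnel matching an application tunnel descriptor or
       * allocate a new one.
       *
       * On a hit the tunnel reference counter is incremented; on a miss a
       * new tunnel is allocated and inserted into the hub list with a
       * reference count of one.
       *
       * @return
       *   0 on success, -ENOMEM if no tunnel could be provided.
       */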
10359 static int
10360 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
10361                      const struct rte_flow_tunnel *app_tunnel,
10362                      struct mlx5_flow_tunnel **tunnel)
10363 {
10364         struct tunnel_db_get_tunnel_ctx ctx = {
10365                 .app_tunnel = app_tunnel,
10366         };
10367
10368         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
10369                                       get_tunnel_miss, &ctx, true);
10370         *tunnel = ctx.tunnel;
10371         return ctx.tunnel ? 0 : -ENOMEM;
10372 }
10373
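      /**
       * Destroy the tunnel offload hub of a shared device context.
       * A warning is logged if tunnels are still registered.
       */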
10374 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
10375 {
10376         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
10377
10378         if (!thub)
10379                 return;
10380         if (!LIST_EMPTY(&thub->tunnels))
10381                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
10382         mlx5_hlist_destroy(thub->groups);
10383         mlx5_free(thub);
10384 }
10385
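      /**
       * Allocate and initialize the tunnel offload hub of a shared device
       * context: the tunnel list, its spinlock and the flow group hash
       * list.
       *
       * @return
       *   0 on success, a negative errno value otherwise.
       */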
10386 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
10387 {
10388         int err;
10389         struct mlx5_flow_tunnel_hub *thub;
10390
10391         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
10392                            0, SOCKET_ID_ANY);
10393         if (!thub)
10394                 return -ENOMEM;
10395         LIST_INIT(&thub->tunnels);
10396         rte_spinlock_init(&thub->sl);
10397         thub->groups = mlx5_hlist_create("flow groups", 64,
10398                                          false, true, sh,
10399                                          mlx5_flow_tunnel_grp2tbl_create_cb,
10400                                          mlx5_flow_tunnel_grp2tbl_match_cb,
10401                                          mlx5_flow_tunnel_grp2tbl_remove_cb,
10402                                          mlx5_flow_tunnel_grp2tbl_clone_cb,
10403                                         mlx5_flow_tunnel_grp2tbl_clone_free_cb);
10404         if (!thub->groups) {
10405                 err = -rte_errno;
10406                 goto err;
10407         }
10408         sh->tunnel_hub = thub;
10409
10410         return 0;
10411
10412 err:
10413         if (thub->groups)
10414                 mlx5_hlist_destroy(thub->groups);
10415         if (thub)
10416                 mlx5_free(thub);
10417         return err;
10418 }
10419
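      /**
       * Validate an application tunnel descriptor for tunnel offload.
       *
       * The DV flow engine and tunnel offload must be enabled and the
       * tunnel type must be VXLAN, GRE, NVGRE or GENEVE.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */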
10420 static inline int
10421 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
10422                           struct rte_flow_tunnel *tunnel,
10423                           struct rte_flow_error *error)
10424 {
10425         struct mlx5_priv *priv = dev->data->dev_private;
10426
10427         if (!priv->sh->config.dv_flow_en)
10428                 return rte_flow_error_set(error, ENOTSUP,
10429                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10430                                           "flow DV interface is off");
10431         if (!is_tunnel_offload_active(dev))
10432                 return rte_flow_error_set(error, ENOTSUP,
10433                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10434                                           "tunnel offload was not activated");
10435         if (!tunnel)
10436                 return rte_flow_error_set(error, EINVAL,
10437                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10438                                           "no application tunnel");
10439         switch (tunnel->type) {
10440         default:
10441                 return rte_flow_error_set(error, EINVAL,
10442                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10443                                           "unsupported tunnel type");
10444         case RTE_FLOW_ITEM_TYPE_VXLAN:
10445         case RTE_FLOW_ITEM_TYPE_GRE:
10446         case RTE_FLOW_ITEM_TYPE_NVGRE:
10447         case RTE_FLOW_ITEM_TYPE_GENEVE:
10448                 break;
10449         }
10450         return 0;
10451 }
10452
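      /**
       * Get the PMD actions implementing tunnel decap offload for an
       * application tunnel (rte_flow_tunnel_decap_set() path). A single
       * internal TUNNEL_SET action referencing the PMD tunnel is returned.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */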
10453 static int
10454 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
10455                     struct rte_flow_tunnel *app_tunnel,
10456                     struct rte_flow_action **actions,
10457                     uint32_t *num_of_actions,
10458                     struct rte_flow_error *error)
10459 {
10460         struct mlx5_flow_tunnel *tunnel;
10461         int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10462
10463         if (ret)
10464                 return ret;
10465         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10466         if (ret < 0) {
10467                 return rte_flow_error_set(error, ret,
10468                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
10469                                           "failed to initialize pmd tunnel");
10470         }
10471         *actions = &tunnel->action;
10472         *num_of_actions = 1;
10473         return 0;
10474 }
10475
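      /**
       * Get the PMD items matching an application tunnel
       * (rte_flow_tunnel_match() path). A single internal TUNNEL item
       * referencing the PMD tunnel is returned.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */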
10476 static int
10477 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
10478                        struct rte_flow_tunnel *app_tunnel,
10479                        struct rte_flow_item **items,
10480                        uint32_t *num_of_items,
10481                        struct rte_flow_error *error)
10482 {
10483         struct mlx5_flow_tunnel *tunnel;
10484         int ret = mlx5_flow_tunnel_validate(dev, app_tunnel, error);
10485
10486         if (ret)
10487                 return ret;
10488         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
10489         if (ret < 0) {
10490                 return rte_flow_error_set(error, ret,
10491                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10492                                           "failed to initialize pmd tunnel");
10493         }
10494         *items = &tunnel->item;
10495         *num_of_items = 1;
10496         return 0;
10497 }
10498
10499 struct tunnel_db_element_release_ctx {
10500         struct rte_flow_item *items;
10501         struct rte_flow_action *actions;
10502         uint32_t num_elements;
10503         struct rte_flow_error *error;
10504         int ret;
10505 };
10506
10507 static bool
10508 tunnel_element_release_match(struct rte_eth_dev *dev,
10509                              struct mlx5_flow_tunnel *tunnel, const void *x)
10510 {
10511         const struct tunnel_db_element_release_ctx *ctx = x;
10512
10513         RTE_SET_USED(dev);
10514         if (ctx->num_elements != 1)
10515                 return false;
10516         else if (ctx->items)
10517                 return ctx->items == &tunnel->item;
10518         else if (ctx->actions)
10519                 return ctx->actions == &tunnel->action;
10520
10521         return false;
10522 }
10523
10524 static void
10525 tunnel_element_release_hit(struct rte_eth_dev *dev,
10526                            struct mlx5_flow_tunnel *tunnel, void *x)
10527 {
10528         struct tunnel_db_element_release_ctx *ctx = x;
10529         ctx->ret = 0;
10530         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
10531                 mlx5_flow_tunnel_free(dev, tunnel);
10532 }
10533
10534 static void
10535 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
10536 {
10537         struct tunnel_db_element_release_ctx *ctx = x;
10538         RTE_SET_USED(dev);
10539         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
10540                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
10541                                       "invalid argument");
10542 }
10543
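      /**
       * Release PMD tunnel items previously obtained from
       * mlx5_flow_tunnel_match(). The owning tunnel is freed once its
       * reference counter drops to zero.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */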
10544 static int
10545 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
10546                        struct rte_flow_item *pmd_items,
10547                        uint32_t num_items, struct rte_flow_error *err)
10548 {
10549         struct tunnel_db_element_release_ctx ctx = {
10550                 .items = pmd_items,
10551                 .actions = NULL,
10552                 .num_elements = num_items,
10553                 .error = err,
10554         };
10555
10556         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10557                                       tunnel_element_release_hit,
10558                                       tunnel_element_release_miss, &ctx, false);
10559
10560         return ctx.ret;
10561 }
10562
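      /**
       * Release PMD tunnel actions previously obtained from
       * mlx5_flow_tunnel_decap_set(). The owning tunnel is freed once its
       * reference counter drops to zero.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */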
10563 static int
10564 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
10565                          struct rte_flow_action *pmd_actions,
10566                          uint32_t num_actions, struct rte_flow_error *err)
10567 {
10568         struct tunnel_db_element_release_ctx ctx = {
10569                 .items = NULL,
10570                 .actions = pmd_actions,
10571                 .num_elements = num_actions,
10572                 .error = err,
10573         };
10574
10575         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
10576                                       tunnel_element_release_hit,
10577                                       tunnel_element_release_miss, &ctx, false);
10578
10579         return ctx.ret;
10580 }
10581
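      /**
       * Restore tunnel information for a packet received on a tunnel
       * offload miss. The FDIR mark carried by the mbuf is decoded into
       * the table entry holding the application tunnel and group id.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */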
10582 static int
10583 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
10584                                   struct rte_mbuf *m,
10585                                   struct rte_flow_restore_info *info,
10586                                   struct rte_flow_error *err)
10587 {
10588         uint64_t ol_flags = m->ol_flags;
10589         const struct mlx5_flow_tbl_data_entry *tble;
10590         const uint64_t mask = RTE_MBUF_F_RX_FDIR | RTE_MBUF_F_RX_FDIR_ID;
10591
10592         if (!is_tunnel_offload_active(dev)) {
10593                 info->flags = 0;
10594                 return 0;
10595         }
10596
10597         if ((ol_flags & mask) != mask)
10598                 goto err;
10599         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
10600         if (!tble) {
10601                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
10602                         dev->data->port_id, m->hash.fdir.hi);
10603                 goto err;
10604         }
10605         MLX5_ASSERT(tble->tunnel);
10606         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
10607         info->group_id = tble->group_id;
10608         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
10609                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
10610                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
10611
10612         return 0;
10613
10614 err:
10615         return rte_flow_error_set(err, EINVAL,
10616                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10617                                   "failed to get restore info");
10618 }
10619
10620 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
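      /* Tunnel offload stubs used when DV flow support is not compiled in. */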
10621 static int
10622 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
10623                            __rte_unused struct rte_flow_tunnel *app_tunnel,
10624                            __rte_unused struct rte_flow_action **actions,
10625                            __rte_unused uint32_t *num_of_actions,
10626                            __rte_unused struct rte_flow_error *error)
10627 {
10628         return -ENOTSUP;
10629 }
10630
10631 static int
10632 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
10633                        __rte_unused struct rte_flow_tunnel *app_tunnel,
10634                        __rte_unused struct rte_flow_item **items,
10635                        __rte_unused uint32_t *num_of_items,
10636                        __rte_unused struct rte_flow_error *error)
10637 {
10638         return -ENOTSUP;
10639 }
10640
10641 static int
10642 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
10643                               __rte_unused struct rte_flow_item *pmd_items,
10644                               __rte_unused uint32_t num_items,
10645                               __rte_unused struct rte_flow_error *err)
10646 {
10647         return -ENOTSUP;
10648 }
10649
10650 static int
10651 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
10652                                 __rte_unused struct rte_flow_action *pmd_action,
10653                                 __rte_unused uint32_t num_actions,
10654                                 __rte_unused struct rte_flow_error *err)
10655 {
10656         return -ENOTSUP;
10657 }
10658
10659 static int
10660 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
10661                                   __rte_unused struct rte_mbuf *m,
10662                                   __rte_unused struct rte_flow_restore_info *i,
10663                                   __rte_unused struct rte_flow_error *err)
10664 {
10665         return -ENOTSUP;
10666 }
10667
10668 static int
10669 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
10670                              __rte_unused struct rte_flow *flow,
10671                              __rte_unused const struct rte_flow_attr *attr,
10672                              __rte_unused const struct rte_flow_action *actions,
10673                              __rte_unused uint32_t flow_idx,
10674                              __rte_unused const struct mlx5_flow_tunnel *tunnel,
10675                              __rte_unused struct tunnel_default_miss_ctx *ctx,
10676                              __rte_unused struct rte_flow_error *error)
10677 {
10678         return -ENOTSUP;
10679 }
10680
10681 static struct mlx5_flow_tunnel *
10682 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
10683                     __rte_unused uint32_t id)
10684 {
10685         return NULL;
10686 }
10687
10688 static void
10689 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
10690                       __rte_unused struct mlx5_flow_tunnel *tunnel)
10691 {
10692 }
10693
10694 static uint32_t
10695 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
10696                                 __rte_unused const struct mlx5_flow_tunnel *t,
10697                                 __rte_unused uint32_t group,
10698                                 __rte_unused uint32_t *table,
10699                                 struct rte_flow_error *error)
10700 {
10701         return rte_flow_error_set(error, ENOTSUP,
10702                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10703                                   "tunnel offload requires DV support");
10704 }
10705
10706 void
10707 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
10708                         __rte_unused  uint16_t port_id)
10709 {
10710 }
10711 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
10712
10713 /* Flex flow item API */
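      /**
       * Create a flex flow item handle through the active flow driver.
       * Creation is restricted to PF ports on BlueField devices.
       *
       * @return
       *   Opaque flex item handle on success, NULL with rte_flow_error
       *   set otherwise.
       */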
10714 static struct rte_flow_item_flex_handle *
10715 mlx5_flow_flex_item_create(struct rte_eth_dev *dev,
10716                            const struct rte_flow_item_flex_conf *conf,
10717                            struct rte_flow_error *error)
10718 {
10719         static const char err_msg[] = "flex item creation unsupported";
10720         struct mlx5_priv *priv = dev->data->dev_private;
10721         struct rte_flow_attr attr = { .transfer = 0 };
10722         const struct mlx5_flow_driver_ops *fops =
10723                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10724
10725         if (!priv->pci_dev) {
10726                 rte_flow_error_set(error, ENOTSUP,
10727                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10728                                    "create flex item on PF only");
10729                 return NULL;
10730         }
10731         switch (priv->pci_dev->id.device_id) {
10732         case PCI_DEVICE_ID_MELLANOX_CONNECTX6DXBF:
10733         case PCI_DEVICE_ID_MELLANOX_CONNECTX7BF:
10734                 break;
10735         default:
10736                 rte_flow_error_set(error, ENOTSUP,
10737                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
10738                                    "flex item is available on BlueField ports only");
10739                 return NULL;
10740         }
10741         if (!fops->item_create) {
10742                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10743                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10744                                    NULL, err_msg);
10745                 return NULL;
10746         }
10747         return fops->item_create(dev, conf, error);
10748 }
10749
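      /**
       * Release a flex flow item handle through the active flow driver.
       *
       * @return
       *   0 on success, a negative errno value and rte_flow_error set
       *   otherwise.
       */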
10750 static int
10751 mlx5_flow_flex_item_release(struct rte_eth_dev *dev,
10752                             const struct rte_flow_item_flex_handle *handle,
10753                             struct rte_flow_error *error)
10754 {
10755         static const char err_msg[] = "flex item release unsupported";
10756         struct rte_flow_attr attr = { .transfer = 0 };
10757         const struct mlx5_flow_driver_ops *fops =
10758                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
10759
10760         if (!fops->item_release) {
10761                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
10762                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
10763                                    NULL, err_msg);
10764                 return -rte_errno;
10765         }
10766         return fops->item_release(dev, handle, error);
10767 }
10768
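      /**
       * Debug helper: print the name of every item in a flow pattern to
       * stdout, terminated by "END".
       */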
10769 static void
10770 mlx5_dbg__print_pattern(const struct rte_flow_item *item)
10771 {
10772         int ret;
10773         struct rte_flow_error error;
10774
10775         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
10776                 char *item_name;
10777                 ret = rte_flow_conv(RTE_FLOW_CONV_OP_ITEM_NAME_PTR, &item_name,
10778                                     sizeof(item_name),
10779                                     (void *)(uintptr_t)item->type, &error);
10780                 if (ret > 0)
10781                         printf("%s ", item_name);
10782                 else
10783                         printf("%d\n", (int)item->type);
10784         }
10785         printf("END\n");
10786 }
10787
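      /**
       * Check whether a UDP item matches the standard VXLAN destination
       * port. An unspecified or fully masked-out destination port is
       * treated as standard as well.
       *
       * @return
       *   Non-zero if the destination port is unspecified or equals
       *   MLX5_UDP_PORT_VXLAN, zero otherwise.
       */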
10788 static int
10789 mlx5_flow_is_std_vxlan_port(const struct rte_flow_item *udp_item)
10790 {
10791         const struct rte_flow_item_udp *spec = udp_item->spec;
10792         const struct rte_flow_item_udp *mask = udp_item->mask;
10793         uint16_t udp_dport = 0;
10794
10795         if (spec != NULL) {
10796                 if (!mask)
10797                         mask = &rte_flow_item_udp_mask;
10798                 udp_dport = rte_be_to_cpu_16(spec->hdr.dst_port &
10799                                 mask->hdr.dst_port);
10800         }
10801         return (!udp_dport || udp_dport == MLX5_UDP_PORT_VXLAN);
10802 }
10803
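      /**
       * Adjust the last RSS expansion node when the pattern ends with a
       * VXLAN item: select the standard VXLAN expansion when the
       * preceding UDP item matches the VXLAN port, the L3 VXLAN expansion
       * otherwise.
       *
       * @return
       *   Pointer to the expansion node to continue with.
       */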
10804 static const struct mlx5_flow_expand_node *
10805 mlx5_flow_expand_rss_adjust_node(const struct rte_flow_item *pattern,
10806                 unsigned int item_idx,
10807                 const struct mlx5_flow_expand_node graph[],
10808                 const struct mlx5_flow_expand_node *node)
10809 {
10810         const struct rte_flow_item *item = pattern + item_idx, *prev_item;
10811
10812         if (item->type == RTE_FLOW_ITEM_TYPE_VXLAN &&
10813                         node != NULL &&
10814                         node->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
10815                 /*
10816                  * The expansion node is VXLAN and it is also the last
10817                  * expandable item in the pattern, so the expansion must
10818                  * continue into the inner tunnel headers.
10819                  */
10820                 MLX5_ASSERT(item_idx > 0);
10821                 prev_item = pattern + item_idx - 1;
10822                 MLX5_ASSERT(prev_item->type == RTE_FLOW_ITEM_TYPE_UDP);
10823                 if (mlx5_flow_is_std_vxlan_port(prev_item))
10824                         return &graph[MLX5_EXPANSION_STD_VXLAN];
10825                 return &graph[MLX5_EXPANSION_L3_VXLAN];
10826         }
10827         return node;
10828 }
10829
10830 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
10831 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
10832         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
10833 };
10834
10835 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
10836 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
10837         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
10838         { 9, 10, 11 }, { 12, 13, 14 },
10839 };
10840
10841 /**
10842  * Discover the number of available flow priorities.
10843  *
10844  * @param dev
10845  *   Ethernet device.
10846  *
10847  * @return
10848  *   On success, number of available flow priorities.
10849  *   On failure, a negative errno-style code and rte_errno is set.
10850  */
10851 int
10852 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
10853 {
10854         static const uint16_t vprio[] = {8, 16};
10855         const struct mlx5_priv *priv = dev->data->dev_private;
10856         const struct mlx5_flow_driver_ops *fops;
10857         enum mlx5_flow_drv_type type;
10858         int ret;
10859
10860         type = mlx5_flow_os_get_type();
10861         if (type == MLX5_FLOW_TYPE_MAX) {
10862                 type = MLX5_FLOW_TYPE_VERBS;
10863                 if (priv->sh->cdev->config.devx && priv->sh->config.dv_flow_en)
10864                         type = MLX5_FLOW_TYPE_DV;
10865         }
10866         fops = flow_get_drv_ops(type);
10867         if (fops->discover_priorities == NULL) {
10868                 DRV_LOG(ERR, "Priority discovery not supported");
10869                 rte_errno = ENOTSUP;
10870                 return -rte_errno;
10871         }
10872         ret = fops->discover_priorities(dev, vprio, RTE_DIM(vprio));
10873         if (ret < 0)
10874                 return ret;
10875         switch (ret) {
10876         case 8:
10877                 ret = RTE_DIM(priority_map_3);
10878                 break;
10879         case 16:
10880                 ret = RTE_DIM(priority_map_5);
10881                 break;
10882         default:
10883                 rte_errno = ENOTSUP;
10884                 DRV_LOG(ERR,
10885                         "port %u maximum priority: %d expected 8/16",
10886                         dev->data->port_id, ret);
10887                 return -rte_errno;
10888         }
10889         DRV_LOG(INFO, "port %u supported flow priorities:"
10890                 " 0-%d for ingress or egress root table,"
10891                 " 0-%d for non-root table or transfer root table.",
10892                 dev->data->port_id, ret - 2,
10893                 MLX5_NON_ROOT_FLOW_MAX_PRIO - 1);
10894         return ret;
10895 }
10896
10897 /**
10898  * Adjust flow priority based on the highest layer and the request priority.
10899  *
10900  * @param[in] dev
10901  *   Pointer to the Ethernet device structure.
10902  * @param[in] priority
10903  *   The rule base priority.
10904  * @param[in] subpriority
10905  *   The priority based on the items.
10906  *
10907  * @return
10908  *   The new priority.
10909  */
10910 uint32_t
10911 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
10912                           uint32_t subpriority)
10913 {
10914         uint32_t res = 0;
10915         struct mlx5_priv *priv = dev->data->dev_private;
10916
10917         switch (priv->sh->flow_max_priority) {
10918         case RTE_DIM(priority_map_3):
10919                 res = priority_map_3[priority][subpriority];
10920                 break;
10921         case RTE_DIM(priority_map_5):
10922                 res = priority_map_5[priority][subpriority];
10923                 break;
10924         }
10925         return res;
10926 }