net/mlx5: validate MPLSoGRE with GRE key
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdbool.h>
12
13 #include <rte_common.h>
14 #include <rte_ether.h>
15 #include <rte_ethdev_driver.h>
16 #include <rte_eal_paging.h>
17 #include <rte_flow.h>
18 #include <rte_cycles.h>
19 #include <rte_flow_driver.h>
20 #include <rte_malloc.h>
21 #include <rte_ip.h>
22
23 #include <mlx5_glue.h>
24 #include <mlx5_devx_cmds.h>
25 #include <mlx5_prm.h>
26 #include <mlx5_malloc.h>
27
28 #include "mlx5_defs.h"
29 #include "mlx5.h"
30 #include "mlx5_flow.h"
31 #include "mlx5_flow_os.h"
32 #include "mlx5_rxtx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 static struct mlx5_flow_tunnel *
37 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
38 static void
39 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
40 static const struct mlx5_flow_tbl_data_entry  *
41 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark);
42 static int
43 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
44                      const struct rte_flow_tunnel *app_tunnel,
45                      struct mlx5_flow_tunnel **tunnel);
46
47
48 /** Device flow drivers. */
49 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
50
51 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
52
53 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
54         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
55 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
56         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
57 #endif
58         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
59         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
60 };
61
62 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
63 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
64         (const int []){ \
65                 __VA_ARGS__, 0, \
66         }
67
68 /** Node object of input graph for mlx5_flow_expand_rss(). */
69 struct mlx5_flow_expand_node {
70         const int *const next;
71         /**<
72          * List of next node indexes. Index 0 is interpreted as a terminator.
73          */
74         const enum rte_flow_item_type type;
75         /**< Pattern item type of current node. */
76         uint64_t rss_types;
77         /**<
78          * RSS types bit-field associated with this node
79          * (see ETH_RSS_* definitions).
80          */
81 };
82
83 /** Object returned by mlx5_flow_expand_rss(). */
84 struct mlx5_flow_expand_rss {
85         uint32_t entries;
86         /**< Number of entries in @p patterns and @p priorities. */
87         struct {
88                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
89                 uint32_t priority; /**< Priority offset for each expansion. */
90         } entry[];
91 };
92
93 static enum rte_flow_item_type
94 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
95 {
96         enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
97         uint16_t ether_type = 0;
98         uint16_t ether_type_m;
99         uint8_t ip_next_proto = 0;
100         uint8_t ip_next_proto_m;
101
102         if (item == NULL || item->spec == NULL)
103                 return ret;
104         switch (item->type) {
105         case RTE_FLOW_ITEM_TYPE_ETH:
106                 if (item->mask)
107                         ether_type_m = ((const struct rte_flow_item_eth *)
108                                                 (item->mask))->type;
109                 else
110                         ether_type_m = rte_flow_item_eth_mask.type;
111                 if (ether_type_m != RTE_BE16(0xFFFF))
112                         break;
113                 ether_type = ((const struct rte_flow_item_eth *)
114                                 (item->spec))->type;
115                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
116                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
117                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
118                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
119                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
120                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
121                 else
122                         ret = RTE_FLOW_ITEM_TYPE_END;
123                 break;
124         case RTE_FLOW_ITEM_TYPE_VLAN:
125                 if (item->mask)
126                         ether_type_m = ((const struct rte_flow_item_vlan *)
127                                                 (item->mask))->inner_type;
128                 else
129                         ether_type_m = rte_flow_item_vlan_mask.inner_type;
130                 if (ether_type_m != RTE_BE16(0xFFFF))
131                         break;
132                 ether_type = ((const struct rte_flow_item_vlan *)
133                                 (item->spec))->inner_type;
134                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
135                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
136                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
137                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
138                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
139                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
140                 else
141                         ret = RTE_FLOW_ITEM_TYPE_END;
142                 break;
143         case RTE_FLOW_ITEM_TYPE_IPV4:
144                 if (item->mask)
145                         ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
146                                         (item->mask))->hdr.next_proto_id;
147                 else
148                         ip_next_proto_m =
149                                 rte_flow_item_ipv4_mask.hdr.next_proto_id;
150                 if (ip_next_proto_m != 0xFF)
151                         break;
152                 ip_next_proto = ((const struct rte_flow_item_ipv4 *)
153                                 (item->spec))->hdr.next_proto_id;
154                 if (ip_next_proto == IPPROTO_UDP)
155                         ret = RTE_FLOW_ITEM_TYPE_UDP;
156                 else if (ip_next_proto == IPPROTO_TCP)
157                         ret = RTE_FLOW_ITEM_TYPE_TCP;
158                 else if (ip_next_proto == IPPROTO_IP)
159                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
160                 else if (ip_next_proto == IPPROTO_IPV6)
161                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
162                 else
163                         ret = RTE_FLOW_ITEM_TYPE_END;
164                 break;
165         case RTE_FLOW_ITEM_TYPE_IPV6:
166                 if (item->mask)
167                         ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
168                                                 (item->mask))->hdr.proto;
169                 else
170                         ip_next_proto_m =
171                                 rte_flow_item_ipv6_mask.hdr.proto;
172                 if (ip_next_proto_m != 0xFF)
173                         break;
174                 ip_next_proto = ((const struct rte_flow_item_ipv6 *)
175                                 (item->spec))->hdr.proto;
176                 if (ip_next_proto == IPPROTO_UDP)
177                         ret = RTE_FLOW_ITEM_TYPE_UDP;
178                 else if (ip_next_proto == IPPROTO_TCP)
179                         ret = RTE_FLOW_ITEM_TYPE_TCP;
180                 else if (ip_next_proto == IPPROTO_IP)
181                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
182                 else if (ip_next_proto == IPPROTO_IPV6)
183                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
184                 else
185                         ret = RTE_FLOW_ITEM_TYPE_END;
186                 break;
187         default:
188                 ret = RTE_FLOW_ITEM_TYPE_VOID;
189                 break;
190         }
191         return ret;
192 }
193
194 /**
195  * Expand RSS flows into several possible flows according to the RSS hash
196  * fields requested and the driver capabilities.
197  *
198  * @param[out] buf
199  *   Buffer to store the result expansion.
200  * @param[in] size
201  *   Buffer size in bytes. If 0, @p buf can be NULL.
202  * @param[in] pattern
203  *   User flow pattern.
204  * @param[in] types
205  *   RSS types to expand (see ETH_RSS_* definitions).
206  * @param[in] graph
207  *   Input graph to expand @p pattern according to @p types.
208  * @param[in] graph_root_index
209  *   Index of root node in @p graph, typically 0.
210  *
211  * @return
212  *   A positive value representing the size of @p buf in bytes regardless of
213  *   @p size on success, a negative errno value otherwise and rte_errno is
214  *   set; the following errors are defined:
215  *
216  *   -E2BIG: graph @p graph is too deep.
217  */
218 static int
219 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
220                      const struct rte_flow_item *pattern, uint64_t types,
221                      const struct mlx5_flow_expand_node graph[],
222                      int graph_root_index)
223 {
224         const int elt_n = 8;
225         const struct rte_flow_item *item;
226         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
227         const int *next_node;
228         const int *stack[elt_n];
229         int stack_pos = 0;
230         struct rte_flow_item flow_items[elt_n];
231         unsigned int i;
232         size_t lsize;
233         size_t user_pattern_size = 0;
234         void *addr = NULL;
235         const struct mlx5_flow_expand_node *next = NULL;
236         struct rte_flow_item missed_item;
237         int missed = 0;
238         int elt = 0;
239         const struct rte_flow_item *last_item = NULL;
240
241         memset(&missed_item, 0, sizeof(missed_item));
242         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
243                 elt_n * sizeof(buf->entry[0]);
244         if (lsize <= size) {
245                 buf->entry[0].priority = 0;
246                 buf->entry[0].pattern = (void *)&buf->entry[elt_n];
247                 buf->entries = 0;
248                 addr = buf->entry[0].pattern;
249         }
250         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
251                 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
252                         last_item = item;
253                 for (i = 0; node->next && node->next[i]; ++i) {
254                         next = &graph[node->next[i]];
255                         if (next->type == item->type)
256                                 break;
257                 }
258                 if (next)
259                         node = next;
260                 user_pattern_size += sizeof(*item);
261         }
262         user_pattern_size += sizeof(*item); /* Handle END item. */
263         lsize += user_pattern_size;
264         /* Copy the user pattern in the first entry of the buffer. */
265         if (lsize <= size) {
266                 rte_memcpy(addr, pattern, user_pattern_size);
267                 addr = (void *)(((uintptr_t)addr) + user_pattern_size);
268                 buf->entries = 1;
269         }
270         /* Start expanding. */
271         memset(flow_items, 0, sizeof(flow_items));
272         user_pattern_size -= sizeof(*item);
273         /*
274          * Check if the last valid item has spec set; if so, the pattern
275          * needs to be completed so that it can be used for expansion.
276          */
277         missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
278         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
279                 /* Item type END indicates expansion is not required. */
280                 return lsize;
281         }
282         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
283                 next = NULL;
284                 missed = 1;
285                 for (i = 0; node->next && node->next[i]; ++i) {
286                         next = &graph[node->next[i]];
287                         if (next->type == missed_item.type) {
288                                 flow_items[0].type = missed_item.type;
289                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
290                                 break;
291                         }
292                         next = NULL;
293                 }
294         }
295         if (next && missed) {
296                 elt = 2; /* missed item + item end. */
297                 node = next;
298                 lsize += elt * sizeof(*item) + user_pattern_size;
299                 if ((node->rss_types & types) && lsize <= size) {
300                         buf->entry[buf->entries].priority = 1;
301                         buf->entry[buf->entries].pattern = addr;
302                         buf->entries++;
303                         rte_memcpy(addr, buf->entry[0].pattern,
304                                    user_pattern_size);
305                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
306                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
307                         addr = (void *)(((uintptr_t)addr) +
308                                         elt * sizeof(*item));
309                 }
310         }
311         memset(flow_items, 0, sizeof(flow_items));
312         next_node = node->next;
313         stack[stack_pos] = next_node;
314         node = next_node ? &graph[*next_node] : NULL;
315         while (node) {
316                 flow_items[stack_pos].type = node->type;
317                 if (node->rss_types & types) {
318                         /*
319                          * Compute the number of items to copy from the
320                          * expansion and copy it.
321                          * When stack_pos is 0, there is one element in it,
322                          * plus the additional END item.
323                          */
324                         elt = stack_pos + 2;
325                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
326                         lsize += elt * sizeof(*item) + user_pattern_size;
327                         if (lsize <= size) {
328                                 size_t n = elt * sizeof(*item);
329
330                                 buf->entry[buf->entries].priority =
331                                         stack_pos + 1 + missed;
332                                 buf->entry[buf->entries].pattern = addr;
333                                 buf->entries++;
334                                 rte_memcpy(addr, buf->entry[0].pattern,
335                                            user_pattern_size);
336                                 addr = (void *)(((uintptr_t)addr) +
337                                                 user_pattern_size);
338                                 rte_memcpy(addr, &missed_item,
339                                            missed * sizeof(*item));
340                                 addr = (void *)(((uintptr_t)addr) +
341                                         missed * sizeof(*item));
342                                 rte_memcpy(addr, flow_items, n);
343                                 addr = (void *)(((uintptr_t)addr) + n);
344                         }
345                 }
346                 /* Go deeper. */
347                 if (node->next) {
348                         next_node = node->next;
349                         if (stack_pos++ == elt_n) {
350                                 rte_errno = E2BIG;
351                                 return -rte_errno;
352                         }
353                         stack[stack_pos] = next_node;
354                 } else if (*(next_node + 1)) {
355                         /* Follow up with the next possibility. */
356                         ++next_node;
357                 } else {
358                         /* Move to the next path. */
359                         if (stack_pos)
360                                 next_node = stack[--stack_pos];
361                         next_node++;
362                         stack[stack_pos] = next_node;
363                 }
364                 node = *next_node ? &graph[*next_node] : NULL;
365         };
366         /* No expanded flows, but we have a missed item; create one rule for it. */
367         if (buf->entries == 1 && missed != 0) {
368                 elt = 2;
369                 lsize += elt * sizeof(*item) + user_pattern_size;
370                 if (lsize <= size) {
371                         buf->entry[buf->entries].priority = 1;
372                         buf->entry[buf->entries].pattern = addr;
373                         buf->entries++;
374                         flow_items[0].type = missed_item.type;
375                         flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
376                         rte_memcpy(addr, buf->entry[0].pattern,
377                                    user_pattern_size);
378                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
379                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
380                         addr = (void *)(((uintptr_t)addr) +
381                                         elt * sizeof(*item));
382                 }
383         }
384         return lsize;
385 }
386
387 enum mlx5_expansion {
388         MLX5_EXPANSION_ROOT,
389         MLX5_EXPANSION_ROOT_OUTER,
390         MLX5_EXPANSION_ROOT_ETH_VLAN,
391         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
392         MLX5_EXPANSION_OUTER_ETH,
393         MLX5_EXPANSION_OUTER_ETH_VLAN,
394         MLX5_EXPANSION_OUTER_VLAN,
395         MLX5_EXPANSION_OUTER_IPV4,
396         MLX5_EXPANSION_OUTER_IPV4_UDP,
397         MLX5_EXPANSION_OUTER_IPV4_TCP,
398         MLX5_EXPANSION_OUTER_IPV6,
399         MLX5_EXPANSION_OUTER_IPV6_UDP,
400         MLX5_EXPANSION_OUTER_IPV6_TCP,
401         MLX5_EXPANSION_VXLAN,
402         MLX5_EXPANSION_VXLAN_GPE,
403         MLX5_EXPANSION_GRE,
404         MLX5_EXPANSION_MPLS,
405         MLX5_EXPANSION_ETH,
406         MLX5_EXPANSION_ETH_VLAN,
407         MLX5_EXPANSION_VLAN,
408         MLX5_EXPANSION_IPV4,
409         MLX5_EXPANSION_IPV4_UDP,
410         MLX5_EXPANSION_IPV4_TCP,
411         MLX5_EXPANSION_IPV6,
412         MLX5_EXPANSION_IPV6_UDP,
413         MLX5_EXPANSION_IPV6_TCP,
414 };
415
416 /** Supported expansion of items. */
417 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
418         [MLX5_EXPANSION_ROOT] = {
419                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
420                                                   MLX5_EXPANSION_IPV4,
421                                                   MLX5_EXPANSION_IPV6),
422                 .type = RTE_FLOW_ITEM_TYPE_END,
423         },
424         [MLX5_EXPANSION_ROOT_OUTER] = {
425                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
426                                                   MLX5_EXPANSION_OUTER_IPV4,
427                                                   MLX5_EXPANSION_OUTER_IPV6),
428                 .type = RTE_FLOW_ITEM_TYPE_END,
429         },
430         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
431                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
432                 .type = RTE_FLOW_ITEM_TYPE_END,
433         },
434         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
435                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
436                                                 (MLX5_EXPANSION_OUTER_ETH_VLAN),
437                 .type = RTE_FLOW_ITEM_TYPE_END,
438         },
439         [MLX5_EXPANSION_OUTER_ETH] = {
440                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
441                                                   MLX5_EXPANSION_OUTER_IPV6,
442                                                   MLX5_EXPANSION_MPLS),
443                 .type = RTE_FLOW_ITEM_TYPE_ETH,
444                 .rss_types = 0,
445         },
446         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
447                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
448                 .type = RTE_FLOW_ITEM_TYPE_ETH,
449                 .rss_types = 0,
450         },
451         [MLX5_EXPANSION_OUTER_VLAN] = {
452                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
453                                                   MLX5_EXPANSION_OUTER_IPV6),
454                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
455         },
456         [MLX5_EXPANSION_OUTER_IPV4] = {
457                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
458                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
459                          MLX5_EXPANSION_OUTER_IPV4_TCP,
460                          MLX5_EXPANSION_GRE,
461                          MLX5_EXPANSION_IPV4,
462                          MLX5_EXPANSION_IPV6),
463                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
464                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
465                         ETH_RSS_NONFRAG_IPV4_OTHER,
466         },
467         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
468                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
469                                                   MLX5_EXPANSION_VXLAN_GPE),
470                 .type = RTE_FLOW_ITEM_TYPE_UDP,
471                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
472         },
473         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
474                 .type = RTE_FLOW_ITEM_TYPE_TCP,
475                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
476         },
477         [MLX5_EXPANSION_OUTER_IPV6] = {
478                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
479                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
480                          MLX5_EXPANSION_OUTER_IPV6_TCP,
481                          MLX5_EXPANSION_IPV4,
482                          MLX5_EXPANSION_IPV6),
483                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
484                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
485                         ETH_RSS_NONFRAG_IPV6_OTHER,
486         },
487         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
488                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
489                                                   MLX5_EXPANSION_VXLAN_GPE),
490                 .type = RTE_FLOW_ITEM_TYPE_UDP,
491                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
492         },
493         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
494                 .type = RTE_FLOW_ITEM_TYPE_TCP,
495                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
496         },
497         [MLX5_EXPANSION_VXLAN] = {
498                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
499                                                   MLX5_EXPANSION_IPV4,
500                                                   MLX5_EXPANSION_IPV6),
501                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
502         },
503         [MLX5_EXPANSION_VXLAN_GPE] = {
504                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
505                                                   MLX5_EXPANSION_IPV4,
506                                                   MLX5_EXPANSION_IPV6),
507                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
508         },
509         [MLX5_EXPANSION_GRE] = {
510                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
511                 .type = RTE_FLOW_ITEM_TYPE_GRE,
512         },
513         [MLX5_EXPANSION_MPLS] = {
514                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
515                                                   MLX5_EXPANSION_IPV6),
516                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
517         },
518         [MLX5_EXPANSION_ETH] = {
519                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
520                                                   MLX5_EXPANSION_IPV6),
521                 .type = RTE_FLOW_ITEM_TYPE_ETH,
522         },
523         [MLX5_EXPANSION_ETH_VLAN] = {
524                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
525                 .type = RTE_FLOW_ITEM_TYPE_ETH,
526         },
527         [MLX5_EXPANSION_VLAN] = {
528                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
529                                                   MLX5_EXPANSION_IPV6),
530                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
531         },
532         [MLX5_EXPANSION_IPV4] = {
533                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
534                                                   MLX5_EXPANSION_IPV4_TCP),
535                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
536                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
537                         ETH_RSS_NONFRAG_IPV4_OTHER,
538         },
539         [MLX5_EXPANSION_IPV4_UDP] = {
540                 .type = RTE_FLOW_ITEM_TYPE_UDP,
541                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
542         },
543         [MLX5_EXPANSION_IPV4_TCP] = {
544                 .type = RTE_FLOW_ITEM_TYPE_TCP,
545                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
546         },
547         [MLX5_EXPANSION_IPV6] = {
548                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
549                                                   MLX5_EXPANSION_IPV6_TCP),
550                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
551                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
552                         ETH_RSS_NONFRAG_IPV6_OTHER,
553         },
554         [MLX5_EXPANSION_IPV6_UDP] = {
555                 .type = RTE_FLOW_ITEM_TYPE_UDP,
556                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
557         },
558         [MLX5_EXPANSION_IPV6_TCP] = {
559                 .type = RTE_FLOW_ITEM_TYPE_TCP,
560                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
561         },
562 };
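
/*
 * Illustrative sketch (not used by the driver): how mlx5_flow_expand_rss()
 * is typically invoked with the expansion graph above. The buffer size and
 * the ETH_RSS_IP type selection are assumptions made for this example only.
 */
static __rte_unused int
mlx5_flow_expand_rss_example(const struct rte_flow_item *pattern)
{
        union {
                struct mlx5_flow_expand_rss buf;
                uint8_t buffer[2048];
        } expand_buffer;
        int ret;

        /* Expand the user pattern over all IPv4/IPv6 RSS types. */
        ret = mlx5_flow_expand_rss(&expand_buffer.buf,
                                   sizeof(expand_buffer.buffer),
                                   pattern, ETH_RSS_IP,
                                   mlx5_support_expansion,
                                   MLX5_EXPANSION_ROOT);
        /* ret < 0 on failure; otherwise expand_buffer.buf.entries patterns exist. */
        return ret;
}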
563
564 static struct rte_flow_shared_action *
565 mlx5_shared_action_create(struct rte_eth_dev *dev,
566                           const struct rte_flow_shared_action_conf *conf,
567                           const struct rte_flow_action *action,
568                           struct rte_flow_error *error);
569 static int mlx5_shared_action_destroy
570                                 (struct rte_eth_dev *dev,
571                                  struct rte_flow_shared_action *shared_action,
572                                  struct rte_flow_error *error);
573 static int mlx5_shared_action_update
574                                 (struct rte_eth_dev *dev,
575                                  struct rte_flow_shared_action *shared_action,
576                                  const struct rte_flow_action *action,
577                                  struct rte_flow_error *error);
578 static int mlx5_shared_action_query
579                                 (struct rte_eth_dev *dev,
580                                  const struct rte_flow_shared_action *action,
581                                  void *data,
582                                  struct rte_flow_error *error);
583 static inline bool
584 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
585                           struct rte_flow_tunnel *tunnel,
586                           const char **err_msg)
587 {
588         *err_msg = NULL;
589         if (!is_tunnel_offload_active(dev)) {
590                 *err_msg = "tunnel offload was not activated";
591                 goto out;
592         } else if (!tunnel) {
593                 *err_msg = "no application tunnel";
594                 goto out;
595         }
596
597         switch (tunnel->type) {
598         default:
599                 *err_msg = "unsupported tunnel type";
600                 goto out;
601         case RTE_FLOW_ITEM_TYPE_VXLAN:
602                 break;
603         }
604
605 out:
606         return !*err_msg;
607 }
608
609
610 static int
611 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
612                     struct rte_flow_tunnel *app_tunnel,
613                     struct rte_flow_action **actions,
614                     uint32_t *num_of_actions,
615                     struct rte_flow_error *error)
616 {
617         int ret;
618         struct mlx5_flow_tunnel *tunnel;
619         const char *err_msg = NULL;
620         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
621
622         if (!verdict)
623                 return rte_flow_error_set(error, EINVAL,
624                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
625                                           err_msg);
626         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
627         if (ret < 0) {
628                 return rte_flow_error_set(error, ret,
629                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
630                                           "failed to initialize pmd tunnel");
631         }
632         *actions = &tunnel->action;
633         *num_of_actions = 1;
634         return 0;
635 }
636
637 static int
638 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
639                        struct rte_flow_tunnel *app_tunnel,
640                        struct rte_flow_item **items,
641                        uint32_t *num_of_items,
642                        struct rte_flow_error *error)
643 {
644         int ret;
645         struct mlx5_flow_tunnel *tunnel;
646         const char *err_msg = NULL;
647         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
648
649         if (!verdict)
650                 return rte_flow_error_set(error, EINVAL,
651                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
652                                           err_msg);
653         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
654         if (ret < 0) {
655                 return rte_flow_error_set(error, ret,
656                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
657                                           "failed to initialize pmd tunnel");
658         }
659         *items = &tunnel->item;
660         *num_of_items = 1;
661         return 0;
662 }
663
664 static int
665 mlx5_flow_item_release(struct rte_eth_dev *dev,
666                        struct rte_flow_item *pmd_items,
667                        uint32_t num_items, struct rte_flow_error *err)
668 {
669         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
670         struct mlx5_flow_tunnel *tun;
671
672         rte_spinlock_lock(&thub->sl);
673         LIST_FOREACH(tun, &thub->tunnels, chain) {
674                 if (&tun->item == pmd_items) {
675                         LIST_REMOVE(tun, chain);
676                         break;
677                 }
678         }
679         rte_spinlock_unlock(&thub->sl);
680         if (!tun || num_items != 1)
681                 return rte_flow_error_set(err, EINVAL,
682                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
683                                           "invalid argument");
684         if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED))
685                 mlx5_flow_tunnel_free(dev, tun);
686         return 0;
687 }
688
689 static int
690 mlx5_flow_action_release(struct rte_eth_dev *dev,
691                          struct rte_flow_action *pmd_actions,
692                          uint32_t num_actions, struct rte_flow_error *err)
693 {
694         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
695         struct mlx5_flow_tunnel *tun;
696
697         rte_spinlock_lock(&thub->sl);
698         LIST_FOREACH(tun, &thub->tunnels, chain) {
699                 if (&tun->action == pmd_actions) {
700                         LIST_REMOVE(tun, chain);
701                         break;
702                 }
703         }
704         rte_spinlock_unlock(&thub->sl);
705         if (!tun || num_actions != 1)
706                 return rte_flow_error_set(err, EINVAL,
707                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
708                                           "invalid argument");
709         if (!__atomic_sub_fetch(&tun->refctn, 1, __ATOMIC_RELAXED))
710                 mlx5_flow_tunnel_free(dev, tun);
711
712         return 0;
713 }
714
715 static int
716 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
717                                   struct rte_mbuf *m,
718                                   struct rte_flow_restore_info *info,
719                                   struct rte_flow_error *err)
720 {
721         uint64_t ol_flags = m->ol_flags;
722         const struct mlx5_flow_tbl_data_entry *tble;
723         const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
724
725         if ((ol_flags & mask) != mask)
726                 goto err;
727         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
728         if (!tble) {
729                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
730                         dev->data->port_id, m->hash.fdir.hi);
731                 goto err;
732         }
733         MLX5_ASSERT(tble->tunnel);
734         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
735         info->group_id = tble->group_id;
736         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
737                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
738                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
739
740         return 0;
741
742 err:
743         return rte_flow_error_set(err, EINVAL,
744                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
745                                   "failed to get restore info");
746 }
747
748 static const struct rte_flow_ops mlx5_flow_ops = {
749         .validate = mlx5_flow_validate,
750         .create = mlx5_flow_create,
751         .destroy = mlx5_flow_destroy,
752         .flush = mlx5_flow_flush,
753         .isolate = mlx5_flow_isolate,
754         .query = mlx5_flow_query,
755         .dev_dump = mlx5_flow_dev_dump,
756         .get_aged_flows = mlx5_flow_get_aged_flows,
757         .shared_action_create = mlx5_shared_action_create,
758         .shared_action_destroy = mlx5_shared_action_destroy,
759         .shared_action_update = mlx5_shared_action_update,
760         .shared_action_query = mlx5_shared_action_query,
761         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
762         .tunnel_match = mlx5_flow_tunnel_match,
763         .tunnel_action_decap_release = mlx5_flow_action_release,
764         .tunnel_item_release = mlx5_flow_item_release,
765         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
766 };
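
/*
 * Illustrative application-side sketch (not part of the driver): the generic
 * rte_flow_tunnel_decap_set() API dispatches to mlx5_flow_tunnel_decap_set()
 * through mlx5_flow_ops above. The VXLAN tunnel description is an assumption
 * made for this example only.
 */
static __rte_unused int
mlx5_flow_tunnel_decap_set_example(uint16_t port_id,
                                   struct rte_flow_error *error)
{
        struct rte_flow_tunnel app_tunnel = {
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        };
        struct rte_flow_action *pmd_actions;
        uint32_t num_of_actions;

        /* On success the PMD returns one action to prepend to the flow rule. */
        return rte_flow_tunnel_decap_set(port_id, &app_tunnel, &pmd_actions,
                                         &num_of_actions, error);
}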
767
768 /* Tunnel information. */
769 struct mlx5_flow_tunnel_info {
770         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
771         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
772 };
773
774 static struct mlx5_flow_tunnel_info tunnels_info[] = {
775         {
776                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
777                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
778         },
779         {
780                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
781                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
782         },
783         {
784                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
785                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
786         },
787         {
788                 .tunnel = MLX5_FLOW_LAYER_GRE,
789                 .ptype = RTE_PTYPE_TUNNEL_GRE,
790         },
791         {
792                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
793                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
794         },
795         {
796                 .tunnel = MLX5_FLOW_LAYER_MPLS,
797                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
798         },
799         {
800                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
801                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
802         },
803         {
804                 .tunnel = MLX5_FLOW_LAYER_IPIP,
805                 .ptype = RTE_PTYPE_TUNNEL_IP,
806         },
807         {
808                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
809                 .ptype = RTE_PTYPE_TUNNEL_IP,
810         },
811         {
812                 .tunnel = MLX5_FLOW_LAYER_GTP,
813                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
814         },
815 };
816
817 /* Key of thread specific flow workspace data. */
818 static pthread_key_t key_workspace;
819
820 /* One-time initialization control for thread specific flow workspace data. */
821 static pthread_once_t key_workspace_init;
822
823
824 /**
825  * Translate tag ID to register.
826  *
827  * @param[in] dev
828  *   Pointer to the Ethernet device structure.
829  * @param[in] feature
830  *   The feature that requests the register.
831  * @param[in] id
832  *   The requested register ID.
833  * @param[out] error
834  *   Error description in case of any.
835  *
836  * @return
837  *   The requested register on success, a negative errno
838  *   value otherwise and rte_errno is set.
839  */
840 int
841 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
842                      enum mlx5_feature_name feature,
843                      uint32_t id,
844                      struct rte_flow_error *error)
845 {
846         struct mlx5_priv *priv = dev->data->dev_private;
847         struct mlx5_dev_config *config = &priv->config;
848         enum modify_reg start_reg;
849         bool skip_mtr_reg = false;
850
851         switch (feature) {
852         case MLX5_HAIRPIN_RX:
853                 return REG_B;
854         case MLX5_HAIRPIN_TX:
855                 return REG_A;
856         case MLX5_METADATA_RX:
857                 switch (config->dv_xmeta_en) {
858                 case MLX5_XMETA_MODE_LEGACY:
859                         return REG_B;
860                 case MLX5_XMETA_MODE_META16:
861                         return REG_C_0;
862                 case MLX5_XMETA_MODE_META32:
863                         return REG_C_1;
864                 }
865                 break;
866         case MLX5_METADATA_TX:
867                 return REG_A;
868         case MLX5_METADATA_FDB:
869                 switch (config->dv_xmeta_en) {
870                 case MLX5_XMETA_MODE_LEGACY:
871                         return REG_NON;
872                 case MLX5_XMETA_MODE_META16:
873                         return REG_C_0;
874                 case MLX5_XMETA_MODE_META32:
875                         return REG_C_1;
876                 }
877                 break;
878         case MLX5_FLOW_MARK:
879                 switch (config->dv_xmeta_en) {
880                 case MLX5_XMETA_MODE_LEGACY:
881                         return REG_NON;
882                 case MLX5_XMETA_MODE_META16:
883                         return REG_C_1;
884                 case MLX5_XMETA_MODE_META32:
885                         return REG_C_0;
886                 }
887                 break;
888         case MLX5_MTR_SFX:
889                 /*
890                  * If meter color and flow match share one register, flow match
891                  * should use the meter color register for match.
892                  */
893                 if (priv->mtr_reg_share)
894                         return priv->mtr_color_reg;
895                 else
896                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
897                                REG_C_3;
898         case MLX5_MTR_COLOR:
899                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
900                 return priv->mtr_color_reg;
901         case MLX5_COPY_MARK:
902                 /*
903                  * The metadata COPY_MARK register is used in the meter suffix
904                  * sub-flow when a meter is present. It's safe to share the register.
905                  */
906                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
907         case MLX5_APP_TAG:
908                 /*
909                  * If the meter is enabled, it engages a register for both color
910                  * match and flow match. If the meter color match does not use
911                  * REG_C_2, the REG_C_x used by the meter color match must be
912                  * skipped.
913                  * If the meter is disabled, all available registers can be used.
914                  */
915                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
916                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
917                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
918                 if (id > (REG_C_7 - start_reg))
919                         return rte_flow_error_set(error, EINVAL,
920                                                   RTE_FLOW_ERROR_TYPE_ITEM,
921                                                   NULL, "invalid tag id");
922                 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
923                         return rte_flow_error_set(error, ENOTSUP,
924                                                   RTE_FLOW_ERROR_TYPE_ITEM,
925                                                   NULL, "unsupported tag id");
926                 /*
927                  * This case means the meter is using a REG_C_x greater than 2.
928                  * Take care not to conflict with meter color REG_C_x.
929                  * If the available index REG_C_y >= REG_C_x, skip the
930                  * color register.
931                  */
932                 if (skip_mtr_reg && config->flow_mreg_c
933                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
934                         if (id >= (REG_C_7 - start_reg))
935                                 return rte_flow_error_set(error, EINVAL,
936                                                        RTE_FLOW_ERROR_TYPE_ITEM,
937                                                         NULL, "invalid tag id");
938                         if (config->flow_mreg_c
939                             [id + 1 + start_reg - REG_C_0] != REG_NON)
940                                 return config->flow_mreg_c
941                                                [id + 1 + start_reg - REG_C_0];
942                         return rte_flow_error_set(error, ENOTSUP,
943                                                   RTE_FLOW_ERROR_TYPE_ITEM,
944                                                   NULL, "unsupported tag id");
945                 }
946                 return config->flow_mreg_c[id + start_reg - REG_C_0];
947         }
948         MLX5_ASSERT(false);
949         return rte_flow_error_set(error, EINVAL,
950                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
951                                   NULL, "invalid feature name");
952 }
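
/*
 * Illustrative sketch (not used by the driver): query which REG_C_x register
 * carries the flow MARK value under the configured dv_xmeta_en mode. The
 * wrapper name is an assumption made for this example only.
 */
static __rte_unused int
mlx5_flow_mark_reg_example(struct rte_eth_dev *dev,
                           struct rte_flow_error *error)
{
        int reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);

        /* Negative on error, REG_NON if marks are unsupported in this mode. */
        return reg;
}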
953
954 /**
955  * Check extensive flow metadata register support.
956  *
957  * @param dev
958  *   Pointer to rte_eth_dev structure.
959  *
960  * @return
961  *   True if device supports extensive flow metadata register, otherwise false.
962  */
963 bool
964 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
965 {
966         struct mlx5_priv *priv = dev->data->dev_private;
967         struct mlx5_dev_config *config = &priv->config;
968
969         /*
970          * Having available reg_c can be regarded inclusively as supporting
971          * extensive flow metadata registers, which could mean:
972          * - metadata register copy action by modify header.
973          * - 16 modify header actions are supported.
974          * - reg_c's are preserved across different domains (FDB and NIC) on
975          *   packet loopback by flow lookup miss.
976          */
977         return config->flow_mreg_c[2] != REG_NON;
978 }
979
980 /**
981  * Verify the @p item specifications (spec, last, mask) are compatible with the
982  * NIC capabilities.
983  *
984  * @param[in] item
985  *   Item specification.
986  * @param[in] mask
987  *   @p item->mask or flow default bit-masks.
988  * @param[in] nic_mask
989  *   Bit-masks covering supported fields by the NIC to compare with user mask.
990  * @param[in] size
991  *   Bit-masks size in bytes.
992  * @param[in] range_accepted
993  *   True if range of values is accepted for specific fields, false otherwise.
994  * @param[out] error
995  *   Pointer to error structure.
996  *
997  * @return
998  *   0 on success, a negative errno value otherwise and rte_errno is set.
999  */
1000 int
1001 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
1002                           const uint8_t *mask,
1003                           const uint8_t *nic_mask,
1004                           unsigned int size,
1005                           bool range_accepted,
1006                           struct rte_flow_error *error)
1007 {
1008         unsigned int i;
1009
1010         MLX5_ASSERT(nic_mask);
1011         for (i = 0; i < size; ++i)
1012                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
1013                         return rte_flow_error_set(error, ENOTSUP,
1014                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1015                                                   item,
1016                                                   "mask enables non supported"
1017                                                   " bits");
1018         if (!item->spec && (item->mask || item->last))
1019                 return rte_flow_error_set(error, EINVAL,
1020                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1021                                           "mask/last without a spec is not"
1022                                           " supported");
1023         if (item->spec && item->last && !range_accepted) {
1024                 uint8_t spec[size];
1025                 uint8_t last[size];
1026                 unsigned int i;
1027                 int ret;
1028
1029                 for (i = 0; i < size; ++i) {
1030                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
1031                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
1032                 }
1033                 ret = memcmp(spec, last, size);
1034                 if (ret != 0)
1035                         return rte_flow_error_set(error, EINVAL,
1036                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1037                                                   item,
1038                                                   "range is not valid");
1039         }
1040         return 0;
1041 }
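
/*
 * Illustrative sketch (not used by the driver): validate a VLAN item against
 * a hypothetical NIC mask that only supports matching on the TCI field. The
 * NIC mask value and wrapper name are assumptions made for this example only.
 */
static __rte_unused int
mlx5_flow_item_acceptable_example(const struct rte_flow_item *item,
                                  struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan nic_mask = {
                .tci = RTE_BE16(0xffff),
        };
        const uint8_t *mask = item->mask ? item->mask :
                              (const uint8_t *)&rte_flow_item_vlan_mask;

        return mlx5_flow_item_acceptable(item, mask,
                                         (const uint8_t *)&nic_mask,
                                         sizeof(struct rte_flow_item_vlan),
                                         false, error);
}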
1042
1043 /**
1044  * Adjust the hash fields according to the @p flow information.
1045  *
1046  * @param[in] rss_desc
1047  *   Pointer to the RSS flow descriptor.
1048  * @param[in] tunnel
1049  *   1 when the hash field is for a tunnel item.
1050  * @param[in] layer_types
1051  *   ETH_RSS_* types.
1052  * @param[in] hash_fields
1053  *   Item hash fields.
1054  *
1055  * @return
1056  *   The hash fields that should be used.
1057  */
1058 uint64_t
1059 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
1060                             int tunnel __rte_unused, uint64_t layer_types,
1061                             uint64_t hash_fields)
1062 {
1063 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1064         int rss_request_inner = rss_desc->level >= 2;
1065
1066         /* Check RSS hash level for tunnel. */
1067         if (tunnel && rss_request_inner)
1068                 hash_fields |= IBV_RX_HASH_INNER;
1069         else if (tunnel || rss_request_inner)
1070                 return 0;
1071 #endif
1072         /* Check if requested layer matches RSS hash fields. */
1073         if (!(rss_desc->types & layer_types))
1074                 return 0;
1075         return hash_fields;
1076 }
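
/*
 * Illustrative sketch (not used by the driver): keep the UDP/IPv4 hash fields
 * only when the flow's RSS configuration requested that layer. The wrapper
 * and its parameters are assumptions made for this example only.
 */
static __rte_unused uint64_t
mlx5_flow_hashfields_adjust_example(struct mlx5_flow_rss_desc *rss_desc,
                                    uint64_t udp4_hash_fields)
{
        /* Outer layer (tunnel == 0); returns 0 if UDP/IPv4 RSS was not requested. */
        return mlx5_flow_hashfields_adjust(rss_desc, 0,
                                           ETH_RSS_NONFRAG_IPV4_UDP,
                                           udp4_hash_fields);
}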
1077
1078 /**
1079  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
1080  * can be used; if several tunnel rules are used on this queue, the tunnel
1081  * ptype is cleared.
1082  *
1083  * @param rxq_ctrl
1084  *   Rx queue to update.
1085  */
1086 static void
1087 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1088 {
1089         unsigned int i;
1090         uint32_t tunnel_ptype = 0;
1091
1092         /* Look up for the ptype to use. */
1093         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1094                 if (!rxq_ctrl->flow_tunnels_n[i])
1095                         continue;
1096                 if (!tunnel_ptype) {
1097                         tunnel_ptype = tunnels_info[i].ptype;
1098                 } else {
1099                         tunnel_ptype = 0;
1100                         break;
1101                 }
1102         }
1103         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1104 }
1105
1106 /**
1107  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1108  * flow.
1109  *
1110  * @param[in] dev
1111  *   Pointer to the Ethernet device structure.
1112  * @param[in] dev_handle
1113  *   Pointer to device flow handle structure.
1114  */
1115 static void
1116 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1117                        struct mlx5_flow_handle *dev_handle)
1118 {
1119         struct mlx5_priv *priv = dev->data->dev_private;
1120         const int mark = dev_handle->mark;
1121         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1122         struct mlx5_hrxq *hrxq;
1123         unsigned int i;
1124
1125         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
1126                 return;
1127         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1128                               dev_handle->rix_hrxq);
1129         if (!hrxq)
1130                 return;
1131         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
1132                 int idx = hrxq->ind_table->queues[i];
1133                 struct mlx5_rxq_ctrl *rxq_ctrl =
1134                         container_of((*priv->rxqs)[idx],
1135                                      struct mlx5_rxq_ctrl, rxq);
1136
1137                 /*
1138                  * To support metadata register copy on Tx loopback,
1139                  * this must always be enabled (metadata may arrive
1140                  * from another port, not only from local flows).
1141                  */
1142                 if (priv->config.dv_flow_en &&
1143                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1144                     mlx5_flow_ext_mreg_supported(dev)) {
1145                         rxq_ctrl->rxq.mark = 1;
1146                         rxq_ctrl->flow_mark_n = 1;
1147                 } else if (mark) {
1148                         rxq_ctrl->rxq.mark = 1;
1149                         rxq_ctrl->flow_mark_n++;
1150                 }
1151                 if (tunnel) {
1152                         unsigned int j;
1153
1154                         /* Increase the counter matching the flow. */
1155                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1156                                 if ((tunnels_info[j].tunnel &
1157                                      dev_handle->layers) ==
1158                                     tunnels_info[j].tunnel) {
1159                                         rxq_ctrl->flow_tunnels_n[j]++;
1160                                         break;
1161                                 }
1162                         }
1163                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1164                 }
1165         }
1166 }
1167
1168 /**
1169  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1170  *
1171  * @param[in] dev
1172  *   Pointer to the Ethernet device structure.
1173  * @param[in] flow
1174  *   Pointer to flow structure.
1175  */
1176 static void
1177 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1178 {
1179         struct mlx5_priv *priv = dev->data->dev_private;
1180         uint32_t handle_idx;
1181         struct mlx5_flow_handle *dev_handle;
1182
1183         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1184                        handle_idx, dev_handle, next)
1185                 flow_drv_rxq_flags_set(dev, dev_handle);
1186 }
1187
1188 /**
1189  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1190  * device flow if no other flow uses it with the same kind of request.
1191  *
1192  * @param dev
1193  *   Pointer to Ethernet device.
1194  * @param[in] dev_handle
1195  *   Pointer to the device flow handle structure.
1196  */
1197 static void
1198 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1199                         struct mlx5_flow_handle *dev_handle)
1200 {
1201         struct mlx5_priv *priv = dev->data->dev_private;
1202         const int mark = dev_handle->mark;
1203         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1204         struct mlx5_hrxq *hrxq;
1205         unsigned int i;
1206
1207         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
1208                 return;
1209         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1210                               dev_handle->rix_hrxq);
1211         if (!hrxq)
1212                 return;
1213         MLX5_ASSERT(dev->data->dev_started);
1214         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
1215                 int idx = hrxq->ind_table->queues[i];
1216                 struct mlx5_rxq_ctrl *rxq_ctrl =
1217                         container_of((*priv->rxqs)[idx],
1218                                      struct mlx5_rxq_ctrl, rxq);
1219
1220                 if (priv->config.dv_flow_en &&
1221                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1222                     mlx5_flow_ext_mreg_supported(dev)) {
1223                         rxq_ctrl->rxq.mark = 1;
1224                         rxq_ctrl->flow_mark_n = 1;
1225                 } else if (mark) {
1226                         rxq_ctrl->flow_mark_n--;
1227                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1228                 }
1229                 if (tunnel) {
1230                         unsigned int j;
1231
1232                         /* Decrease the counter matching the flow. */
1233                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1234                                 if ((tunnels_info[j].tunnel &
1235                                      dev_handle->layers) ==
1236                                     tunnels_info[j].tunnel) {
1237                                         rxq_ctrl->flow_tunnels_n[j]--;
1238                                         break;
1239                                 }
1240                         }
1241                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1242                 }
1243         }
1244 }
1245
1246 /**
1247  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1248  * @p flow if no other flow uses it with the same kind of request.
1249  *
1250  * @param dev
1251  *   Pointer to Ethernet device.
1252  * @param[in] flow
1253  *   Pointer to the flow.
1254  */
1255 static void
1256 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1257 {
1258         struct mlx5_priv *priv = dev->data->dev_private;
1259         uint32_t handle_idx;
1260         struct mlx5_flow_handle *dev_handle;
1261
1262         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1263                        handle_idx, dev_handle, next)
1264                 flow_drv_rxq_flags_trim(dev, dev_handle);
1265 }
1266
1267 /**
1268  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1269  *
1270  * @param dev
1271  *   Pointer to Ethernet device.
1272  */
1273 static void
1274 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1275 {
1276         struct mlx5_priv *priv = dev->data->dev_private;
1277         unsigned int i;
1278
1279         for (i = 0; i != priv->rxqs_n; ++i) {
1280                 struct mlx5_rxq_ctrl *rxq_ctrl;
1281                 unsigned int j;
1282
1283                 if (!(*priv->rxqs)[i])
1284                         continue;
1285                 rxq_ctrl = container_of((*priv->rxqs)[i],
1286                                         struct mlx5_rxq_ctrl, rxq);
1287                 rxq_ctrl->flow_mark_n = 0;
1288                 rxq_ctrl->rxq.mark = 0;
1289                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1290                         rxq_ctrl->flow_tunnels_n[j] = 0;
1291                 rxq_ctrl->rxq.tunnel = 0;
1292         }
1293 }
1294
1295 /**
1296  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1297  *
1298  * @param[in] dev
1299  *   Pointer to the Ethernet device structure.
1300  */
1301 void
1302 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1303 {
1304         struct mlx5_priv *priv = dev->data->dev_private;
1305         struct mlx5_rxq_data *data;
1306         unsigned int i;
1307
1308         for (i = 0; i != priv->rxqs_n; ++i) {
1309                 if (!(*priv->rxqs)[i])
1310                         continue;
1311                 data = (*priv->rxqs)[i];
1312                 if (!rte_flow_dynf_metadata_avail()) {
1313                         data->dynf_meta = 0;
1314                         data->flow_meta_mask = 0;
1315                         data->flow_meta_offset = -1;
1316                 } else {
1317                         data->dynf_meta = 1;
1318                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1319                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1320                 }
1321         }
1322 }
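/*
 * The mask/offset programmed above are only non-trivial after the application
 * has registered the metadata dynamic field, for example (illustrative only):
 *
 *	if (rte_flow_dynf_metadata_register() < 0)
 *		return -rte_errno;
 */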
1323
1324 /**
1325  * Return a pointer to the desired action in the list of actions.
1326  *
1327  * @param[in] actions
1328  *   The list of actions to search the action in.
1329  * @param[in] action
1330  *   The action to find.
1331  *
1332  * @return
1333  *   Pointer to the action in the list, if found. NULL otherwise.
1334  */
1335 const struct rte_flow_action *
1336 mlx5_flow_find_action(const struct rte_flow_action *actions,
1337                       enum rte_flow_action_type action)
1338 {
1339         if (actions == NULL)
1340                 return NULL;
1341         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1342                 if (actions->type == action)
1343                         return actions;
1344         return NULL;
1345 }
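/*
 * Typical usage (illustrative): probe an action list for a given type, e.g.
 *
 *	const struct rte_flow_action *rss =
 *		mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 * and inspect rss->conf when the returned pointer is not NULL.
 */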
1346
1347 /**
1348  * Validate the flag action.
1349  *
1350  * @param[in] action_flags
1351  *   Bit-fields that hold the actions detected until now.
1352  * @param[in] attr
1353  *   Attributes of flow that includes this action.
1354  * @param[out] error
1355  *   Pointer to error structure.
1356  *
1357  * @return
1358  *   0 on success, a negative errno value otherwise and rte_errno is set.
1359  */
1360 int
1361 mlx5_flow_validate_action_flag(uint64_t action_flags,
1362                                const struct rte_flow_attr *attr,
1363                                struct rte_flow_error *error)
1364 {
1365         if (action_flags & MLX5_FLOW_ACTION_MARK)
1366                 return rte_flow_error_set(error, EINVAL,
1367                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1368                                           "can't mark and flag in same flow");
1369         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1370                 return rte_flow_error_set(error, EINVAL,
1371                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1372                                           "can't have 2 flag"
1373                                           " actions in same flow");
1374         if (attr->egress)
1375                 return rte_flow_error_set(error, ENOTSUP,
1376                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1377                                           "flag action not supported for "
1378                                           "egress");
1379         return 0;
1380 }
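/*
 * FLAG and MARK are mutually exclusive because both report through the same
 * Rx completion field: FLAG only raises PKT_RX_FDIR in the mbuf, while MARK
 * additionally returns the configured id in mbuf->hash.fdir.hi.
 */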
1381
1382 /**
1383  * Validate the mark action.
1384  *
1385  * @param[in] action
1386  *   Pointer to the mark action.
1387  * @param[in] action_flags
1388  *   Bit-fields that hold the actions detected until now.
1389  * @param[in] attr
1390  *   Attributes of flow that includes this action.
1391  * @param[out] error
1392  *   Pointer to error structure.
1393  *
1394  * @return
1395  *   0 on success, a negative errno value otherwise and rte_errno is set.
1396  */
1397 int
1398 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1399                                uint64_t action_flags,
1400                                const struct rte_flow_attr *attr,
1401                                struct rte_flow_error *error)
1402 {
1403         const struct rte_flow_action_mark *mark = action->conf;
1404
1405         if (!mark)
1406                 return rte_flow_error_set(error, EINVAL,
1407                                           RTE_FLOW_ERROR_TYPE_ACTION,
1408                                           action,
1409                                           "configuration cannot be null");
1410         if (mark->id >= MLX5_FLOW_MARK_MAX)
1411                 return rte_flow_error_set(error, EINVAL,
1412                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1413                                           &mark->id,
1414                                           "mark id must be in 0 <= id < "
1415                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1416         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1417                 return rte_flow_error_set(error, EINVAL,
1418                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1419                                           "can't flag and mark in same flow");
1420         if (action_flags & MLX5_FLOW_ACTION_MARK)
1421                 return rte_flow_error_set(error, EINVAL,
1422                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1423                                           "can't have 2 mark actions in same"
1424                                           " flow");
1425         if (attr->egress)
1426                 return rte_flow_error_set(error, ENOTSUP,
1427                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1428                                           "mark action not supported for "
1429                                           "egress");
1430         return 0;
1431 }
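/*
 * Illustrative rule (testpmd syntax) accepted by this validation, assuming
 * Rx queue 0 is configured:
 *
 *	flow create 0 ingress pattern eth / end
 *		actions mark id 42 / queue index 0 / end
 */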
1432
1433 /**
1434  * Validate the drop action.
1435  *
1436  * @param[in] action_flags
1437  *   Bit-fields that hold the actions detected until now.
1438  * @param[in] attr
1439  *   Attributes of flow that includes this action.
1440  * @param[out] error
1441  *   Pointer to error structure.
1442  *
1443  * @return
1444  *   0 on success, a negative errno value otherwise and rte_errno is set.
1445  */
1446 int
1447 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1448                                const struct rte_flow_attr *attr,
1449                                struct rte_flow_error *error)
1450 {
1451         if (attr->egress)
1452                 return rte_flow_error_set(error, ENOTSUP,
1453                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1454                                           "drop action not supported for "
1455                                           "egress");
1456         return 0;
1457 }
1458
1459 /**
1460  * Validate the queue action.
1461  *
1462  * @param[in] action
1463  *   Pointer to the queue action.
1464  * @param[in] action_flags
1465  *   Bit-fields that hold the actions detected until now.
1466  * @param[in] dev
1467  *   Pointer to the Ethernet device structure.
1468  * @param[in] attr
1469  *   Attributes of flow that includes this action.
1470  * @param[out] error
1471  *   Pointer to error structure.
1472  *
1473  * @return
1474  *   0 on success, a negative errno value otherwise and rte_errno is set.
1475  */
1476 int
1477 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1478                                 uint64_t action_flags,
1479                                 struct rte_eth_dev *dev,
1480                                 const struct rte_flow_attr *attr,
1481                                 struct rte_flow_error *error)
1482 {
1483         struct mlx5_priv *priv = dev->data->dev_private;
1484         const struct rte_flow_action_queue *queue = action->conf;
1485
1486         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1487                 return rte_flow_error_set(error, EINVAL,
1488                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1489                                           "can't have 2 fate actions in"
1490                                           " same flow");
1491         if (!priv->rxqs_n)
1492                 return rte_flow_error_set(error, EINVAL,
1493                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1494                                           NULL, "No Rx queues configured");
1495         if (queue->index >= priv->rxqs_n)
1496                 return rte_flow_error_set(error, EINVAL,
1497                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1498                                           &queue->index,
1499                                           "queue index out of range");
1500         if (!(*priv->rxqs)[queue->index])
1501                 return rte_flow_error_set(error, EINVAL,
1502                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1503                                           &queue->index,
1504                                           "queue is not configured");
1505         if (attr->egress)
1506                 return rte_flow_error_set(error, ENOTSUP,
1507                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1508                                           "queue action not supported for "
1509                                           "egress");
1510         return 0;
1511 }
1512
1513 /**
1514  * Validate the rss action.
1515  *
1516  * @param[in] dev
1517  *   Pointer to the Ethernet device structure.
1518  * @param[in] action
1519  *   Pointer to the RSS action.
1520  * @param[out] error
1521  *   Pointer to error structure.
1522  *
1523  * @return
1524  *   0 on success, a negative errno value otherwise and rte_errno is set.
1525  */
1526 int
1527 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1528                          const struct rte_flow_action *action,
1529                          struct rte_flow_error *error)
1530 {
1531         struct mlx5_priv *priv = dev->data->dev_private;
1532         const struct rte_flow_action_rss *rss = action->conf;
1533         unsigned int i;
1534
1535         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1536             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1537                 return rte_flow_error_set(error, ENOTSUP,
1538                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1539                                           &rss->func,
1540                                           "RSS hash function not supported");
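        /*
         * RSS level 0/1 hashes on the outermost headers; level 2 requests
         * hashing on the inner (encapsulated) headers and therefore needs
         * tunnel offload support in rdma-core (HAVE_IBV_DEVICE_TUNNEL_SUPPORT).
         */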
1541 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1542         if (rss->level > 2)
1543 #else
1544         if (rss->level > 1)
1545 #endif
1546                 return rte_flow_error_set(error, ENOTSUP,
1547                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1548                                           &rss->level,
1549                                           "tunnel RSS is not supported");
1550         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1551         if (rss->key_len == 0 && rss->key != NULL)
1552                 return rte_flow_error_set(error, ENOTSUP,
1553                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1554                                           &rss->key_len,
1555                                           "RSS hash key length 0");
1556         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1557                 return rte_flow_error_set(error, ENOTSUP,
1558                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1559                                           &rss->key_len,
1560                                           "RSS hash key too small");
1561         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1562                 return rte_flow_error_set(error, ENOTSUP,
1563                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1564                                           &rss->key_len,
1565                                           "RSS hash key too large");
1566         if (rss->queue_num > priv->config.ind_table_max_size)
1567                 return rte_flow_error_set(error, ENOTSUP,
1568                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1569                                           &rss->queue_num,
1570                                           "number of queues too large");
1571         if (rss->types & MLX5_RSS_HF_MASK)
1572                 return rte_flow_error_set(error, ENOTSUP,
1573                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1574                                           &rss->types,
1575                                           "some RSS protocols are not"
1576                                           " supported");
1577         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1578             !(rss->types & ETH_RSS_IP))
1579                 return rte_flow_error_set(error, EINVAL,
1580                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1581                                           "L3 partial RSS requested but L3 RSS"
1582                                           " type not specified");
1583         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1584             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1585                 return rte_flow_error_set(error, EINVAL,
1586                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1587                                           "L4 partial RSS requested but L4 RSS"
1588                                           " type not specified");
1589         if (!priv->rxqs_n)
1590                 return rte_flow_error_set(error, EINVAL,
1591                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1592                                           NULL, "No Rx queues configured");
1593         if (!rss->queue_num)
1594                 return rte_flow_error_set(error, EINVAL,
1595                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1596                                           NULL, "No queues configured");
1597         for (i = 0; i != rss->queue_num; ++i) {
1598                 if (rss->queue[i] >= priv->rxqs_n)
1599                         return rte_flow_error_set
1600                                 (error, EINVAL,
1601                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1602                                  &rss->queue[i], "queue index out of range");
1603                 if (!(*priv->rxqs)[rss->queue[i]])
1604                         return rte_flow_error_set
1605                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1606                                  &rss->queue[i], "queue is not configured");
1607         }
1608         return 0;
1609 }
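/*
 * Illustrative configuration that passes the checks above, assuming two Rx
 * queues are configured and the default RSS key is used (key_len 0):
 *
 *	uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_action_rss conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key = NULL,
 *		.key_len = 0,
 *		.queue = queues,
 *		.queue_num = 2,
 *	};
 */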
1610
1611 /**
1612  * Validate the rss action.
1613  *
1614  * @param[in] action
1615  *   Pointer to the RSS action.
1616  * @param[in] action_flags
1617  *   Bit-fields that hold the actions detected until now.
1618  * @param[in] dev
1619  *   Pointer to the Ethernet device structure.
1620  * @param[in] attr
1621  *   Attributes of flow that includes this action.
1622  * @param[in] item_flags
1623  *   Items that were detected.
1624  * @param[out] error
1625  *   Pointer to error structure.
1626  *
1627  * @return
1628  *   0 on success, a negative errno value otherwise and rte_errno is set.
1629  */
1630 int
1631 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1632                               uint64_t action_flags,
1633                               struct rte_eth_dev *dev,
1634                               const struct rte_flow_attr *attr,
1635                               uint64_t item_flags,
1636                               struct rte_flow_error *error)
1637 {
1638         const struct rte_flow_action_rss *rss = action->conf;
1639         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1640         int ret;
1641
1642         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1643                 return rte_flow_error_set(error, EINVAL,
1644                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1645                                           "can't have 2 fate actions"
1646                                           " in same flow");
1647         ret = mlx5_validate_action_rss(dev, action, error);
1648         if (ret)
1649                 return ret;
1650         if (attr->egress)
1651                 return rte_flow_error_set(error, ENOTSUP,
1652                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1653                                           "rss action not supported for "
1654                                           "egress");
1655         if (rss->level > 1 && !tunnel)
1656                 return rte_flow_error_set(error, EINVAL,
1657                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1658                                           "inner RSS is not supported for "
1659                                           "non-tunnel flows");
1660         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1661             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1662                 return rte_flow_error_set(error, EINVAL,
1663                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1664                                           "RSS on eCPRI is not supported now");
1665         }
1666         return 0;
1667 }
1668
1669 /**
1670  * Validate the default miss action.
1671  *
1672  * @param[in] action_flags
1673  *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
1674  * @param[out] error
1675  *   Pointer to error structure.
1676  *
1677  * @return
1678  *   0 on success, a negative errno value otherwise and rte_errno is set.
1679  */
1680 int
1681 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1682                                 const struct rte_flow_attr *attr,
1683                                 struct rte_flow_error *error)
1684 {
1685         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1686                 return rte_flow_error_set(error, EINVAL,
1687                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1688                                           "can't have 2 fate actions in"
1689                                           " same flow");
1690         if (attr->egress)
1691                 return rte_flow_error_set(error, ENOTSUP,
1692                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1693                                           "default miss action not supported "
1694                                           "for egress");
1695         if (attr->group)
1696                 return rte_flow_error_set(error, ENOTSUP,
1697                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1698                                           "only group 0 is supported");
1699         if (attr->transfer)
1700                 return rte_flow_error_set(error, ENOTSUP,
1701                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1702                                           NULL, "transfer is not supported");
1703         return 0;
1704 }
1705
1706 /**
1707  * Validate the count action.
1708  *
1709  * @param[in] dev
1710  *   Pointer to the Ethernet device structure.
1711  * @param[in] attr
1712  *   Attributes of flow that includes this action.
1713  * @param[out] error
1714  *   Pointer to error structure.
1715  *
1716  * @return
1717  *   0 on success, a negative errno value otherwise and rte_errno is set.
1718  */
1719 int
1720 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1721                                 const struct rte_flow_attr *attr,
1722                                 struct rte_flow_error *error)
1723 {
1724         if (attr->egress)
1725                 return rte_flow_error_set(error, ENOTSUP,
1726                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1727                                           "count action not supported for "
1728                                           "egress");
1729         return 0;
1730 }
1731
1732 /**
1733  * Verify the @p attributes will be correctly understood by the NIC and store
1734  * them in the @p flow if everything is correct.
1735  *
1736  * @param[in] dev
1737  *   Pointer to the Ethernet device structure.
1738  * @param[in] attributes
1739  *   Pointer to flow attributes
1740  * @param[out] error
1741  *   Pointer to error structure.
1742  *
1743  * @return
1744  *   0 on success, a negative errno value otherwise and rte_errno is set.
1745  */
1746 int
1747 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1748                               const struct rte_flow_attr *attributes,
1749                               struct rte_flow_error *error)
1750 {
1751         struct mlx5_priv *priv = dev->data->dev_private;
1752         uint32_t priority_max = priv->config.flow_prio - 1;
1753
1754         if (attributes->group)
1755                 return rte_flow_error_set(error, ENOTSUP,
1756                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1757                                           NULL, "groups are not supported");
1758         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1759             attributes->priority >= priority_max)
1760                 return rte_flow_error_set(error, ENOTSUP,
1761                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1762                                           NULL, "priority out of range");
1763         if (attributes->egress)
1764                 return rte_flow_error_set(error, ENOTSUP,
1765                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1766                                           "egress is not supported");
1767         if (attributes->transfer && !priv->config.dv_esw_en)
1768                 return rte_flow_error_set(error, ENOTSUP,
1769                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1770                                           NULL, "transfer is not supported");
1771         if (!attributes->ingress)
1772                 return rte_flow_error_set(error, EINVAL,
1773                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1774                                           NULL,
1775                                           "ingress attribute is mandatory");
1776         return 0;
1777 }
1778
1779 /**
1780  * Validate ICMP6 item.
1781  *
1782  * @param[in] item
1783  *   Item specification.
1784  * @param[in] item_flags
1785  *   Bit-fields that hold the items detected until now.
1786  * @param[in] target_protocol
1787  *   The next protocol in the previous item.
1788  * @param[out] error
1789  *   Pointer to error structure.
1790  *
1791  * @return
1792  *   0 on success, a negative errno value otherwise and rte_errno is set.
1793  */
1794 int
1795 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1796                                uint64_t item_flags,
1797                                uint8_t target_protocol,
1798                                struct rte_flow_error *error)
1799 {
1800         const struct rte_flow_item_icmp6 *mask = item->mask;
1801         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1802         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1803                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1804         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1805                                       MLX5_FLOW_LAYER_OUTER_L4;
1806         int ret;
1807
1808         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1809                 return rte_flow_error_set(error, EINVAL,
1810                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1811                                           "protocol filtering not compatible"
1812                                           " with ICMP6 layer");
1813         if (!(item_flags & l3m))
1814                 return rte_flow_error_set(error, EINVAL,
1815                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1816                                           "IPv6 is mandatory to filter on"
1817                                           " ICMP6");
1818         if (item_flags & l4m)
1819                 return rte_flow_error_set(error, EINVAL,
1820                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1821                                           "multiple L4 layers not supported");
1822         if (!mask)
1823                 mask = &rte_flow_item_icmp6_mask;
1824         ret = mlx5_flow_item_acceptable
1825                 (item, (const uint8_t *)mask,
1826                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1827                  sizeof(struct rte_flow_item_icmp6),
1828                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1829         if (ret < 0)
1830                 return ret;
1831         return 0;
1832 }
1833
1834 /**
1835  * Validate ICMP item.
1836  *
1837  * @param[in] item
1838  *   Item specification.
1839  * @param[in] item_flags
1840  *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1841  * @param[out] error
1842  *   Pointer to error structure.
1843  *
1844  * @return
1845  *   0 on success, a negative errno value otherwise and rte_errno is set.
1846  */
1847 int
1848 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1849                              uint64_t item_flags,
1850                              uint8_t target_protocol,
1851                              struct rte_flow_error *error)
1852 {
1853         const struct rte_flow_item_icmp *mask = item->mask;
1854         const struct rte_flow_item_icmp nic_mask = {
1855                 .hdr.icmp_type = 0xff,
1856                 .hdr.icmp_code = 0xff,
1857                 .hdr.icmp_ident = RTE_BE16(0xffff),
1858                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1859         };
1860         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1861         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1862                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1863         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1864                                       MLX5_FLOW_LAYER_OUTER_L4;
1865         int ret;
1866
1867         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1868                 return rte_flow_error_set(error, EINVAL,
1869                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1870                                           "protocol filtering not compatible"
1871                                           " with ICMP layer");
1872         if (!(item_flags & l3m))
1873                 return rte_flow_error_set(error, EINVAL,
1874                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1875                                           "IPv4 is mandatory to filter"
1876                                           " on ICMP");
1877         if (item_flags & l4m)
1878                 return rte_flow_error_set(error, EINVAL,
1879                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1880                                           "multiple L4 layers not supported");
1881         if (!mask)
1882                 mask = &nic_mask;
1883         ret = mlx5_flow_item_acceptable
1884                 (item, (const uint8_t *)mask,
1885                  (const uint8_t *)&nic_mask,
1886                  sizeof(struct rte_flow_item_icmp),
1887                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1888         if (ret < 0)
1889                 return ret;
1890         return 0;
1891 }
1892
1893 /**
1894  * Validate Ethernet item.
1895  *
1896  * @param[in] item
1897  *   Item specification.
1898  * @param[in] item_flags
1899  *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
1900  * @param[out] error
1901  *   Pointer to error structure.
1902  *
1903  * @return
1904  *   0 on success, a negative errno value otherwise and rte_errno is set.
1905  */
1906 int
1907 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1908                             uint64_t item_flags, bool ext_vlan_sup,
1909                             struct rte_flow_error *error)
1910 {
1911         const struct rte_flow_item_eth *mask = item->mask;
1912         const struct rte_flow_item_eth nic_mask = {
1913                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1914                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1915                 .type = RTE_BE16(0xffff),
1916                 .has_vlan = ext_vlan_sup ? 1 : 0,
1917         };
1918         int ret;
1919         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1920         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1921                                        MLX5_FLOW_LAYER_OUTER_L2;
1922
1923         if (item_flags & ethm)
1924                 return rte_flow_error_set(error, ENOTSUP,
1925                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1926                                           "multiple L2 layers not supported");
1927         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1928             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1929                 return rte_flow_error_set(error, EINVAL,
1930                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1931                                           "L2 layer should not follow "
1932                                           "L3 layers");
1933         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1934             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1935                 return rte_flow_error_set(error, EINVAL,
1936                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1937                                           "L2 layer should not follow VLAN");
1938         if (!mask)
1939                 mask = &rte_flow_item_eth_mask;
1940         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1941                                         (const uint8_t *)&nic_mask,
1942                                         sizeof(struct rte_flow_item_eth),
1943                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1944         return ret;
1945 }
1946
1947 /**
1948  * Validate VLAN item.
1949  *
1950  * @param[in] item
1951  *   Item specification.
1952  * @param[in] item_flags
1953  *   Bit-fields that holds the items detected until now.
1954  * @param[in] dev
1955  *   Ethernet device flow is being created on.
1956  * @param[out] error
1957  *   Pointer to error structure.
1958  *
1959  * @return
1960  *   0 on success, a negative errno value otherwise and rte_errno is set.
1961  */
1962 int
1963 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1964                              uint64_t item_flags,
1965                              struct rte_eth_dev *dev,
1966                              struct rte_flow_error *error)
1967 {
1968         const struct rte_flow_item_vlan *spec = item->spec;
1969         const struct rte_flow_item_vlan *mask = item->mask;
1970         const struct rte_flow_item_vlan nic_mask = {
1971                 .tci = RTE_BE16(UINT16_MAX),
1972                 .inner_type = RTE_BE16(UINT16_MAX),
1973         };
1974         uint16_t vlan_tag = 0;
1975         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1976         int ret;
1977         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1978                                         MLX5_FLOW_LAYER_INNER_L4) :
1979                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1980                                         MLX5_FLOW_LAYER_OUTER_L4);
1981         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1982                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1983
1984         if (item_flags & vlanm)
1985                 return rte_flow_error_set(error, EINVAL,
1986                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1987                                           "multiple VLAN layers not supported");
1988         else if ((item_flags & l34m) != 0)
1989                 return rte_flow_error_set(error, EINVAL,
1990                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1991                                           "VLAN cannot follow L3/L4 layer");
1992         if (!mask)
1993                 mask = &rte_flow_item_vlan_mask;
1994         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1995                                         (const uint8_t *)&nic_mask,
1996                                         sizeof(struct rte_flow_item_vlan),
1997                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1998         if (ret)
1999                 return ret;
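        /*
         * 0x0fff covers exactly the 12 VLAN-ID bits of the TCI; any other
         * mask (PCP/DEI bits or a partial VID) is rejected below when a VM
         * workaround context is active.
         */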
2000         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2001                 struct mlx5_priv *priv = dev->data->dev_private;
2002
2003                 if (priv->vmwa_context) {
2004                         /*
2005                          * A non-NULL context means we run inside a virtual
2006                          * machine with SR-IOV enabled and must create a VLAN
2007                          * interface so the hypervisor sets up the E-Switch
2008                          * vport context correctly. We avoid creating multiple
2009                          * VLAN interfaces, so we cannot support a VLAN tag mask.
2010                          */
2011                         return rte_flow_error_set(error, EINVAL,
2012                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2013                                                   item,
2014                                                   "VLAN tag mask is not"
2015                                                   " supported in virtual"
2016                                                   " environment");
2017                 }
2018         }
2019         if (spec) {
2020                 vlan_tag = spec->tci;
2021                 vlan_tag &= mask->tci;
2022         }
2023         /*
2024          * From verbs perspective an empty VLAN is equivalent
2025          * to a packet without VLAN layer.
2026          */
2027         if (!vlan_tag)
2028                 return rte_flow_error_set(error, EINVAL,
2029                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2030                                           item->spec,
2031                                           "VLAN cannot be empty");
2032         return 0;
2033 }
2034
2035 /**
2036  * Validate IPV4 item.
2037  *
2038  * @param[in] item
2039  *   Item specification.
2040  * @param[in] item_flags
2041  *   Bit-fields that hold the items detected until now.
2042  * @param[in] last_item
2043  *   Previous validated item in the pattern items.
2044  * @param[in] ether_type
2045  *   Type in the ethernet layer header (including dot1q).
2046  * @param[in] acc_mask
2047  *   Acceptable mask, if NULL default internal default mask
2048  *   will be used to check whether item fields are supported.
2049  * @param[in] range_accepted
2050  *   True if range of values is accepted for specific fields, false otherwise.
2051  * @param[out] error
2052  *   Pointer to error structure.
2053  *
2054  * @return
2055  *   0 on success, a negative errno value otherwise and rte_errno is set.
2056  */
2057 int
2058 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2059                              uint64_t item_flags,
2060                              uint64_t last_item,
2061                              uint16_t ether_type,
2062                              const struct rte_flow_item_ipv4 *acc_mask,
2063                              bool range_accepted,
2064                              struct rte_flow_error *error)
2065 {
2066         const struct rte_flow_item_ipv4 *mask = item->mask;
2067         const struct rte_flow_item_ipv4 *spec = item->spec;
2068         const struct rte_flow_item_ipv4 nic_mask = {
2069                 .hdr = {
2070                         .src_addr = RTE_BE32(0xffffffff),
2071                         .dst_addr = RTE_BE32(0xffffffff),
2072                         .type_of_service = 0xff,
2073                         .next_proto_id = 0xff,
2074                 },
2075         };
2076         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2077         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2078                                       MLX5_FLOW_LAYER_OUTER_L3;
2079         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2080                                       MLX5_FLOW_LAYER_OUTER_L4;
2081         int ret;
2082         uint8_t next_proto = 0xFF;
2083         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2084                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2085                                   MLX5_FLOW_LAYER_INNER_VLAN);
2086
2087         if ((last_item & l2_vlan) && ether_type &&
2088             ether_type != RTE_ETHER_TYPE_IPV4)
2089                 return rte_flow_error_set(error, EINVAL,
2090                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2091                                           "IPv4 cannot follow L2/VLAN layer "
2092                                           "whose ether type is not IPv4");
2093         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2094                 if (mask && spec)
2095                         next_proto = mask->hdr.next_proto_id &
2096                                      spec->hdr.next_proto_id;
2097                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2098                         return rte_flow_error_set(error, EINVAL,
2099                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2100                                                   item,
2101                                                   "multiple tunnel "
2102                                                   "not supported");
2103         }
2104         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2105                 return rte_flow_error_set(error, EINVAL,
2106                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2107                                           "wrong tunnel type - IPv6 specified "
2108                                           "but IPv4 item provided");
2109         if (item_flags & l3m)
2110                 return rte_flow_error_set(error, ENOTSUP,
2111                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2112                                           "multiple L3 layers not supported");
2113         else if (item_flags & l4m)
2114                 return rte_flow_error_set(error, EINVAL,
2115                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2116                                           "L3 cannot follow an L4 layer.");
2117         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2118                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2119                 return rte_flow_error_set(error, EINVAL,
2120                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2121                                           "L3 cannot follow an NVGRE layer.");
2122         if (!mask)
2123                 mask = &rte_flow_item_ipv4_mask;
2124         else if (mask->hdr.next_proto_id != 0 &&
2125                  mask->hdr.next_proto_id != 0xff)
2126                 return rte_flow_error_set(error, EINVAL,
2127                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2128                                           "partial mask is not supported"
2129                                           " for protocol");
2130         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2131                                         acc_mask ? (const uint8_t *)acc_mask
2132                                                  : (const uint8_t *)&nic_mask,
2133                                         sizeof(struct rte_flow_item_ipv4),
2134                                         range_accepted, error);
2135         if (ret < 0)
2136                 return ret;
2137         return 0;
2138 }
2139
2140 /**
2141  * Validate IPV6 item.
2142  *
2143  * @param[in] item
2144  *   Item specification.
2145  * @param[in] item_flags
2146  *   Bit-fields that hold the items detected until now.
2147  * @param[in] last_item
2148  *   Previous validated item in the pattern items.
2149  * @param[in] ether_type
2150  *   Type in the ethernet layer header (including dot1q).
2151  * @param[in] acc_mask
2152  *   Acceptable mask, if NULL default internal default mask
2153  *   will be used to check whether item fields are supported.
2154  * @param[out] error
2155  *   Pointer to error structure.
2156  *
2157  * @return
2158  *   0 on success, a negative errno value otherwise and rte_errno is set.
2159  */
2160 int
2161 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2162                              uint64_t item_flags,
2163                              uint64_t last_item,
2164                              uint16_t ether_type,
2165                              const struct rte_flow_item_ipv6 *acc_mask,
2166                              struct rte_flow_error *error)
2167 {
2168         const struct rte_flow_item_ipv6 *mask = item->mask;
2169         const struct rte_flow_item_ipv6 *spec = item->spec;
2170         const struct rte_flow_item_ipv6 nic_mask = {
2171                 .hdr = {
2172                         .src_addr =
2173                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2174                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2175                         .dst_addr =
2176                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2177                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2178                         .vtc_flow = RTE_BE32(0xffffffff),
2179                         .proto = 0xff,
2180                 },
2181         };
2182         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2183         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2184                                       MLX5_FLOW_LAYER_OUTER_L3;
2185         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2186                                       MLX5_FLOW_LAYER_OUTER_L4;
2187         int ret;
2188         uint8_t next_proto = 0xFF;
2189         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2190                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2191                                   MLX5_FLOW_LAYER_INNER_VLAN);
2192
2193         if ((last_item & l2_vlan) && ether_type &&
2194             ether_type != RTE_ETHER_TYPE_IPV6)
2195                 return rte_flow_error_set(error, EINVAL,
2196                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2197                                           "IPv6 cannot follow L2/VLAN layer "
2198                                           "whose ether type is not IPv6");
2199         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2200                 next_proto = spec->hdr.proto;
2201         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
2202                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2203                         return rte_flow_error_set(error, EINVAL,
2204                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2205                                                   item,
2206                                                   "multiple tunnel "
2207                                                   "not supported");
2208         }
2209         if (next_proto == IPPROTO_HOPOPTS  ||
2210             next_proto == IPPROTO_ROUTING  ||
2211             next_proto == IPPROTO_FRAGMENT ||
2212             next_proto == IPPROTO_ESP      ||
2213             next_proto == IPPROTO_AH       ||
2214             next_proto == IPPROTO_DSTOPTS)
2215                 return rte_flow_error_set(error, EINVAL,
2216                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2217                                           "IPv6 proto (next header) should "
2218                                           "not be set as extension header");
2219         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2220                 return rte_flow_error_set(error, EINVAL,
2221                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2222                                           "wrong tunnel type - IPv4 specified "
2223                                           "but IPv6 item provided");
2224         if (item_flags & l3m)
2225                 return rte_flow_error_set(error, ENOTSUP,
2226                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2227                                           "multiple L3 layers not supported");
2228         else if (item_flags & l4m)
2229                 return rte_flow_error_set(error, EINVAL,
2230                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2231                                           "L3 cannot follow an L4 layer.");
2232         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2233                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2234                 return rte_flow_error_set(error, EINVAL,
2235                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2236                                           "L3 cannot follow an NVGRE layer.");
2237         if (!mask)
2238                 mask = &rte_flow_item_ipv6_mask;
2239         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2240                                         acc_mask ? (const uint8_t *)acc_mask
2241                                                  : (const uint8_t *)&nic_mask,
2242                                         sizeof(struct rte_flow_item_ipv6),
2243                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2244         if (ret < 0)
2245                 return ret;
2246         return 0;
2247 }
2248
2249 /**
2250  * Validate UDP item.
2251  *
2252  * @param[in] item
2253  *   Item specification.
2254  * @param[in] item_flags
2255  *   Bit-fields that hold the items detected until now.
2256  * @param[in] target_protocol
2257  *   The next protocol in the previous item.
2260  * @param[out] error
2261  *   Pointer to error structure.
2262  *
2263  * @return
2264  *   0 on success, a negative errno value otherwise and rte_errno is set.
2265  */
2266 int
2267 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2268                             uint64_t item_flags,
2269                             uint8_t target_protocol,
2270                             struct rte_flow_error *error)
2271 {
2272         const struct rte_flow_item_udp *mask = item->mask;
2273         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2274         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2275                                       MLX5_FLOW_LAYER_OUTER_L3;
2276         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2277                                       MLX5_FLOW_LAYER_OUTER_L4;
2278         int ret;
2279
2280         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2281                 return rte_flow_error_set(error, EINVAL,
2282                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2283                                           "protocol filtering not compatible"
2284                                           " with UDP layer");
2285         if (!(item_flags & l3m))
2286                 return rte_flow_error_set(error, EINVAL,
2287                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2288                                           "L3 is mandatory to filter on L4");
2289         if (item_flags & l4m)
2290                 return rte_flow_error_set(error, EINVAL,
2291                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2292                                           "multiple L4 layers not supported");
2293         if (!mask)
2294                 mask = &rte_flow_item_udp_mask;
2295         ret = mlx5_flow_item_acceptable
2296                 (item, (const uint8_t *)mask,
2297                  (const uint8_t *)&rte_flow_item_udp_mask,
2298                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2299                  error);
2300         if (ret < 0)
2301                 return ret;
2302         return 0;
2303 }
2304
2305 /**
2306  * Validate TCP item.
2307  *
2308  * @param[in] item
2309  *   Item specification.
2310  * @param[in] item_flags
2311  *   Bit-fields that hold the items detected until now.
2312  * @param[in] target_protocol
2313  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2314  * @param[out] error
2315  *   Pointer to error structure.
2316  *
2317  * @return
2318  *   0 on success, a negative errno value otherwise and rte_errno is set.
2319  */
2320 int
2321 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2322                             uint64_t item_flags,
2323                             uint8_t target_protocol,
2324                             const struct rte_flow_item_tcp *flow_mask,
2325                             struct rte_flow_error *error)
2326 {
2327         const struct rte_flow_item_tcp *mask = item->mask;
2328         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2329         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2330                                       MLX5_FLOW_LAYER_OUTER_L3;
2331         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2332                                       MLX5_FLOW_LAYER_OUTER_L4;
2333         int ret;
2334
2335         MLX5_ASSERT(flow_mask);
2336         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2337                 return rte_flow_error_set(error, EINVAL,
2338                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2339                                           "protocol filtering not compatible"
2340                                           " with TCP layer");
2341         if (!(item_flags & l3m))
2342                 return rte_flow_error_set(error, EINVAL,
2343                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2344                                           "L3 is mandatory to filter on L4");
2345         if (item_flags & l4m)
2346                 return rte_flow_error_set(error, EINVAL,
2347                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2348                                           "multiple L4 layers not supported");
2349         if (!mask)
2350                 mask = &rte_flow_item_tcp_mask;
2351         ret = mlx5_flow_item_acceptable
2352                 (item, (const uint8_t *)mask,
2353                  (const uint8_t *)flow_mask,
2354                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2355                  error);
2356         if (ret < 0)
2357                 return ret;
2358         return 0;
2359 }
2360
2361 /**
2362  * Validate VXLAN item.
2363  *
2364  * @param[in] item
2365  *   Item specification.
2366  * @param[in] item_flags
2367  *   Bit-fields that hold the items detected until now.
2370  * @param[out] error
2371  *   Pointer to error structure.
2372  *
2373  * @return
2374  *   0 on success, a negative errno value otherwise and rte_errno is set.
2375  */
2376 int
2377 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2378                               uint64_t item_flags,
2379                               struct rte_flow_error *error)
2380 {
2381         const struct rte_flow_item_vxlan *spec = item->spec;
2382         const struct rte_flow_item_vxlan *mask = item->mask;
2383         int ret;
2384         union vni {
2385                 uint32_t vlan_id;
2386                 uint8_t vni[4];
2387         } id = { .vlan_id = 0, };
2388
2389
2390         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2391                 return rte_flow_error_set(error, ENOTSUP,
2392                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2393                                           "multiple tunnel layers not"
2394                                           " supported");
2395         /*
2396          * Verify only UDPv4 is present as defined in
2397          * https://tools.ietf.org/html/rfc7348
2398          */
2399         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2400                 return rte_flow_error_set(error, EINVAL,
2401                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2402                                           "no outer UDP layer found");
2403         if (!mask)
2404                 mask = &rte_flow_item_vxlan_mask;
2405         ret = mlx5_flow_item_acceptable
2406                 (item, (const uint8_t *)mask,
2407                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2408                  sizeof(struct rte_flow_item_vxlan),
2409                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2410         if (ret < 0)
2411                 return ret;
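        /*
         * The VNI is a 24-bit field; spec and mask bytes are copied into
         * bytes 1..3 of the id union, though the combined value is not used
         * further by the validation itself.
         */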
2412         if (spec) {
2413                 memcpy(&id.vni[1], spec->vni, 3);
2414                 memcpy(&id.vni[1], mask->vni, 3);
2415         }
2416         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2417                 return rte_flow_error_set(error, ENOTSUP,
2418                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2419                                           "VXLAN tunnel must be fully defined");
2420         return 0;
2421 }
2422
2423 /**
2424  * Validate VXLAN_GPE item.
2425  *
2426  * @param[in] item
2427  *   Item specification.
2428  * @param[in] item_flags
2429  *   Bit-fields that hold the items detected until now.
2430  * @param[in] dev
2431  *   Pointer to the Ethernet device structure.
2434  * @param[out] error
2435  *   Pointer to error structure.
2436  *
2437  * @return
2438  *   0 on success, a negative errno value otherwise and rte_errno is set.
2439  */
2440 int
2441 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2442                                   uint64_t item_flags,
2443                                   struct rte_eth_dev *dev,
2444                                   struct rte_flow_error *error)
2445 {
2446         struct mlx5_priv *priv = dev->data->dev_private;
2447         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2448         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2449         int ret;
2450         union vni {
2451                 uint32_t vlan_id;
2452                 uint8_t vni[4];
2453         } id = { .vlan_id = 0, };
2454
2455         if (!priv->config.l3_vxlan_en)
2456                 return rte_flow_error_set(error, ENOTSUP,
2457                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2458                                           "L3 VXLAN is not enabled by device"
2459                                           " parameter and/or not configured in"
2460                                           " firmware");
2461         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2462                 return rte_flow_error_set(error, ENOTSUP,
2463                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2464                                           "multiple tunnel layers not"
2465                                           " supported");
2466         /*
2467          * Verify only UDPv4 is present as defined in
2468          * https://tools.ietf.org/html/rfc7348
2469          */
2470         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2471                 return rte_flow_error_set(error, EINVAL,
2472                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2473                                           "no outer UDP layer found");
2474         if (!mask)
2475                 mask = &rte_flow_item_vxlan_gpe_mask;
2476         ret = mlx5_flow_item_acceptable
2477                 (item, (const uint8_t *)mask,
2478                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2479                  sizeof(struct rte_flow_item_vxlan_gpe),
2480                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2481         if (ret < 0)
2482                 return ret;
2483         if (spec) {
2484                 if (spec->protocol)
2485                         return rte_flow_error_set(error, ENOTSUP,
2486                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2487                                                   item,
2488                                                   "VxLAN-GPE protocol"
2489                                                   " not supported");
2490                 memcpy(&id.vni[1], spec->vni, 3);
2491                 memcpy(&id.vni[1], mask->vni, 3);
2492         }
2493         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2494                 return rte_flow_error_set(error, ENOTSUP,
2495                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2496                                           "VXLAN-GPE tunnel must be fully"
2497                                           " defined");
2498         return 0;
2499 }
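/*
 * Illustrative sketch (not part of the driver): a pattern this validator is
 * expected to accept when the l3_vxlan_en devarg is set. The VNI value below
 * is hypothetical and the protocol field is left unset, since a non-zero
 * spec->protocol is rejected above.
 *
 *   struct rte_flow_item_vxlan_gpe gpe = { .vni = "\x00\x00\x2a" };
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *       { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *       { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *       { .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, .spec = &gpe },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */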
2500 /**
2501  * Validate GRE Key item.
2502  *
2503  * @param[in] item
2504  *   Item specification.
2505  * @param[in] item_flags
2506  *   Bit flags to mark detected items.
2507  * @param[in] gre_item
2508  *   Pointer to the GRE item.
2509  * @param[out] error
2510  *   Pointer to error structure.
2511  *
2512  * @return
2513  *   0 on success, a negative errno value otherwise and rte_errno is set.
2514  */
2515 int
2516 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2517                                 uint64_t item_flags,
2518                                 const struct rte_flow_item *gre_item,
2519                                 struct rte_flow_error *error)
2520 {
2521         const rte_be32_t *mask = item->mask;
2522         int ret = 0;
2523         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2524         const struct rte_flow_item_gre *gre_spec;
2525         const struct rte_flow_item_gre *gre_mask;
2526
2527         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2528                 return rte_flow_error_set(error, ENOTSUP,
2529                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2530                                           "Multiple GRE keys not supported");
2531         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2532                 return rte_flow_error_set(error, ENOTSUP,
2533                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2534                                           "No preceding GRE header");
2535         if (item_flags & MLX5_FLOW_LAYER_INNER)
2536                 return rte_flow_error_set(error, ENOTSUP,
2537                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2538                                           "GRE key following a wrong item");
2539         gre_mask = gre_item->mask;
2540         if (!gre_mask)
2541                 gre_mask = &rte_flow_item_gre_mask;
2542         gre_spec = gre_item->spec;
2543         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2544                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2545                 return rte_flow_error_set(error, EINVAL,
2546                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2547                                           "Key bit must be on");
2548
2549         if (!mask)
2550                 mask = &gre_key_default_mask;
2551         ret = mlx5_flow_item_acceptable
2552                 (item, (const uint8_t *)mask,
2553                  (const uint8_t *)&gre_key_default_mask,
2554                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2555         return ret;
2556 }
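/*
 * Illustrative sketch (not part of the driver): a GRE key can only follow an
 * outer GRE item, and if the GRE spec masks the key-present bit it must be
 * set. The key value below is hypothetical.
 *
 *   struct rte_flow_item_gre gre = { .c_rsvd0_ver = RTE_BE16(0x2000) };
 *   rte_be32_t gre_key = RTE_BE32(0x1234);
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *       { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *       { .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &gre },
 *       { .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */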
2557
2558 /**
2559  * Validate GRE item.
2560  *
2561  * @param[in] item
2562  *   Item specification.
2563  * @param[in] item_flags
2564  *   Bit flags to mark detected items.
2565  * @param[in] target_protocol
2566  *   The next protocol in the previous item.
2567  * @param[out] error
2568  *   Pointer to error structure.
2569  *
2570  * @return
2571  *   0 on success, a negative errno value otherwise and rte_errno is set.
2572  */
2573 int
2574 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2575                             uint64_t item_flags,
2576                             uint8_t target_protocol,
2577                             struct rte_flow_error *error)
2578 {
2579         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2580         const struct rte_flow_item_gre *mask = item->mask;
2581         int ret;
2582         const struct rte_flow_item_gre nic_mask = {
2583                 .c_rsvd0_ver = RTE_BE16(0xB000),
2584                 .protocol = RTE_BE16(UINT16_MAX),
2585         };
2586
2587         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2588                 return rte_flow_error_set(error, EINVAL,
2589                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2590                                           "protocol filtering not compatible"
2591                                           " with this GRE layer");
2592         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2593                 return rte_flow_error_set(error, ENOTSUP,
2594                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2595                                           "multiple tunnel layers not"
2596                                           " supported");
2597         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2598                 return rte_flow_error_set(error, ENOTSUP,
2599                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2600                                           "L3 Layer is missing");
2601         if (!mask)
2602                 mask = &rte_flow_item_gre_mask;
2603         ret = mlx5_flow_item_acceptable
2604                 (item, (const uint8_t *)mask,
2605                  (const uint8_t *)&nic_mask,
2606                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2607                  error);
2608         if (ret < 0)
2609                 return ret;
2610 #ifndef HAVE_MLX5DV_DR
2611 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2612         if (spec && (spec->protocol & mask->protocol))
2613                 return rte_flow_error_set(error, ENOTSUP,
2614                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2615                                           "without MPLS support the"
2616                                           " specification cannot be used for"
2617                                           " filtering");
2618 #endif
2619 #endif
2620         return 0;
2621 }
2622
2623 /**
2624  * Validate Geneve item.
2625  *
2626  * @param[in] item
2627  *   Item specification.
2628  * @param[in] item_flags
2629  *   Bit-fields that hold the items detected until now.
2630  * @param[in] dev
2631  *   Pointer to the rte_eth_dev structure.
2632  * @param[out] error
2633  *   Pointer to error structure.
2634  *
2635  * @return
2636  *   0 on success, a negative errno value otherwise and rte_errno is set.
2637  */
2638
2639 int
2640 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2641                                uint64_t item_flags,
2642                                struct rte_eth_dev *dev,
2643                                struct rte_flow_error *error)
2644 {
2645         struct mlx5_priv *priv = dev->data->dev_private;
2646         const struct rte_flow_item_geneve *spec = item->spec;
2647         const struct rte_flow_item_geneve *mask = item->mask;
2648         int ret;
2649         uint16_t gbhdr;
2650         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2651                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2652         const struct rte_flow_item_geneve nic_mask = {
2653                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2654                 .vni = "\xff\xff\xff",
2655                 .protocol = RTE_BE16(UINT16_MAX),
2656         };
2657
2658         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2659                 return rte_flow_error_set(error, ENOTSUP,
2660                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2661                                           "Geneve tunnel is not enabled by device"
2662                                           " parameter and/or not configured in"
2663                                           " firmware");
2664         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2665                 return rte_flow_error_set(error, ENOTSUP,
2666                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2667                                           "multiple tunnel layers not"
2668                                           " supported");
2669         /*
2670          * Verify an outer UDP layer is present, as required by the
2671          * Geneve specification (RFC 8926).
2672          */
2673         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2674                 return rte_flow_error_set(error, EINVAL,
2675                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2676                                           "no outer UDP layer found");
2677         if (!mask)
2678                 mask = &rte_flow_item_geneve_mask;
2679         ret = mlx5_flow_item_acceptable
2680                                   (item, (const uint8_t *)mask,
2681                                    (const uint8_t *)&nic_mask,
2682                                    sizeof(struct rte_flow_item_geneve),
2683                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2684         if (ret)
2685                 return ret;
2686         if (spec) {
2687                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2688                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2689                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2690                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2691                         return rte_flow_error_set(error, ENOTSUP,
2692                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2693                                                   item,
2694                                                   "Geneve protocol unsupported"
2695                                                   " fields are being used");
2696                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2697                         return rte_flow_error_set
2698                                         (error, ENOTSUP,
2699                                          RTE_FLOW_ERROR_TYPE_ITEM,
2700                                          item,
2701                                          "Unsupported Geneve options length");
2702         }
2703         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2704                 return rte_flow_error_set
2705                                     (error, ENOTSUP,
2706                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2707                                      "Geneve tunnel must be fully defined");
2708         return 0;
2709 }
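/*
 * Illustrative sketch (not part of the driver): a minimal Geneve pattern that
 * passes the checks above when the firmware supports stateless Geneve RX. The
 * VNI is hypothetical; the version, critical and reserved fields are left at
 * zero.
 *
 *   struct rte_flow_item_geneve geneve = { .vni = "\x00\x00\x2a" };
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *       { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *       { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *       { .type = RTE_FLOW_ITEM_TYPE_GENEVE, .spec = &geneve },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */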
2710
2711 /**
2712  * Validate MPLS item.
2713  *
2714  * @param[in] dev
2715  *   Pointer to the rte_eth_dev structure.
2716  * @param[in] item
2717  *   Item specification.
2718  * @param[in] item_flags
2719  *   Bit-fields that holds the items detected until now.
2720  * @param[in] prev_layer
2721  *   The protocol layer indicated in previous item.
2722  * @param[out] error
2723  *   Pointer to error structure.
2724  *
2725  * @return
2726  *   0 on success, a negative errno value otherwise and rte_errno is set.
2727  */
2728 int
2729 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2730                              const struct rte_flow_item *item __rte_unused,
2731                              uint64_t item_flags __rte_unused,
2732                              uint64_t prev_layer __rte_unused,
2733                              struct rte_flow_error *error)
2734 {
2735 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2736         const struct rte_flow_item_mpls *mask = item->mask;
2737         struct mlx5_priv *priv = dev->data->dev_private;
2738         int ret;
2739
2740         if (!priv->config.mpls_en)
2741                 return rte_flow_error_set(error, ENOTSUP,
2742                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2743                                           "MPLS not supported or"
2744                                           " disabled in firmware"
2745                                           " configuration.");
2746         /* MPLS over IP, UDP and GRE (optionally with a GRE key) is allowed. */
2747         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2748                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2749                             MLX5_FLOW_LAYER_GRE |
2750                             MLX5_FLOW_LAYER_GRE_KEY)))
2751                 return rte_flow_error_set(error, EINVAL,
2752                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2753                                           "protocol filtering not compatible"
2754                                           " with MPLS layer");
2755         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2756         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2757             !(item_flags & MLX5_FLOW_LAYER_GRE))
2758                 return rte_flow_error_set(error, ENOTSUP,
2759                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2760                                           "multiple tunnel layers not"
2761                                           " supported");
2762         if (!mask)
2763                 mask = &rte_flow_item_mpls_mask;
2764         ret = mlx5_flow_item_acceptable
2765                 (item, (const uint8_t *)mask,
2766                  (const uint8_t *)&rte_flow_item_mpls_mask,
2767                  sizeof(struct rte_flow_item_mpls),
2768                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2769         if (ret < 0)
2770                 return ret;
2771         return 0;
2772 #else
2773         return rte_flow_error_set(error, ENOTSUP,
2774                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2775                                   "MPLS is not supported by Verbs, please"
2776                                   " update.");
2777 #endif
2778 }
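/*
 * Illustrative sketch (not part of the driver): MPLS over GRE with a GRE key,
 * which the prev_layer check above accepts because MLX5_FLOW_LAYER_GRE_KEY is
 * listed as a valid predecessor (mpls_en must be set). The label and key
 * values are hypothetical.
 *
 *   struct rte_flow_item_mpls mpls = { .label_tc_s = "\x00\x00\x11" };
 *   rte_be32_t gre_key = RTE_BE32(0x1234);
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *       { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *       { .type = RTE_FLOW_ITEM_TYPE_GRE },
 *       { .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &gre_key },
 *       { .type = RTE_FLOW_ITEM_TYPE_MPLS, .spec = &mpls },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */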
2779
2780 /**
2781  * Validate NVGRE item.
2782  *
2783  * @param[in] item
2784  *   Item specification.
2785  * @param[in] item_flags
2786  *   Bit flags to mark detected items.
2787  * @param[in] target_protocol
2788  *   The next protocol in the previous item.
2789  * @param[out] error
2790  *   Pointer to error structure.
2791  *
2792  * @return
2793  *   0 on success, a negative errno value otherwise and rte_errno is set.
2794  */
2795 int
2796 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2797                               uint64_t item_flags,
2798                               uint8_t target_protocol,
2799                               struct rte_flow_error *error)
2800 {
2801         const struct rte_flow_item_nvgre *mask = item->mask;
2802         int ret;
2803
2804         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2805                 return rte_flow_error_set(error, EINVAL,
2806                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2807                                           "protocol filtering not compatible"
2808                                           " with this GRE layer");
2809         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2810                 return rte_flow_error_set(error, ENOTSUP,
2811                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2812                                           "multiple tunnel layers not"
2813                                           " supported");
2814         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2815                 return rte_flow_error_set(error, ENOTSUP,
2816                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2817                                           "L3 Layer is missing");
2818         if (!mask)
2819                 mask = &rte_flow_item_nvgre_mask;
2820         ret = mlx5_flow_item_acceptable
2821                 (item, (const uint8_t *)mask,
2822                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2823                  sizeof(struct rte_flow_item_nvgre),
2824                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2825         if (ret < 0)
2826                 return ret;
2827         return 0;
2828 }
2829
2830 /**
2831  * Validate eCPRI item.
2832  *
2833  * @param[in] item
2834  *   Item specification.
2835  * @param[in] item_flags
2836  *   Bit-fields that hold the items detected until now.
2837  * @param[in] last_item
2838  *   Previous validated item in the pattern items.
2839  * @param[in] ether_type
2840  *   Type in the Ethernet layer header (including dot1q).
2841  * @param[in] acc_mask
2842  *   Acceptable mask, if NULL the default internal mask
2843  *   will be used to check whether item fields are supported.
2844  * @param[out] error
2845  *   Pointer to error structure.
2846  *
2847  * @return
2848  *   0 on success, a negative errno value otherwise and rte_errno is set.
2849  */
2850 int
2851 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2852                               uint64_t item_flags,
2853                               uint64_t last_item,
2854                               uint16_t ether_type,
2855                               const struct rte_flow_item_ecpri *acc_mask,
2856                               struct rte_flow_error *error)
2857 {
2858         const struct rte_flow_item_ecpri *mask = item->mask;
2859         const struct rte_flow_item_ecpri nic_mask = {
2860                 .hdr = {
2861                         .common = {
2862                                 .u32 =
2863                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2864                                         .type = 0xFF,
2865                                         }).u32),
2866                         },
2867                         .dummy[0] = 0xFFFFFFFF,
2868                 },
2869         };
2870         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2871                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2872         struct rte_flow_item_ecpri mask_lo;
2873
2874         if (!(last_item & outer_l2_vlan) &&
2875             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
2876                 return rte_flow_error_set(error, EINVAL,
2877                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2878                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
2879         if ((last_item & outer_l2_vlan) && ether_type &&
2880             ether_type != RTE_ETHER_TYPE_ECPRI)
2881                 return rte_flow_error_set(error, EINVAL,
2882                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2883                                           "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
2884         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2885                 return rte_flow_error_set(error, EINVAL,
2886                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2887                                           "eCPRI with tunnel is not supported right now");
2888         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2889                 return rte_flow_error_set(error, ENOTSUP,
2890                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2891                                           "multiple L3 layers not supported");
2892         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
2893                 return rte_flow_error_set(error, EINVAL,
2894                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2895                                           "eCPRI cannot coexist with a TCP layer");
2896         /* In specification, eCPRI could be over UDP layer. */
2897         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
2898                 return rte_flow_error_set(error, EINVAL,
2899                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2900                                           "eCPRI over UDP layer is not supported right now");
2901         /* Mask for type field in common header could be zero. */
2902         if (!mask)
2903                 mask = &rte_flow_item_ecpri_mask;
2904         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
2905         /* Input mask is in big-endian format. */
2906         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
2907                 return rte_flow_error_set(error, EINVAL,
2908                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2909                                           "partial mask is not supported for protocol");
2910         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
2911                 return rte_flow_error_set(error, EINVAL,
2912                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2913                                           "message header mask must be after a type mask");
2914         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2915                                          acc_mask ? (const uint8_t *)acc_mask
2916                                                   : (const uint8_t *)&nic_mask,
2917                                          sizeof(struct rte_flow_item_ecpri),
2918                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2919 }
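/*
 * Illustrative sketch (not part of the driver): eCPRI carried directly over
 * Ethernet, the placement accepted above (tunnel, L3 and L4 placements are
 * rejected). The message type value is hypothetical.
 *
 *   struct rte_flow_item_eth l2 = {
 *       .type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
 *   };
 *   struct rte_flow_item_ecpri ecpri = { .hdr.common.type = 0 };
 *   struct rte_flow_item pattern[] = {
 *       { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &l2 },
 *       { .type = RTE_FLOW_ITEM_TYPE_ECPRI, .spec = &ecpri },
 *       { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */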
2920
2921 /**
2922  * Release resources related to the QUEUE/RSS action split.
2923  *
2924  * @param dev
2925  *   Pointer to Ethernet device.
2926  * @param flow
2927  *   Flow to release split flow IDs from.
2928  */
2929 static void
2930 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2931                              struct rte_flow *flow)
2932 {
2933         struct mlx5_priv *priv = dev->data->dev_private;
2934         uint32_t handle_idx;
2935         struct mlx5_flow_handle *dev_handle;
2936
2937         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2938                        handle_idx, dev_handle, next)
2939                 if (dev_handle->split_flow_id)
2940                         mlx5_ipool_free(priv->sh->ipool
2941                                         [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
2942                                         dev_handle->split_flow_id);
2943 }
2944
2945 static int
2946 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2947                    const struct rte_flow_attr *attr __rte_unused,
2948                    const struct rte_flow_item items[] __rte_unused,
2949                    const struct rte_flow_action actions[] __rte_unused,
2950                    bool external __rte_unused,
2951                    int hairpin __rte_unused,
2952                    struct rte_flow_error *error)
2953 {
2954         return rte_flow_error_set(error, ENOTSUP,
2955                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2956 }
2957
2958 static struct mlx5_flow *
2959 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2960                   const struct rte_flow_attr *attr __rte_unused,
2961                   const struct rte_flow_item items[] __rte_unused,
2962                   const struct rte_flow_action actions[] __rte_unused,
2963                   struct rte_flow_error *error)
2964 {
2965         rte_flow_error_set(error, ENOTSUP,
2966                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2967         return NULL;
2968 }
2969
2970 static int
2971 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2972                     struct mlx5_flow *dev_flow __rte_unused,
2973                     const struct rte_flow_attr *attr __rte_unused,
2974                     const struct rte_flow_item items[] __rte_unused,
2975                     const struct rte_flow_action actions[] __rte_unused,
2976                     struct rte_flow_error *error)
2977 {
2978         return rte_flow_error_set(error, ENOTSUP,
2979                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2980 }
2981
2982 static int
2983 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2984                 struct rte_flow *flow __rte_unused,
2985                 struct rte_flow_error *error)
2986 {
2987         return rte_flow_error_set(error, ENOTSUP,
2988                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2989 }
2990
2991 static void
2992 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2993                  struct rte_flow *flow __rte_unused)
2994 {
2995 }
2996
2997 static void
2998 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2999                   struct rte_flow *flow __rte_unused)
3000 {
3001 }
3002
3003 static int
3004 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3005                 struct rte_flow *flow __rte_unused,
3006                 const struct rte_flow_action *actions __rte_unused,
3007                 void *data __rte_unused,
3008                 struct rte_flow_error *error)
3009 {
3010         return rte_flow_error_set(error, ENOTSUP,
3011                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3012 }
3013
3014 static int
3015 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3016                       uint32_t domains __rte_unused,
3017                       uint32_t flags __rte_unused)
3018 {
3019         return 0;
3020 }
3021
3022 /* Void driver to protect from null pointer reference. */
3023 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3024         .validate = flow_null_validate,
3025         .prepare = flow_null_prepare,
3026         .translate = flow_null_translate,
3027         .apply = flow_null_apply,
3028         .remove = flow_null_remove,
3029         .destroy = flow_null_destroy,
3030         .query = flow_null_query,
3031         .sync_domain = flow_null_sync_domain,
3032 };
3033
3034 /**
3035  * Select flow driver type according to flow attributes and device
3036  * configuration.
3037  *
3038  * @param[in] dev
3039  *   Pointer to the dev structure.
3040  * @param[in] attr
3041  *   Pointer to the flow attributes.
3042  *
3043  * @return
3044  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3045  */
3046 static enum mlx5_flow_drv_type
3047 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3048 {
3049         struct mlx5_priv *priv = dev->data->dev_private;
3050         /* The OS can determine first a specific flow type (DV, VERBS) */
3051         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3052
3053         if (type != MLX5_FLOW_TYPE_MAX)
3054                 return type;
3055         /* If no OS specific type - continue with DV/VERBS selection */
3056         if (attr->transfer && priv->config.dv_esw_en)
3057                 type = MLX5_FLOW_TYPE_DV;
3058         if (!attr->transfer)
3059                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3060                                                  MLX5_FLOW_TYPE_VERBS;
3061         return type;
3062 }
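/*
 * Selection summary derived from flow_get_drv_type() above:
 *
 *   mlx5_flow_os_get_type() != MLX5_FLOW_TYPE_MAX -> OS-selected type
 *   attr->transfer  &&  dv_esw_en                 -> MLX5_FLOW_TYPE_DV
 *   attr->transfer  && !dv_esw_en                 -> MLX5_FLOW_TYPE_MAX
 *   !attr->transfer &&  dv_flow_en                -> MLX5_FLOW_TYPE_DV
 *   !attr->transfer && !dv_flow_en                -> MLX5_FLOW_TYPE_VERBS
 */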
3063
3064 #define flow_get_drv_ops(type) flow_drv_ops[type]
3065
3066 /**
3067  * Flow driver validation API. This abstracts calling driver specific functions.
3068  * The type of flow driver is determined according to flow attributes.
3069  *
3070  * @param[in] dev
3071  *   Pointer to the dev structure.
3072  * @param[in] attr
3073  *   Pointer to the flow attributes.
3074  * @param[in] items
3075  *   Pointer to the list of items.
3076  * @param[in] actions
3077  *   Pointer to the list of actions.
3078  * @param[in] external
3079  *   This flow rule is created by a request external to the PMD.
3080  * @param[in] hairpin
3081  *   Number of hairpin TX actions, 0 means classic flow.
3082  * @param[out] error
3083  *   Pointer to the error structure.
3084  *
3085  * @return
3086  *   0 on success, a negative errno value otherwise and rte_errno is set.
3087  */
3088 static inline int
3089 flow_drv_validate(struct rte_eth_dev *dev,
3090                   const struct rte_flow_attr *attr,
3091                   const struct rte_flow_item items[],
3092                   const struct rte_flow_action actions[],
3093                   bool external, int hairpin, struct rte_flow_error *error)
3094 {
3095         const struct mlx5_flow_driver_ops *fops;
3096         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3097
3098         fops = flow_get_drv_ops(type);
3099         return fops->validate(dev, attr, items, actions, external,
3100                               hairpin, error);
3101 }
3102
3103 /**
3104  * Flow driver preparation API. This abstracts calling driver specific
3105  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3106  * calculates the size of memory required for device flow, allocates the memory,
3107  * initializes the device flow and returns the pointer.
3108  *
3109  * @note
3110  *   This function initializes the device flow structure such as dv or verbs
3111  *   in struct mlx5_flow. However, it is the caller's responsibility to
3112  *   initialize the rest. For example, adding the returned device flow to the
3113  *   flow->dev_flow list and setting the backward reference to the flow should
3114  *   be done outside of this function. The layers field is not filled either.
3115  *
3116  * @param[in] dev
3117  *   Pointer to the dev structure.
3118  * @param[in] attr
3119  *   Pointer to the flow attributes.
3120  * @param[in] items
3121  *   Pointer to the list of items.
3122  * @param[in] actions
3123  *   Pointer to the list of actions.
3124  * @param[in] flow_idx
3125  *   The memory pool index of the flow.
3126  * @param[out] error
3127  *   Pointer to the error structure.
3128  *
3129  * @return
3130  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3131  */
3132 static inline struct mlx5_flow *
3133 flow_drv_prepare(struct rte_eth_dev *dev,
3134                  const struct rte_flow *flow,
3135                  const struct rte_flow_attr *attr,
3136                  const struct rte_flow_item items[],
3137                  const struct rte_flow_action actions[],
3138                  uint32_t flow_idx,
3139                  struct rte_flow_error *error)
3140 {
3141         const struct mlx5_flow_driver_ops *fops;
3142         enum mlx5_flow_drv_type type = flow->drv_type;
3143         struct mlx5_flow *mlx5_flow = NULL;
3144
3145         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3146         fops = flow_get_drv_ops(type);
3147         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3148         if (mlx5_flow)
3149                 mlx5_flow->flow_idx = flow_idx;
3150         return mlx5_flow;
3151 }
3152
3153 /**
3154  * Flow driver translation API. This abstracts calling driver specific
3155  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3156  * translates a generic flow into a driver flow. flow_drv_prepare() must
3157  * precede.
3158  *
3159  * @note
3160  *   dev_flow->layers could be filled as a result of parsing during translation
3161  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3162  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3163  *   flow->actions could be overwritten even though all the expanded dev_flows
3164  *   have the same actions.
3165  *
3166  * @param[in] dev
3167  *   Pointer to the rte dev structure.
3168  * @param[in, out] dev_flow
3169  *   Pointer to the mlx5 flow.
3170  * @param[in] attr
3171  *   Pointer to the flow attributes.
3172  * @param[in] items
3173  *   Pointer to the list of items.
3174  * @param[in] actions
3175  *   Pointer to the list of actions.
3176  * @param[out] error
3177  *   Pointer to the error structure.
3178  *
3179  * @return
3180  *   0 on success, a negative errno value otherwise and rte_errno is set.
3181  */
3182 static inline int
3183 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3184                    const struct rte_flow_attr *attr,
3185                    const struct rte_flow_item items[],
3186                    const struct rte_flow_action actions[],
3187                    struct rte_flow_error *error)
3188 {
3189         const struct mlx5_flow_driver_ops *fops;
3190         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3191
3192         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3193         fops = flow_get_drv_ops(type);
3194         return fops->translate(dev, dev_flow, attr, items, actions, error);
3195 }
3196
3197 /**
3198  * Flow driver apply API. This abstracts calling driver specific functions.
3199  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3200  * translated driver flows on to device. flow_drv_translate() must precede.
3201  *
3202  * @param[in] dev
3203  *   Pointer to Ethernet device structure.
3204  * @param[in, out] flow
3205  *   Pointer to flow structure.
3206  * @param[out] error
3207  *   Pointer to error structure.
3208  *
3209  * @return
3210  *   0 on success, a negative errno value otherwise and rte_errno is set.
3211  */
3212 static inline int
3213 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3214                struct rte_flow_error *error)
3215 {
3216         const struct mlx5_flow_driver_ops *fops;
3217         enum mlx5_flow_drv_type type = flow->drv_type;
3218
3219         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3220         fops = flow_get_drv_ops(type);
3221         return fops->apply(dev, flow, error);
3222 }
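/*
 * Typical sequence over these wrappers when a flow is created (a sketch based
 * on the notes above, not an additional API): flow_drv_validate(), then
 * flow_drv_prepare() for each expanded device flow, flow_drv_translate() and
 * finally flow_drv_apply(); flow_drv_destroy() releases the driver resources
 * when the flow is destroyed.
 */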
3223
3224 /**
3225  * Flow driver destroy API. This abstracts calling driver specific functions.
3226  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3227  * on device and releases resources of the flow.
3228  *
3229  * @param[in] dev
3230  *   Pointer to Ethernet device.
3231  * @param[in, out] flow
3232  *   Pointer to flow structure.
3233  */
3234 static inline void
3235 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3236 {
3237         const struct mlx5_flow_driver_ops *fops;
3238         enum mlx5_flow_drv_type type = flow->drv_type;
3239
3240         flow_mreg_split_qrss_release(dev, flow);
3241         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3242         fops = flow_get_drv_ops(type);
3243         fops->destroy(dev, flow);
3244 }
3245
3246 /**
3247  * Get RSS action from the action list.
3248  *
3249  * @param[in] actions
3250  *   Pointer to the list of actions.
3251  *
3252  * @return
3253  *   Pointer to the RSS action if exist, else return NULL.
3254  */
3255 static const struct rte_flow_action_rss*
3256 flow_get_rss_action(const struct rte_flow_action actions[])
3257 {
3258         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3259                 switch (actions->type) {
3260                 case RTE_FLOW_ACTION_TYPE_RSS:
3261                         return (const struct rte_flow_action_rss *)
3262                                actions->conf;
3263                 default:
3264                         break;
3265                 }
3266         }
3267         return NULL;
3268 }
3269
3270 /**
3271  * Get ASO age action by index.
3272  *
3273  * @param[in] dev
3274  *   Pointer to the Ethernet device structure.
3275  * @param[in] age_idx
3276  *   Index to the ASO age action.
3277  *
3278  * @return
3279  *   The specified ASO age action.
3280  */
3281 struct mlx5_aso_age_action*
3282 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3283 {
3284         uint16_t pool_idx = age_idx & UINT16_MAX;
3285         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3286         struct mlx5_priv *priv = dev->data->dev_private;
3287         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3288         struct mlx5_aso_age_pool *pool = mng->pools[pool_idx];
3289
3290         return &pool->actions[offset - 1];
3291 }
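/*
 * Index encoding used above: the lower 16 bits of age_idx select the pool and
 * the upper 16 bits hold a 1-based offset inside it. For example, a
 * hypothetical age_idx of 0x00030002 resolves to pool 2 and returns
 * &pool->actions[2] (offset 3 minus 1).
 */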
3292
3293 /* Maps a shared action to its translated non-shared action in the actions array. */
3294 struct mlx5_translated_shared_action {
3295         struct rte_flow_shared_action *action; /**< Shared action */
3296         int index; /**< Index in related array of rte_flow_action */
3297 };
3298
3299 /**
3300  * Translates actions of type RTE_FLOW_ACTION_TYPE_SHARED to the related
3301  * non-shared actions if translation is possible.
3302  * This functionality is used to run the same execution path for both shared
3303  * and non-shared actions on flow create. All necessary preparations for
3304  * shared action handling should be performed on the *shared* actions list
3305  * returned from this call.
3306  *
3307  * @param[in] dev
3308  *   Pointer to Ethernet device.
3309  * @param[in] actions
3310  *   List of actions to translate.
3311  * @param[out] shared
3312  *   List to store translated shared actions.
3313  * @param[in, out] shared_n
3314  *   Size of the *shared* array. On return it is updated with the number of
3315  *   shared actions retrieved from the *actions* list.
3316  * @param[out] translated_actions
3317  *   List of actions where all shared actions were translated to non-shared
3318  *   ones if possible. NULL if no translation took place.
3319  * @param[out] error
3320  *   Pointer to the error structure.
3321  *
3322  * @return
3323  *   0 on success, a negative errno value otherwise and rte_errno is set.
3324  */
3325 static int
3326 flow_shared_actions_translate(struct rte_eth_dev *dev,
3327                               const struct rte_flow_action actions[],
3328                               struct mlx5_translated_shared_action *shared,
3329                               int *shared_n,
3330                               struct rte_flow_action **translated_actions,
3331                               struct rte_flow_error *error)
3332 {
3333         struct mlx5_priv *priv = dev->data->dev_private;
3334         struct rte_flow_action *translated = NULL;
3335         size_t actions_size;
3336         int n;
3337         int copied_n = 0;
3338         struct mlx5_translated_shared_action *shared_end = NULL;
3339
3340         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3341                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_SHARED)
3342                         continue;
3343                 if (copied_n == *shared_n) {
3344                         return rte_flow_error_set
3345                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3346                                  NULL, "too many shared actions");
3347                 }
3348                 rte_memcpy(&shared[copied_n].action, &actions[n].conf,
3349                            sizeof(actions[n].conf));
3350                 shared[copied_n].index = n;
3351                 copied_n++;
3352         }
3353         n++;
3354         *shared_n = copied_n;
3355         if (!copied_n)
3356                 return 0;
3357         actions_size = sizeof(struct rte_flow_action) * n;
3358         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3359         if (!translated) {
3360                 rte_errno = ENOMEM;
3361                 return -ENOMEM;
3362         }
3363         memcpy(translated, actions, actions_size);
3364         for (shared_end = shared + copied_n; shared < shared_end; shared++) {
3365                 struct mlx5_shared_action_rss *shared_rss;
3366                 uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3367                 uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3368                 uint32_t idx = act_idx & ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET)
3369                                                                            - 1);
3370
3371                 switch (type) {
3372                 case MLX5_SHARED_ACTION_TYPE_RSS:
3373                         shared_rss = mlx5_ipool_get
3374                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3375                         translated[shared->index].type =
3376                                 RTE_FLOW_ACTION_TYPE_RSS;
3377                         translated[shared->index].conf =
3378                                 &shared_rss->origin;
3379                         break;
3380                 case MLX5_SHARED_ACTION_TYPE_AGE:
3381                         if (priv->sh->flow_hit_aso_en) {
3382                                 translated[shared->index].type =
3383                                         (enum rte_flow_action_type)
3384                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3385                                 translated[shared->index].conf =
3386                                                          (void *)(uintptr_t)idx;
3387                                 break;
3388                         }
3389                         /* Fall-through */
3390                 default:
3391                         mlx5_free(translated);
3392                         return rte_flow_error_set
3393                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3394                                  NULL, "invalid shared action type");
3395                 }
3396         }
3397         *translated_actions = translated;
3398         return 0;
3399 }
3400
3401 /**
3402  * Get Shared RSS action from the action list.
3403  *
3404  * @param[in] dev
3405  *   Pointer to Ethernet device.
3406  * @param[in] shared
3407  *   Pointer to the list of actions.
3408  * @param[in] shared_n
3409  *   Actions list length.
3410  *
3411  * @return
3412  *   The MLX5 RSS action ID if it exists, otherwise 0.
3413  */
3414 static uint32_t
3415 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3416                            struct mlx5_translated_shared_action *shared,
3417                            int shared_n)
3418 {
3419         struct mlx5_translated_shared_action *shared_end;
3420         struct mlx5_priv *priv = dev->data->dev_private;
3421         struct mlx5_shared_action_rss *shared_rss;
3422
3423
3424         for (shared_end = shared + shared_n; shared < shared_end; shared++) {
3425                 uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3426                 uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3427                 uint32_t idx = act_idx &
3428                                    ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET) - 1);
3429                 switch (type) {
3430                 case MLX5_SHARED_ACTION_TYPE_RSS:
3431                         shared_rss = mlx5_ipool_get
3432                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3433                                                                            idx);
3434                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3435                                            __ATOMIC_RELAXED);
3436                         return idx;
3437                 default:
3438                         break;
3439                 }
3440         }
3441         return 0;
3442 }
3443
3444 static unsigned int
3445 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3446 {
3447         const struct rte_flow_item *item;
3448         unsigned int has_vlan = 0;
3449
3450         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3451                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3452                         has_vlan = 1;
3453                         break;
3454                 }
3455         }
3456         if (has_vlan)
3457                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3458                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3459         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3460                                MLX5_EXPANSION_ROOT_OUTER;
3461 }
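/*
 * Root selection summary derived from find_graph_root() above: a VLAN item in
 * the pattern selects the *_ETH_VLAN expansion roots, and an RSS level of 2
 * or more selects the *_OUTER variants, giving four possible entry points to
 * the expansion graph.
 */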
3462
3463 /**
3464  *  Get layer flags from the prefix flow.
3465  *
3466  *  Some flows may be split into several subflows; the prefix subflow gets the
3467  *  match items and the suffix subflow gets the actions.
3468  *  Some actions need the user-defined match item flags to get the details for
3469  *  the action.
3470  *  This function helps the suffix flow to get the item layer flags from the
3471  *  prefix subflow.
3472  *
3473  * @param[in] dev_flow
3474  *   Pointer to the created prefix subflow.
3475  *
3476  * @return
3477  *   The layers get from prefix subflow.
3478  */
3479 static inline uint64_t
3480 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3481 {
3482         uint64_t layers = 0;
3483
3484         /*
3485          * The layer bits could be cached in a local variable, but usually
3486          * the compiler optimizes this access well enough.
3487          * If there is no decap action, use the layers directly.
3488          */
3489         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3490                 return dev_flow->handle->layers;
3491         /* Convert L3 layers with decap action. */
3492         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3493                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3494         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3495                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3496         /* Convert L4 layers with decap action.  */
3497         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3498                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3499         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3500                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3501         return layers;
3502 }
3503
3504 /**
3505  * Get metadata split action information.
3506  *
3507  * @param[in] actions
3508  *   Pointer to the list of actions.
3509  * @param[out] qrss
3510  *   Pointer to the return pointer; set to the QUEUE/RSS action if one is
3511  *   found in the actions list, left untouched otherwise.
3514  * @param[out] encap_idx
3515  *   Pointer to the index of the encap action if exists, otherwise the last
3516  *   action index.
3517  *
3518  * @return
3519  *   Total number of actions.
3520  */
3521 static int
3522 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3523                                        const struct rte_flow_action **qrss,
3524                                        int *encap_idx)
3525 {
3526         const struct rte_flow_action_raw_encap *raw_encap;
3527         int actions_n = 0;
3528         int raw_decap_idx = -1;
3529
3530         *encap_idx = -1;
3531         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3532                 switch (actions->type) {
3533                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3534                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3535                         *encap_idx = actions_n;
3536                         break;
3537                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3538                         raw_decap_idx = actions_n;
3539                         break;
3540                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3541                         raw_encap = actions->conf;
3542                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3543                                 *encap_idx = raw_decap_idx != -1 ?
3544                                                       raw_decap_idx : actions_n;
3545                         break;
3546                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3547                 case RTE_FLOW_ACTION_TYPE_RSS:
3548                         *qrss = actions;
3549                         break;
3550                 default:
3551                         break;
3552                 }
3553                 actions_n++;
3554         }
3555         if (*encap_idx == -1)
3556                 *encap_idx = actions_n;
3557         /* Count RTE_FLOW_ACTION_TYPE_END. */
3558         return actions_n + 1;
3559 }
3560
3561 /**
3562  * Check meter action from the action list.
3563  *
3564  * @param[in] actions
3565  *   Pointer to the list of actions.
3566  * @param[out] mtr
3567  *   Pointer to the meter exist flag.
3568  *
3569  * @return
3570  *   Total number of actions.
3571  */
3572 static int
3573 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
3574 {
3575         int actions_n = 0;
3576
3577         MLX5_ASSERT(mtr);
3578         *mtr = 0;
3579         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3580                 switch (actions->type) {
3581                 case RTE_FLOW_ACTION_TYPE_METER:
3582                         *mtr = 1;
3583                         break;
3584                 default:
3585                         break;
3586                 }
3587                 actions_n++;
3588         }
3589         /* Count RTE_FLOW_ACTION_TYPE_END. */
3590         return actions_n + 1;
3591 }
3592
3593 /**
3594  * Check if the flow should be split due to hairpin.
3595  * The reason for the split is that in current HW we can't
3596  * support encap and push-vlan on Rx, so if a flow contains
3597  * these actions we move it to Tx.
3598  *
3599  * @param dev
3600  *   Pointer to Ethernet device.
3601  * @param[in] attr
3602  *   Flow rule attributes.
3603  * @param[in] actions
3604  *   Associated actions (list terminated by the END action).
3605  *
3606  * @return
3607  *   > 0 the number of actions and the flow should be split,
3608  *   0 when no split required.
3609  */
3610 static int
3611 flow_check_hairpin_split(struct rte_eth_dev *dev,
3612                          const struct rte_flow_attr *attr,
3613                          const struct rte_flow_action actions[])
3614 {
3615         int queue_action = 0;
3616         int action_n = 0;
3617         int split = 0;
3618         const struct rte_flow_action_queue *queue;
3619         const struct rte_flow_action_rss *rss;
3620         const struct rte_flow_action_raw_encap *raw_encap;
3621         const struct rte_eth_hairpin_conf *conf;
3622
3623         if (!attr->ingress)
3624                 return 0;
3625         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3626                 switch (actions->type) {
3627                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3628                         queue = actions->conf;
3629                         if (queue == NULL)
3630                                 return 0;
3631                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
3632                         if (conf != NULL && !!conf->tx_explicit)
3633                                 return 0;
3634                         queue_action = 1;
3635                         action_n++;
3636                         break;
3637                 case RTE_FLOW_ACTION_TYPE_RSS:
3638                         rss = actions->conf;
3639                         if (rss == NULL || rss->queue_num == 0)
3640                                 return 0;
3641                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
3642                         if (conf != NULL && !!conf->tx_explicit)
3643                                 return 0;
3644                         queue_action = 1;
3645                         action_n++;
3646                         break;
3647                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3648                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3649                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3650                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3651                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3652                         split++;
3653                         action_n++;
3654                         break;
3655                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3656                         raw_encap = actions->conf;
3657                         if (raw_encap->size >
3658                             (sizeof(struct rte_flow_item_eth) +
3659                              sizeof(struct rte_flow_item_ipv4)))
3660                                 split++;
3661                         action_n++;
3662                         break;
3663                 default:
3664                         action_n++;
3665                         break;
3666                 }
3667         }
3668         if (split && queue_action)
3669                 return action_n;
3670         return 0;
3671 }
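/*
 * Illustrative example (hypothetical rule): an ingress flow with
 * OF_PUSH_VLAN followed by QUEUE to a hairpin queue that does not use
 * explicit Tx rules makes this function return 2 (both actions counted),
 * so the caller moves the push-vlan part to the Tx side of the hairpin.
 */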
3672
3673 /* Declare flow create/destroy prototype in advance. */
3674 static uint32_t
3675 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3676                  const struct rte_flow_attr *attr,
3677                  const struct rte_flow_item items[],
3678                  const struct rte_flow_action actions[],
3679                  bool external, struct rte_flow_error *error);
3680
3681 static void
3682 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3683                   uint32_t flow_idx);
3684
3685 struct mlx5_hlist_entry *
3686 flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key,
3687                        void *cb_ctx)
3688 {
3689         struct rte_eth_dev *dev = list->ctx;
3690         struct mlx5_priv *priv = dev->data->dev_private;
3691         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
3692         struct mlx5_flow_mreg_copy_resource *mcp_res;
3693         struct rte_flow_error *error = ctx->error;
3694         uint32_t idx = 0;
3695         int ret;
3696         uint32_t mark_id = key;
3697         struct rte_flow_attr attr = {
3698                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3699                 .ingress = 1,
3700         };
3701         struct mlx5_rte_flow_item_tag tag_spec = {
3702                 .data = mark_id,
3703         };
3704         struct rte_flow_item items[] = {
3705                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3706         };
3707         struct rte_flow_action_mark ftag = {
3708                 .id = mark_id,
3709         };
3710         struct mlx5_flow_action_copy_mreg cp_mreg = {
3711                 .dst = REG_B,
3712                 .src = REG_NON,
3713         };
3714         struct rte_flow_action_jump jump = {
3715                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3716         };
3717         struct rte_flow_action actions[] = {
3718                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3719         };
3720
3721         /* Fill the register fields in the flow. */
3722         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3723         if (ret < 0)
3724                 return NULL;
3725         tag_spec.id = ret;
3726         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3727         if (ret < 0)
3728                 return NULL;
3729         cp_mreg.src = ret;
3730         /* Provide the full width of FLAG specific value. */
3731         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3732                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3733         /* Build a new flow. */
3734         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3735                 items[0] = (struct rte_flow_item){
3736                         .type = (enum rte_flow_item_type)
3737                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3738                         .spec = &tag_spec,
3739                 };
3740                 items[1] = (struct rte_flow_item){
3741                         .type = RTE_FLOW_ITEM_TYPE_END,
3742                 };
3743                 actions[0] = (struct rte_flow_action){
3744                         .type = (enum rte_flow_action_type)
3745                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3746                         .conf = &ftag,
3747                 };
3748                 actions[1] = (struct rte_flow_action){
3749                         .type = (enum rte_flow_action_type)
3750                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3751                         .conf = &cp_mreg,
3752                 };
3753                 actions[2] = (struct rte_flow_action){
3754                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3755                         .conf = &jump,
3756                 };
3757                 actions[3] = (struct rte_flow_action){
3758                         .type = RTE_FLOW_ACTION_TYPE_END,
3759                 };
3760         } else {
3761                 /* Default rule, wildcard match. */
3762                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3763                 items[0] = (struct rte_flow_item){
3764                         .type = RTE_FLOW_ITEM_TYPE_END,
3765                 };
3766                 actions[0] = (struct rte_flow_action){
3767                         .type = (enum rte_flow_action_type)
3768                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3769                         .conf = &cp_mreg,
3770                 };
3771                 actions[1] = (struct rte_flow_action){
3772                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3773                         .conf = &jump,
3774                 };
3775                 actions[2] = (struct rte_flow_action){
3776                         .type = RTE_FLOW_ACTION_TYPE_END,
3777                 };
3778         }
3779         /* Build a new entry. */
3780         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3781         if (!mcp_res) {
3782                 rte_errno = ENOMEM;
3783                 return NULL;
3784         }
3785         mcp_res->idx = idx;
3786         /*
3787          * The copy Flows are not included in any list. These
3788          * ones are referenced from other Flows and cannot
3789          * be applied, removed, or deleted in arbitrary order
3790          * by list traversing.
3791          */
3792         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3793                                          actions, false, error);
3794         if (!mcp_res->rix_flow) {
3795                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
3796                 return NULL;
3797         }
3798         return &mcp_res->hlist_ent;
3799 }
3800
3801 /**
3802  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3803  *
3804  * As mark_id is unique, if there's already a registered flow for the mark_id,
3805  * return it after increasing the reference counter of the resource. Otherwise, create
3806  * the resource (mcp_res) and flow.
3807  *
3808  * Flow looks like,
3809  *   - If ingress port is ANY and reg_c[1] is mark_id,
3810  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3811  *
3812  * For default flow (zero mark_id), flow is like,
3813  *   - If ingress port is ANY,
3814  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3815  *
3816  * @param dev
3817  *   Pointer to Ethernet device.
3818  * @param mark_id
3819  *   ID of MARK action, zero means default flow for META.
3820  * @param[out] error
3821  *   Perform verbose error reporting if not NULL.
3822  *
3823  * @return
3824  *   Associated resource on success, NULL otherwise and rte_errno is set.
3825  */
3826 static struct mlx5_flow_mreg_copy_resource *
3827 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3828                           struct rte_flow_error *error)
3829 {
3830         struct mlx5_priv *priv = dev->data->dev_private;
3831         struct mlx5_hlist_entry *entry;
3832         struct mlx5_flow_cb_ctx ctx = {
3833                 .dev = dev,
3834                 .error = error,
3835         };
3836
3837         /* Check if already registered. */
3838         MLX5_ASSERT(priv->mreg_cp_tbl);
3839         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
3840         if (!entry)
3841                 return NULL;
3842         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
3843                             hlist_ent);
3844 }
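
/*
 * Usage sketch (illustration only, not an actual driver call site): register
 * a copy resource for a hypothetical MARK ID and drop the reference again.
 * The mark value 42 and the local error object are assumptions made for this
 * example; dev and priv are assumed to be in scope as in the surrounding
 * functions. The real callers are flow_mreg_update_copy_table() and
 * flow_mreg_add_default_copy_action() below.
 */
#if 0
	struct rte_flow_error err;
	struct mlx5_flow_mreg_copy_resource *res;

	res = flow_mreg_add_copy_action(dev, 42, &err);
	if (res) {
		/* A parent flow would keep res->idx in its rix_mreg_copy. */
		mlx5_hlist_unregister(priv->mreg_cp_tbl, &res->hlist_ent);
	}
#endif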
3845
3846 void
3847 flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry)
3848 {
3849         struct mlx5_flow_mreg_copy_resource *mcp_res =
3850                 container_of(entry, typeof(*mcp_res), hlist_ent);
3851         struct rte_eth_dev *dev = list->ctx;
3852         struct mlx5_priv *priv = dev->data->dev_private;
3853
3854         MLX5_ASSERT(mcp_res->rix_flow);
3855         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3856         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3857 }
3858
3859 /**
3860  * Release flow in RX_CP_TBL.
3861  *
3862  * @param dev
3863  *   Pointer to Ethernet device.
3864  * @param flow
3865  *   Parent flow for which copying is provided.
3866  */
3867 static void
3868 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3869                           struct rte_flow *flow)
3870 {
3871         struct mlx5_flow_mreg_copy_resource *mcp_res;
3872         struct mlx5_priv *priv = dev->data->dev_private;
3873
3874         if (!flow->rix_mreg_copy)
3875                 return;
3876         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3877                                  flow->rix_mreg_copy);
3878         if (!mcp_res || !priv->mreg_cp_tbl)
3879                 return;
3880         MLX5_ASSERT(mcp_res->rix_flow);
3881         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3882         flow->rix_mreg_copy = 0;
3883 }
3884
3885 /**
3886  * Remove the default copy action from RX_CP_TBL.
3887  *
3888  * This function is called in mlx5_dev_start(). Thread safety
3889  * is not guaranteed.
3890  *
3891  * @param dev
3892  *   Pointer to Ethernet device.
3893  */
3894 static void
3895 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3896 {
3897         struct mlx5_hlist_entry *entry;
3898         struct mlx5_priv *priv = dev->data->dev_private;
3899
3900         /* Check if default flow is registered. */
3901         if (!priv->mreg_cp_tbl)
3902                 return;
3903         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl,
3904                                   MLX5_DEFAULT_COPY_ID, NULL);
3905         if (!entry)
3906                 return;
3907         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
3908 }
3909
3910 /**
3911  * Add the default copy action in RX_CP_TBL.
3912  *
3913  * This function is called in mlx5_dev_start(). Thread safety
3914  * is not guaranteed.
3915  *
3916  * @param dev
3917  *   Pointer to Ethernet device.
3918  * @param[out] error
3919  *   Perform verbose error reporting if not NULL.
3920  *
3921  * @return
3922  *   0 for success, negative value otherwise and rte_errno is set.
3923  */
3924 static int
3925 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3926                                   struct rte_flow_error *error)
3927 {
3928         struct mlx5_priv *priv = dev->data->dev_private;
3929         struct mlx5_flow_mreg_copy_resource *mcp_res;
3930
3931         /* Check whether extensive metadata feature is engaged. */
3932         if (!priv->config.dv_flow_en ||
3933             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3934             !mlx5_flow_ext_mreg_supported(dev) ||
3935             !priv->sh->dv_regc0_mask)
3936                 return 0;
3937         /*
3938          * Adding the default mreg copy flow may happen multiple times,
3939          * but it is only removed once in stop. Avoid registering it twice.
3940          */
3941         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, MLX5_DEFAULT_COPY_ID, NULL))
3942                 return 0;
3943         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3944         if (!mcp_res)
3945                 return -rte_errno;
3946         return 0;
3947 }
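
/*
 * Call-site sketch (assumption): how a start routine might engage the default
 * copy action; the error handling shown is illustrative only and dev is
 * assumed to be in scope.
 */
#if 0
	struct rte_flow_error err;

	if (flow_mreg_add_default_copy_action(dev, &err) < 0)
		DRV_LOG(ERR, "port %u cannot add default mreg copy action",
			dev->data->port_id);
#endif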
3948
3949 /**
3950  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3951  *
3952  * All the flows having a Q/RSS action should be split by
3953  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3954  * performs the following,
3955  *   - CQE->flow_tag := reg_c[1] (MARK)
3956  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3957  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1];
3958  * instead there should be a flow per MARK ID set by the MARK action.
3959  *
3960  * For the aforementioned reason, if there's a MARK action in flow's action
3961  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3962  * the MARK ID to CQE's flow_tag like,
3963  *   - If reg_c[1] is mark_id,
3964  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3965  *
3966  * For SET_META action which stores value in reg_c[0], as the destination is
3967  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3968  * MARK ID means the default flow. The default flow looks like,
3969  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3970  *
3971  * @param dev
3972  *   Pointer to Ethernet device.
3973  * @param flow
3974  *   Pointer to flow structure.
3975  * @param[in] actions
3976  *   Pointer to the list of actions.
3977  * @param[out] error
3978  *   Perform verbose error reporting if not NULL.
3979  *
3980  * @return
3981  *   0 on success, negative value otherwise and rte_errno is set.
3982  */
3983 static int
3984 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3985                             struct rte_flow *flow,
3986                             const struct rte_flow_action *actions,
3987                             struct rte_flow_error *error)
3988 {
3989         struct mlx5_priv *priv = dev->data->dev_private;
3990         struct mlx5_dev_config *config = &priv->config;
3991         struct mlx5_flow_mreg_copy_resource *mcp_res;
3992         const struct rte_flow_action_mark *mark;
3993
3994         /* Check whether extensive metadata feature is engaged. */
3995         if (!config->dv_flow_en ||
3996             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3997             !mlx5_flow_ext_mreg_supported(dev) ||
3998             !priv->sh->dv_regc0_mask)
3999                 return 0;
4000         /* Find MARK action. */
4001         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4002                 switch (actions->type) {
4003                 case RTE_FLOW_ACTION_TYPE_FLAG:
4004                         mcp_res = flow_mreg_add_copy_action
4005                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4006                         if (!mcp_res)
4007                                 return -rte_errno;
4008                         flow->rix_mreg_copy = mcp_res->idx;
4009                         return 0;
4010                 case RTE_FLOW_ACTION_TYPE_MARK:
4011                         mark = (const struct rte_flow_action_mark *)
4012                                 actions->conf;
4013                         mcp_res =
4014                                 flow_mreg_add_copy_action(dev, mark->id, error);
4015                         if (!mcp_res)
4016                                 return -rte_errno;
4017                         flow->rix_mreg_copy = mcp_res->idx;
4018                         return 0;
4019                 default:
4020                         break;
4021                 }
4022         }
4023         return 0;
4024 }
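
/*
 * Application-side sketch (assumption): a rule whose MARK action would make
 * flow_mreg_update_copy_table() register a copy flow for ID 0x1234 in
 * RX_CP_TBL. The rss_conf object is a placeholder for this example.
 */
#if 0
	struct rte_flow_action_mark mark = { .id = 0x1234 };
	struct rte_flow_action app_actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss_conf },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
#endif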
4025
4026 #define MLX5_MAX_SPLIT_ACTIONS 24
4027 #define MLX5_MAX_SPLIT_ITEMS 24
4028
4029 /**
4030  * Split the hairpin flow.
4031  * Since HW can't support encap and push-vlan on Rx, we move these
4032  * actions to Tx.
4033  * If the count action is placed after the encap, we also move the
4034  * count action; in this case the count will also measure
4035  * the outer bytes.
4036  *
4037  * @param dev
4038  *   Pointer to Ethernet device.
4039  * @param[in] actions
4040  *   Associated actions (list terminated by the END action).
4041  * @param[out] actions_rx
4042  *   Rx flow actions.
4043  * @param[out] actions_tx
4044  *   Tx flow actions.
4045  * @param[out] pattern_tx
4046  *   The pattern items for the Tx flow.
4047  * @param[out] flow_id
4048  *   The flow ID connected to this flow.
4049  *
4050  * @return
4051  *   0 on success.
4052  */
4053 static int
4054 flow_hairpin_split(struct rte_eth_dev *dev,
4055                    const struct rte_flow_action actions[],
4056                    struct rte_flow_action actions_rx[],
4057                    struct rte_flow_action actions_tx[],
4058                    struct rte_flow_item pattern_tx[],
4059                    uint32_t flow_id)
4060 {
4061         const struct rte_flow_action_raw_encap *raw_encap;
4062         const struct rte_flow_action_raw_decap *raw_decap;
4063         struct mlx5_rte_flow_action_set_tag *set_tag;
4064         struct rte_flow_action *tag_action;
4065         struct mlx5_rte_flow_item_tag *tag_item;
4066         struct rte_flow_item *item;
4067         char *addr;
4068         int encap = 0;
4069
4070         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4071                 switch (actions->type) {
4072                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4073                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4074                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4075                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4076                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4077                         rte_memcpy(actions_tx, actions,
4078                                sizeof(struct rte_flow_action));
4079                         actions_tx++;
4080                         break;
4081                 case RTE_FLOW_ACTION_TYPE_COUNT:
4082                         if (encap) {
4083                                 rte_memcpy(actions_tx, actions,
4084                                            sizeof(struct rte_flow_action));
4085                                 actions_tx++;
4086                         } else {
4087                                 rte_memcpy(actions_rx, actions,
4088                                            sizeof(struct rte_flow_action));
4089                                 actions_rx++;
4090                         }
4091                         break;
4092                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4093                         raw_encap = actions->conf;
4094                         if (raw_encap->size >
4095                             (sizeof(struct rte_flow_item_eth) +
4096                              sizeof(struct rte_flow_item_ipv4))) {
4097                                 memcpy(actions_tx, actions,
4098                                        sizeof(struct rte_flow_action));
4099                                 actions_tx++;
4100                                 encap = 1;
4101                         } else {
4102                                 rte_memcpy(actions_rx, actions,
4103                                            sizeof(struct rte_flow_action));
4104                                 actions_rx++;
4105                         }
4106                         break;
4107                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4108                         raw_decap = actions->conf;
4109                         if (raw_decap->size <
4110                             (sizeof(struct rte_flow_item_eth) +
4111                              sizeof(struct rte_flow_item_ipv4))) {
4112                                 memcpy(actions_tx, actions,
4113                                        sizeof(struct rte_flow_action));
4114                                 actions_tx++;
4115                         } else {
4116                                 rte_memcpy(actions_rx, actions,
4117                                            sizeof(struct rte_flow_action));
4118                                 actions_rx++;
4119                         }
4120                         break;
4121                 default:
4122                         rte_memcpy(actions_rx, actions,
4123                                    sizeof(struct rte_flow_action));
4124                         actions_rx++;
4125                         break;
4126                 }
4127         }
4128         /* Add the internal tag action and the end action for the Rx flow. */
4129         tag_action = actions_rx;
4130         tag_action->type = (enum rte_flow_action_type)
4131                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4132         actions_rx++;
4133         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4134         actions_rx++;
4135         set_tag = (void *)actions_rx;
4136         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
4137         MLX5_ASSERT(set_tag->id > REG_NON);
4138         set_tag->data = flow_id;
4139         tag_action->conf = set_tag;
4140         /* Create Tx item list. */
4141         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4142         addr = (void *)&pattern_tx[2];
4143         item = pattern_tx;
4144         item->type = (enum rte_flow_item_type)
4145                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4146         tag_item = (void *)addr;
4147         tag_item->data = flow_id;
4148         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4149         MLX5_ASSERT(tag_item->id > REG_NON);
4150         item->spec = tag_item;
4151         addr += sizeof(struct mlx5_rte_flow_item_tag);
4152         tag_item = (void *)addr;
4153         tag_item->data = UINT32_MAX;
4154         tag_item->id = UINT16_MAX;
4155         item->mask = tag_item;
4156         item->last = NULL;
4157         item++;
4158         item->type = RTE_FLOW_ITEM_TYPE_END;
4159         return 0;
4160 }
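
/*
 * Caller-side sketch (assumption): the hairpin split works on separate Rx/Tx
 * buffers provided by the flow creation path. The array sizes follow
 * MLX5_MAX_SPLIT_ACTIONS/MLX5_MAX_SPLIT_ITEMS and leave headroom for the
 * internal tag data written past the END entries; hairpin_id stands for the
 * flow ID taken from the hairpin ID pool.
 */
#if 0
	struct rte_flow_action actions_rx[MLX5_MAX_SPLIT_ACTIONS];
	struct rte_flow_action actions_hairpin_tx[MLX5_MAX_SPLIT_ACTIONS];
	struct rte_flow_item items_tx[MLX5_MAX_SPLIT_ITEMS];

	flow_hairpin_split(dev, actions, actions_rx, actions_hairpin_tx,
			   items_tx, hairpin_id);
#endif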
4161
4162 __extension__
4163 union tunnel_offload_mark {
4164         uint32_t val;
4165         struct {
4166                 uint32_t app_reserve:8;
4167                 uint32_t table_id:15;
4168                 uint32_t transfer:1;
4169                 uint32_t _unused_:8;
4170         };
4171 };
4172
4173 struct tunnel_default_miss_ctx {
4174         uint16_t *queue;
4175         __extension__
4176         union {
4177                 struct rte_flow_action_rss action_rss;
4178                 struct rte_flow_action_queue miss_queue;
4179                 struct rte_flow_action_jump miss_jump;
4180                 uint8_t raw[0];
4181         };
4182 };
4183
4184 static int
4185 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
4186                              struct rte_flow *flow,
4187                              const struct rte_flow_attr *attr,
4188                              const struct rte_flow_action *app_actions,
4189                              uint32_t flow_idx,
4190                              struct tunnel_default_miss_ctx *ctx,
4191                              struct rte_flow_error *error)
4192 {
4193         struct mlx5_priv *priv = dev->data->dev_private;
4194         struct mlx5_flow *dev_flow;
4195         struct rte_flow_attr miss_attr = *attr;
4196         const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
4197         const struct rte_flow_item miss_items[2] = {
4198                 {
4199                         .type = RTE_FLOW_ITEM_TYPE_ETH,
4200                         .spec = NULL,
4201                         .last = NULL,
4202                         .mask = NULL
4203                 },
4204                 {
4205                         .type = RTE_FLOW_ITEM_TYPE_END,
4206                         .spec = NULL,
4207                         .last = NULL,
4208                         .mask = NULL
4209                 }
4210         };
4211         union tunnel_offload_mark mark_id;
4212         struct rte_flow_action_mark miss_mark;
4213         struct rte_flow_action miss_actions[3] = {
4214                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
4215                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
4216         };
4217         const struct rte_flow_action_jump *jump_data;
4218         uint32_t i, flow_table = 0; /* prevent compilation warning */
4219         struct flow_grp_info grp_info = {
4220                 .external = 1,
4221                 .transfer = attr->transfer,
4222                 .fdb_def_rule = !!priv->fdb_def_rule,
4223                 .std_tbl_fix = 0,
4224         };
4225         int ret;
4226
4227         if (!attr->transfer) {
4228                 uint32_t q_size;
4229
4230                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
4231                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
4232                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
4233                                          0, SOCKET_ID_ANY);
4234                 if (!ctx->queue)
4235                         return rte_flow_error_set
4236                                 (error, ENOMEM,
4237                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
4238                                 NULL, "invalid default miss RSS");
4239                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4240                 ctx->action_rss.level = 0,
4241                 ctx->action_rss.types = priv->rss_conf.rss_hf,
4242                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len,
4243                 ctx->action_rss.queue_num = priv->reta_idx_n,
4244                 ctx->action_rss.key = priv->rss_conf.rss_key,
4245                 ctx->action_rss.queue = ctx->queue;
4246                 if (!priv->reta_idx_n || !priv->rxqs_n)
4247                         return rte_flow_error_set
4248                                 (error, EINVAL,
4249                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
4250                                 NULL, "invalid port configuration");
4251                 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
4252                         ctx->action_rss.types = 0;
4253                 for (i = 0; i != priv->reta_idx_n; ++i)
4254                         ctx->queue[i] = (*priv->reta_idx)[i];
4255         } else {
4256                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
4257                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
4258         }
4259         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
4260         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
4261         jump_data = app_actions->conf;
4262         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
4263         miss_attr.group = jump_data->group;
4264         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
4265                                        &flow_table, grp_info, error);
4266         if (ret)
4267                 return rte_flow_error_set(error, EINVAL,
4268                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
4269                                           NULL, "invalid tunnel id");
4270         mark_id.app_reserve = 0;
4271         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
4272         mark_id.transfer = !!attr->transfer;
4273         mark_id._unused_ = 0;
4274         miss_mark.id = mark_id.val;
4275         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
4276                                     miss_items, miss_actions, flow_idx, error);
4277         if (!dev_flow)
4278                 return -rte_errno;
4279         dev_flow->flow = flow;
4280         dev_flow->external = true;
4281         dev_flow->tunnel = tunnel;
4282         /* Subflow object was created, we must include one in the list. */
4283         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4284                       dev_flow->handle, next);
4285         DRV_LOG(DEBUG,
4286                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
4287                 dev->data->port_id, tunnel->app_tunnel.type,
4288                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
4289         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
4290                                   miss_actions, error);
4291         if (!ret)
4292                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
4293                                                   error);
4294
4295         return ret;
4296 }
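
/*
 * Application-side sketch (assumption): the kind of tunnel offload rule that
 * ends with a JUMP and therefore gets the default-miss subflow above
 * attached. The decap-set actions would normally come from
 * rte_flow_tunnel_decap_set(); the group number 3 is illustrative.
 */
#if 0
	struct rte_flow_action_jump jump = { .group = 3 };
	struct rte_flow_action app_actions[] = {
		/* Actions returned by rte_flow_tunnel_decap_set() go first. */
		{ .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
#endif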
4297
4298 /**
4299  * The last stage of splitting chain, just creates the subflow
4300  * without any modification.
4301  *
4302  * @param[in] dev
4303  *   Pointer to Ethernet device.
4304  * @param[in] flow
4305  *   Parent flow structure pointer.
4306  * @param[in, out] sub_flow
4307  *   Pointer to return the created subflow, may be NULL.
4308  * @param[in] attr
4309  *   Flow rule attributes.
4310  * @param[in] items
4311  *   Pattern specification (list terminated by the END pattern item).
4312  * @param[in] actions
4313  *   Associated actions (list terminated by the END action).
4314  * @param[in] flow_split_info
4315  *   Pointer to flow split info structure.
4316  * @param[out] error
4317  *   Perform verbose error reporting if not NULL.
4318  * @return
4319  *   0 on success, negative value otherwise
4320  */
4321 static int
4322 flow_create_split_inner(struct rte_eth_dev *dev,
4323                         struct rte_flow *flow,
4324                         struct mlx5_flow **sub_flow,
4325                         const struct rte_flow_attr *attr,
4326                         const struct rte_flow_item items[],
4327                         const struct rte_flow_action actions[],
4328                         struct mlx5_flow_split_info *flow_split_info,
4329                         struct rte_flow_error *error)
4330 {
4331         struct mlx5_flow *dev_flow;
4332
4333         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4334                                     flow_split_info->flow_idx, error);
4335         if (!dev_flow)
4336                 return -rte_errno;
4337         dev_flow->flow = flow;
4338         dev_flow->external = flow_split_info->external;
4339         dev_flow->skip_scale = flow_split_info->skip_scale;
4340         /* Subflow object was created, we must include one in the list. */
4341         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4342                       dev_flow->handle, next);
4343         /*
4344          * If dev_flow is one of the suffix flows, some actions in the suffix
4345          * flow may need the user-defined item layer flags; pass the
4346          * metadata rxq mark flag to the suffix flow as well.
4347          */
4348         if (flow_split_info->prefix_layers)
4349                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4350         if (flow_split_info->prefix_mark)
4351                 dev_flow->handle->mark = 1;
4352         if (sub_flow)
4353                 *sub_flow = dev_flow;
4354         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4355 }
4356
4357 /**
4358  * Split the meter flow.
4359  *
4360  * As the meter flow will be split into three sub flows, the actions other
4361  * than the meter action only make sense if the meter accepts
4362  * the packet. If it needs to be dropped, no additional
4363  * actions should be taken.
4364  *
4365  * One kind of special action which decapsulates the L3 tunnel
4366  * header will be in the prefix sub flow, so as not to take the
4367  * L3 tunnel header into account.
4368  *
4369  * @param dev
4370  *   Pointer to Ethernet device.
4371  * @param[in] items
4372  *   Pattern specification (list terminated by the END pattern item).
4373  * @param[out] sfx_items
4374  *   Suffix flow match items (list terminated by the END pattern item).
4375  * @param[in] actions
4376  *   Associated actions (list terminated by the END action).
4377  * @param[out] actions_sfx
4378  *   Suffix flow actions.
4379  * @param[out] actions_pre
4380  *   Prefix flow actions.
4381  *
4382  * An internal TAG item matching the allocated flow ID is appended to the
4383  * suffix pattern and the corresponding SET_TAG action is appended to the
4384  * prefix actions.
4385  *
4386  * @return
4387  *   The non-zero internal flow ID (tag) on success, 0 otherwise.
4388  */
4389 static int
4390 flow_meter_split_prep(struct rte_eth_dev *dev,
4391                  const struct rte_flow_item items[],
4392                  struct rte_flow_item sfx_items[],
4393                  const struct rte_flow_action actions[],
4394                  struct rte_flow_action actions_sfx[],
4395                  struct rte_flow_action actions_pre[])
4396 {
4397         struct mlx5_priv *priv = dev->data->dev_private;
4398         struct rte_flow_action *tag_action = NULL;
4399         struct rte_flow_item *tag_item;
4400         struct mlx5_rte_flow_action_set_tag *set_tag;
4401         struct rte_flow_error error;
4402         const struct rte_flow_action_raw_encap *raw_encap;
4403         const struct rte_flow_action_raw_decap *raw_decap;
4404         struct mlx5_rte_flow_item_tag *tag_spec;
4405         struct mlx5_rte_flow_item_tag *tag_mask;
4406         uint32_t tag_id = 0;
4407         bool copy_vlan = false;
4408
4409         /* Prepare the actions for prefix and suffix flow. */
4410         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4411                 struct rte_flow_action **action_cur = NULL;
4412
4413                 switch (actions->type) {
4414                 case RTE_FLOW_ACTION_TYPE_METER:
4415                         /* Add the extra tag action first. */
4416                         tag_action = actions_pre;
4417                         tag_action->type = (enum rte_flow_action_type)
4418                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4419                         actions_pre++;
4420                         action_cur = &actions_pre;
4421                         break;
4422                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4423                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4424                         action_cur = &actions_pre;
4425                         break;
4426                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4427                         raw_encap = actions->conf;
4428                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4429                                 action_cur = &actions_pre;
4430                         break;
4431                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4432                         raw_decap = actions->conf;
4433                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4434                                 action_cur = &actions_pre;
4435                         break;
4436                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4437                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4438                         copy_vlan = true;
4439                         break;
4440                 default:
4441                         break;
4442                 }
4443                 if (!action_cur)
4444                         action_cur = &actions_sfx;
4445                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
4446                 (*action_cur)++;
4447         }
4448         /* Add end action to the actions. */
4449         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4450         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4451         actions_pre++;
4452         /* Set the tag. */
4453         set_tag = (void *)actions_pre;
4454         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4455         mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
4456                           &tag_id);
4457         if (tag_id >= (1 << (sizeof(tag_id) * 8 - MLX5_MTR_COLOR_BITS))) {
4458                 DRV_LOG(ERR, "Port %u meter flow id exceed max limit.",
4459                         dev->data->port_id);
4460                 mlx5_ipool_free(priv->sh->ipool
4461                                 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], tag_id);
4462                 return 0;
4463         } else if (!tag_id) {
4464                 return 0;
4465         }
4466         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
4467         MLX5_ASSERT(tag_action);
4468         tag_action->conf = set_tag;
4469         /* Prepare the suffix subflow items. */
4470         tag_item = sfx_items++;
4471         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4472                 int item_type = items->type;
4473
4474                 switch (item_type) {
4475                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4476                         memcpy(sfx_items, items, sizeof(*sfx_items));
4477                         sfx_items++;
4478                         break;
4479                 case RTE_FLOW_ITEM_TYPE_VLAN:
4480                         if (copy_vlan) {
4481                                 memcpy(sfx_items, items, sizeof(*sfx_items));
4482                                 /*
4483                                  * Convert to an internal match item; it is used
4484                                  * for VLAN push and set VID.
4485                                  */
4486                                 sfx_items->type = (enum rte_flow_item_type)
4487                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4488                                 sfx_items++;
4489                         }
4490                         break;
4491                 default:
4492                         break;
4493                 }
4494         }
4495         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4496         sfx_items++;
4497         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4498         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
4499         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4500         tag_mask = tag_spec + 1;
4501         tag_mask->data = 0xffffff00;
4502         tag_item->type = (enum rte_flow_item_type)
4503                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4504         tag_item->spec = tag_spec;
4505         tag_item->last = NULL;
4506         tag_item->mask = tag_mask;
4507         return tag_id;
4508 }
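
/*
 * Caller-side sketch (assumption): the meter split path pre-allocates the
 * sfx_items, sfx_actions and pre_actions buffers and treats a zero return as
 * a failed split (internal flow ID could not be allocated).
 */
#if 0
	uint32_t mtr_tag_id;

	mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
					   actions, sfx_actions, pre_actions);
	if (!mtr_tag_id)
		goto exit;
#endif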
4509
4510 /**
4511  * Split action list having QUEUE/RSS for metadata register copy.
4512  *
4513  * Once a Q/RSS action is detected in the user's action list, the flow
4514  * should be split in order to copy the metadata registers, which will happen
4515  * in RX_CP_TBL like,
4516  *   - CQE->flow_tag := reg_c[1] (MARK)
4517  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4518  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4519  * This is because the last action of each flow must be a terminal action
4520  * (QUEUE, RSS or DROP).
4521  *
4522  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4523  * stored and kept in the mlx5_flow structure per each sub_flow.
4524  *
4525  * The Q/RSS action is replaced with,
4526  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4527  * And the following JUMP action is added at the end,
4528  *   - JUMP, to RX_CP_TBL.
4529  *
4530  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL
4531  * by the flow_create_split_metadata() routine. The flow will look like,
4532  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4533  *
4534  * @param dev
4535  *   Pointer to Ethernet device.
4536  * @param[out] split_actions
4537  *   Pointer to store split actions to jump to CP_TBL.
4538  * @param[in] actions
4539  *   Pointer to the list of original flow actions.
4540  * @param[in] qrss
4541  *   Pointer to the Q/RSS action.
4542  * @param[in] actions_n
4543  *   Number of original actions.
4544  * @param[out] error
4545  *   Perform verbose error reporting if not NULL.
4546  *
4547  * @return
4548  *   non-zero unique flow_id on success, otherwise 0 and
4549  *   error/rte_errno are set.
4550  */
4551 static uint32_t
4552 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4553                           struct rte_flow_action *split_actions,
4554                           const struct rte_flow_action *actions,
4555                           const struct rte_flow_action *qrss,
4556                           int actions_n, struct rte_flow_error *error)
4557 {
4558         struct mlx5_priv *priv = dev->data->dev_private;
4559         struct mlx5_rte_flow_action_set_tag *set_tag;
4560         struct rte_flow_action_jump *jump;
4561         const int qrss_idx = qrss - actions;
4562         uint32_t flow_id = 0;
4563         int ret = 0;
4564
4565         /*
4566          * Given actions will be split
4567          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4568          * - Add jump to mreg CP_TBL.
4569          * As a result, there will be one more action.
4570          */
4571         ++actions_n;
4572         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4573         set_tag = (void *)(split_actions + actions_n);
4574         /*
4575          * If the tag action is not set to void (meaning we are not the meter
4576          * suffix flow), add the tag action, since the meter suffix flow
4577          * already has the tag added.
4578          */
4579         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4580                 /*
4581                  * Allocate the new subflow ID. This one is unique within
4582                  * device and not shared with representors. Otherwise,
4583                  * we would have to resolve multi-thread access synch
4584                  * issue. Each flow on the shared device is appended
4585                  * with source vport identifier, so the resulting
4586                  * flows will be unique in the shared (by master and
4587                  * representors) domain even if they have coinciding
4588                  * IDs.
4589                  */
4590                 mlx5_ipool_malloc(priv->sh->ipool
4591                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
4592                 if (!flow_id)
4593                         return rte_flow_error_set(error, ENOMEM,
4594                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4595                                                   NULL, "can't allocate id "
4596                                                   "for split Q/RSS subflow");
4597                 /* Internal SET_TAG action to set flow ID. */
4598                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4599                         .data = flow_id,
4600                 };
4601                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4602                 if (ret < 0)
4603                         return ret;
4604                 set_tag->id = ret;
4605                 /* Construct new actions array. */
4606                 /* Replace QUEUE/RSS action. */
4607                 split_actions[qrss_idx] = (struct rte_flow_action){
4608                         .type = (enum rte_flow_action_type)
4609                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4610                         .conf = set_tag,
4611                 };
4612         }
4613         /* JUMP action to jump to mreg copy table (CP_TBL). */
4614         jump = (void *)(set_tag + 1);
4615         *jump = (struct rte_flow_action_jump){
4616                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4617         };
4618         split_actions[actions_n - 2] = (struct rte_flow_action){
4619                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4620                 .conf = jump,
4621         };
4622         split_actions[actions_n - 1] = (struct rte_flow_action){
4623                 .type = RTE_FLOW_ACTION_TYPE_END,
4624         };
4625         return flow_id;
4626 }
4627
4628 /**
4629  * Extend the given action list for Tx metadata copy.
4630  *
4631  * Copy the given action list to the ext_actions and add flow metadata register
4632  * copy action in order to copy reg_a set by WQE to reg_c[0].
4633  *
4634  * @param[out] ext_actions
4635  *   Pointer to the extended action list.
4636  * @param[in] actions
4637  *   Pointer to the list of actions.
4638  * @param[in] actions_n
4639  *   Number of actions in the list.
4640  * @param[out] error
4641  *   Perform verbose error reporting if not NULL.
4642  * @param[in] encap_idx
4643  *   The encap action index.
4644  *
4645  * @return
4646  *   0 on success, negative value otherwise
4647  */
4648 static int
4649 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
4650                        struct rte_flow_action *ext_actions,
4651                        const struct rte_flow_action *actions,
4652                        int actions_n, struct rte_flow_error *error,
4653                        int encap_idx)
4654 {
4655         struct mlx5_flow_action_copy_mreg *cp_mreg =
4656                 (struct mlx5_flow_action_copy_mreg *)
4657                         (ext_actions + actions_n + 1);
4658         int ret;
4659
4660         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4661         if (ret < 0)
4662                 return ret;
4663         cp_mreg->dst = ret;
4664         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
4665         if (ret < 0)
4666                 return ret;
4667         cp_mreg->src = ret;
4668         if (encap_idx != 0)
4669                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4670         if (encap_idx == actions_n - 1) {
4671                 ext_actions[actions_n - 1] = (struct rte_flow_action){
4672                         .type = (enum rte_flow_action_type)
4673                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4674                         .conf = cp_mreg,
4675                 };
4676                 ext_actions[actions_n] = (struct rte_flow_action){
4677                         .type = RTE_FLOW_ACTION_TYPE_END,
4678                 };
4679         } else {
4680                 ext_actions[encap_idx] = (struct rte_flow_action){
4681                         .type = (enum rte_flow_action_type)
4682                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4683                         .conf = cp_mreg,
4684                 };
4685                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4686                                 sizeof(*ext_actions) * (actions_n - encap_idx));
4687         }
4688         return 0;
4689 }
4690
4691 /**
4692  * Check the match action from the action list.
4693  *
4694  * @param[in] actions
4695  *   Pointer to the list of actions.
4696  * @param[in] attr
4697  *   Flow rule attributes.
4698  * @param[in] action
4699  *   The action to check for in the action list.
4700  * @param[out] match_action_pos
4701  *   Pointer to the position of the matched action if it exists, otherwise -1.
4702  * @param[out] qrss_action_pos
4703  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
4704  *
4705  * @return
4706  *   > 0 the total number of actions if the match action is found.
4707  *   0 if the match action is not found in the action list.
4708  */
4709 static int
4710 flow_check_match_action(const struct rte_flow_action actions[],
4711                         const struct rte_flow_attr *attr,
4712                         enum rte_flow_action_type action,
4713                         int *match_action_pos, int *qrss_action_pos)
4714 {
4715         const struct rte_flow_action_sample *sample;
4716         int actions_n = 0;
4717         int jump_flag = 0;
4718         uint32_t ratio = 0;
4719         int sub_type = 0;
4720         int flag = 0;
4721
4722         *match_action_pos = -1;
4723         *qrss_action_pos = -1;
4724         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4725                 if (actions->type == action) {
4726                         flag = 1;
4727                         *match_action_pos = actions_n;
4728                 }
4729                 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
4730                     actions->type == RTE_FLOW_ACTION_TYPE_RSS)
4731                         *qrss_action_pos = actions_n;
4732                 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
4733                         jump_flag = 1;
4734                 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
4735                         sample = actions->conf;
4736                         ratio = sample->ratio;
4737                         sub_type = ((const struct rte_flow_action *)
4738                                         (sample->actions))->type;
4739                 }
4740                 actions_n++;
4741         }
4742         if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
4743                 if (ratio == 1) {
4744                         /* The JUMP action is not supported for mirroring;
4745                          * mirroring supports multi-destination.
4746                          */
4747                         if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
4748                                 flag = 0;
4749                 }
4750         }
4751         /* Count RTE_FLOW_ACTION_TYPE_END. */
4752         return flag ? actions_n + 1 : 0;
4753 }
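
/*
 * Usage sketch (assumption): probe the action list for a SAMPLE action before
 * deciding whether the flow needs the sample split; variable names are
 * illustrative.
 */
#if 0
	int sample_pos, qrss_pos, total;

	total = flow_check_match_action(actions, attr,
					RTE_FLOW_ACTION_TYPE_SAMPLE,
					&sample_pos, &qrss_pos);
	if (!total) {
		/* No SAMPLE action (or unsupported mirroring layout). */
	}
#endif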
4754
4755 #define SAMPLE_SUFFIX_ITEM 2
4756
4757 /**
4758  * Split the sample flow.
4759  *
4760  * As the sample flow will be split into two sub flows, the first one keeps
4761  * the sample action and the remaining actions are moved to a new suffix flow.
4762  *
4763  * A unique tag ID is also added with a tag action in the sample flow;
4764  * the same tag ID is used as a match in the suffix flow.
4765  *
4766  * @param dev
4767  *   Pointer to Ethernet device.
4768  * @param[in] fdb_tx
4769  *   FDB egress flow flag.
4770  * @param[out] sfx_items
4771  *   Suffix flow match items (list terminated by the END pattern item).
4772  * @param[in] actions
4773  *   Associated actions (list terminated by the END action).
4774  * @param[out] actions_sfx
4775  *   Suffix flow actions.
4776  * @param[out] actions_pre
4777  *   Prefix flow actions.
4778  * @param[in] actions_n
4779  *  The total number of actions.
4780  * @param[in] sample_action_pos
4781  *   The sample action position.
4782  * @param[in] qrss_action_pos
4783  *   The Queue/RSS action position.
4784  * @param[out] error
4785  *   Perform verbose error reporting if not NULL.
4786  *
4787  * @return
4788  *   0 or a unique flow_id on success, a negative errno value
4789  *   otherwise and rte_errno is set.
4790  */
4791 static int
4792 flow_sample_split_prep(struct rte_eth_dev *dev,
4793                        uint32_t fdb_tx,
4794                        struct rte_flow_item sfx_items[],
4795                        const struct rte_flow_action actions[],
4796                        struct rte_flow_action actions_sfx[],
4797                        struct rte_flow_action actions_pre[],
4798                        int actions_n,
4799                        int sample_action_pos,
4800                        int qrss_action_pos,
4801                        struct rte_flow_error *error)
4802 {
4803         struct mlx5_priv *priv = dev->data->dev_private;
4804         struct mlx5_rte_flow_action_set_tag *set_tag;
4805         struct mlx5_rte_flow_item_tag *tag_spec;
4806         struct mlx5_rte_flow_item_tag *tag_mask;
4807         uint32_t tag_id = 0;
4808         int index;
4809         int ret;
4810
4811         if (sample_action_pos < 0)
4812                 return rte_flow_error_set(error, EINVAL,
4813                                           RTE_FLOW_ERROR_TYPE_ACTION,
4814                                           NULL, "invalid position of sample "
4815                                           "action in list");
4816         if (!fdb_tx) {
4817                 /* Prepare the prefix tag action. */
4818                 set_tag = (void *)(actions_pre + actions_n + 1);
4819                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
4820                 if (ret < 0)
4821                         return ret;
4822                 set_tag->id = ret;
4823                 mlx5_ipool_malloc(priv->sh->ipool
4824                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
4825                 set_tag->data = tag_id;
4826                 /* Prepare the suffix subflow items. */
4827                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
4828                 tag_spec->data = tag_id;
4829                 tag_spec->id = set_tag->id;
4830                 tag_mask = tag_spec + 1;
4831                 tag_mask->data = UINT32_MAX;
4832                 sfx_items[0] = (struct rte_flow_item){
4833                         .type = (enum rte_flow_item_type)
4834                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4835                         .spec = tag_spec,
4836                         .last = NULL,
4837                         .mask = tag_mask,
4838                 };
4839                 sfx_items[1] = (struct rte_flow_item){
4840                         .type = (enum rte_flow_item_type)
4841                                 RTE_FLOW_ITEM_TYPE_END,
4842                 };
4843         }
4844         /* Prepare the actions for prefix and suffix flow. */
4845         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
4846                 index = qrss_action_pos;
4847                 /* Put actions preceding the Queue/RSS action into the prefix flow. */
4848                 if (index != 0)
4849                         memcpy(actions_pre, actions,
4850                                sizeof(struct rte_flow_action) * index);
4851                 /* Put the other actions preceding the sample action into the prefix flow. */
4852                 if (sample_action_pos > index + 1)
4853                         memcpy(actions_pre + index, actions + index + 1,
4854                                sizeof(struct rte_flow_action) *
4855                                (sample_action_pos - index - 1));
4856                 index = sample_action_pos - 1;
4857                 /* Put Queue/RSS action into Suffix flow. */
4858                 memcpy(actions_sfx, actions + qrss_action_pos,
4859                        sizeof(struct rte_flow_action));
4860                 actions_sfx++;
4861         } else {
4862                 index = sample_action_pos;
4863                 if (index != 0)
4864                         memcpy(actions_pre, actions,
4865                                sizeof(struct rte_flow_action) * index);
4866         }
4867         /* Add the extra tag action for NIC-RX and E-Switch ingress. */
4868         if (!fdb_tx) {
4869                 actions_pre[index++] =
4870                         (struct rte_flow_action){
4871                         .type = (enum rte_flow_action_type)
4872                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4873                         .conf = set_tag,
4874                 };
4875         }
4876         memcpy(actions_pre + index, actions + sample_action_pos,
4877                sizeof(struct rte_flow_action));
4878         index += 1;
4879         actions_pre[index] = (struct rte_flow_action){
4880                 .type = (enum rte_flow_action_type)
4881                         RTE_FLOW_ACTION_TYPE_END,
4882         };
4883         /* Put the actions after sample into Suffix flow. */
4884         memcpy(actions_sfx, actions + sample_action_pos + 1,
4885                sizeof(struct rte_flow_action) *
4886                (actions_n - sample_action_pos - 1));
4887         return tag_id;
4888 }
4889
4890 /**
4891  * The splitting for metadata feature.
4892  *
4893  * - Q/RSS action on NIC Rx should be split in order to pass by
4894  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
4895  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
4896  *
4897  * - All the actions on NIC Tx should have a mreg copy action to
4898  *   copy reg_a from WQE to reg_c[0].
4899  *
4900  * @param dev
4901  *   Pointer to Ethernet device.
4902  * @param[in] flow
4903  *   Parent flow structure pointer.
4904  * @param[in] attr
4905  *   Flow rule attributes.
4906  * @param[in] items
4907  *   Pattern specification (list terminated by the END pattern item).
4908  * @param[in] actions
4909  *   Associated actions (list terminated by the END action).
4910  * @param[in] flow_split_info
4911  *   Pointer to flow split info structure.
4912  * @param[out] error
4913  *   Perform verbose error reporting if not NULL.
4914  * @return
4915  *   0 on success, negative value otherwise
4916  */
4917 static int
4918 flow_create_split_metadata(struct rte_eth_dev *dev,
4919                            struct rte_flow *flow,
4920                            const struct rte_flow_attr *attr,
4921                            const struct rte_flow_item items[],
4922                            const struct rte_flow_action actions[],
4923                            struct mlx5_flow_split_info *flow_split_info,
4924                            struct rte_flow_error *error)
4925 {
4926         struct mlx5_priv *priv = dev->data->dev_private;
4927         struct mlx5_dev_config *config = &priv->config;
4928         const struct rte_flow_action *qrss = NULL;
4929         struct rte_flow_action *ext_actions = NULL;
4930         struct mlx5_flow *dev_flow = NULL;
4931         uint32_t qrss_id = 0;
4932         int mtr_sfx = 0;
4933         size_t act_size;
4934         int actions_n;
4935         int encap_idx;
4936         int ret;
4937
4938         /* Check whether extensive metadata feature is engaged. */
4939         if (!config->dv_flow_en ||
4940             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4941             !mlx5_flow_ext_mreg_supported(dev))
4942                 return flow_create_split_inner(dev, flow, NULL, attr, items,
4943                                                actions, flow_split_info, error);
4944         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4945                                                            &encap_idx);
4946         if (qrss) {
4947                 /* Exclude hairpin flows from splitting. */
4948                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4949                         const struct rte_flow_action_queue *queue;
4950
4951                         queue = qrss->conf;
4952                         if (mlx5_rxq_get_type(dev, queue->index) ==
4953                             MLX5_RXQ_TYPE_HAIRPIN)
4954                                 qrss = NULL;
4955                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
4956                         const struct rte_flow_action_rss *rss;
4957
4958                         rss = qrss->conf;
4959                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
4960                             MLX5_RXQ_TYPE_HAIRPIN)
4961                                 qrss = NULL;
4962                 }
4963         }
4964         if (qrss) {
4965                 /* Check if it is in meter suffix table. */
4966                 mtr_sfx = attr->group == (attr->transfer ?
4967                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4968                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
4969                 /*
4970                  * Q/RSS action on NIC Rx should be split in order to pass by
4971                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
4972                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
4973                  */
4974                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4975                            sizeof(struct rte_flow_action_set_tag) +
4976                            sizeof(struct rte_flow_action_jump);
4977                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4978                                           SOCKET_ID_ANY);
4979                 if (!ext_actions)
4980                         return rte_flow_error_set(error, ENOMEM,
4981                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4982                                                   NULL, "no memory to split "
4983                                                   "metadata flow");
4984                 /*
4985                  * If we are the suffix flow of a meter, the tag already exists.
4986                  * Set the tag action to void.
4987                  */
4988                 if (mtr_sfx)
4989                         ext_actions[qrss - actions].type =
4990                                                 RTE_FLOW_ACTION_TYPE_VOID;
4991                 else
4992                         ext_actions[qrss - actions].type =
4993                                                 (enum rte_flow_action_type)
4994                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4995                 /*
4996                  * Create the new actions list with removed Q/RSS action
4997                  * and appended set tag and jump to register copy table
4998                  * (RX_CP_TBL). We should preallocate unique tag ID here
4999                  * in advance, because it is needed for set tag action.
5000                  */
5001                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5002                                                     qrss, actions_n, error);
5003                 if (!mtr_sfx && !qrss_id) {
5004                         ret = -rte_errno;
5005                         goto exit;
5006                 }
5007         } else if (attr->egress && !attr->transfer) {
5008                 /*
5009                  * All the actions on NIC Tx should have a metadata register
5010                  * copy action to copy reg_a from WQE to reg_c[meta]
5011                  */
5012                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5013                            sizeof(struct mlx5_flow_action_copy_mreg);
5014                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5015                                           SOCKET_ID_ANY);
5016                 if (!ext_actions)
5017                         return rte_flow_error_set(error, ENOMEM,
5018                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5019                                                   NULL, "no memory to split "
5020                                                   "metadata flow");
5021                 /* Create the action list appended with copy register. */
5022                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5023                                              actions_n, error, encap_idx);
5024                 if (ret < 0)
5025                         goto exit;
5026         }
5027         /* Add the unmodified original or prefix subflow. */
5028         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5029                                       items, ext_actions ? ext_actions :
5030                                       actions, flow_split_info, error);
5031         if (ret < 0)
5032                 goto exit;
5033         MLX5_ASSERT(dev_flow);
5034         if (qrss) {
5035                 const struct rte_flow_attr q_attr = {
5036                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5037                         .ingress = 1,
5038                 };
5039                 /* Internal PMD action to set register. */
5040                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5041                         .data = qrss_id,
5042                         .id = REG_NON,
5043                 };
5044                 struct rte_flow_item q_items[] = {
5045                         {
5046                                 .type = (enum rte_flow_item_type)
5047                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5048                                 .spec = &q_tag_spec,
5049                                 .last = NULL,
5050                                 .mask = NULL,
5051                         },
5052                         {
5053                                 .type = RTE_FLOW_ITEM_TYPE_END,
5054                         },
5055                 };
5056                 struct rte_flow_action q_actions[] = {
5057                         {
5058                                 .type = qrss->type,
5059                                 .conf = qrss->conf,
5060                         },
5061                         {
5062                                 .type = RTE_FLOW_ACTION_TYPE_END,
5063                         },
5064                 };
5065                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5066
5067                 /*
5068                  * Configure the tag item only if there is no meter subflow.
5069                  * Since the tag is already set in the meter suffix subflow,
5070                  * the meter suffix items can be used as is.
5071                  */
5072                 if (qrss_id) {
5073                         /* Not meter subflow. */
5074                         MLX5_ASSERT(!mtr_sfx);
5075                         /*
5076                          * Store the unique ID in the prefix flow because it is
5077                          * destroyed after the suffix flow. The ID is freed only
5078                          * once no actual flows reference it any more, at which
5079                          * point the identifier can be reallocated (for example,
5080                          * by other flows in other threads).
5081                          */
5082                         dev_flow->handle->split_flow_id = qrss_id;
5083                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5084                                                    error);
5085                         if (ret < 0)
5086                                 goto exit;
5087                         q_tag_spec.id = ret;
5088                 }
5089                 dev_flow = NULL;
5090                 /* Add suffix subflow to execute Q/RSS. */
5091                 flow_split_info->prefix_layers = layers;
5092                 flow_split_info->prefix_mark = 0;
5093                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5094                                               &q_attr, mtr_sfx ? items :
5095                                               q_items, q_actions,
5096                                               flow_split_info, error);
5097                 if (ret < 0)
5098                         goto exit;
5099                 /* Reset qrss_id so that it is only freed on failure. */
5100                 qrss_id = 0;
5101                 MLX5_ASSERT(dev_flow);
5102         }
5103
5104 exit:
5105         /*
5106          * Partially created sub_flows are not destroyed in case of error.
5107          * They are included in the parent flow list and will be destroyed
5108          * by flow_drv_destroy.
5109          */
5110         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5111                         qrss_id);
5112         mlx5_free(ext_actions);
5113         return ret;
5114 }
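/*
 * Illustrative sketch of the metadata Q/RSS split above (assumed example,
 * not taken from a real rule): a NIC Rx rule with actions { QUEUE, END }
 * is rebuilt as
 *   prefix: original items + { set_tag(qrss_id), jump(RX_CP_TBL), END }
 *   suffix: tag(qrss_id) item in RX_ACT_TBL + { QUEUE, END }
 * so the packet passes the register copy table before the final Q/RSS
 * action is applied.
 */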
5115
5116 /**
5117  * The splitting for meter feature.
5118  *
5119  * - The meter flow will be split into two flows, a prefix and a
5120  *   suffix flow. Packets are meaningful only if they pass the
5121  *   prefix meter action.
5122  *
5123  * - Reg_C_5 is used to match the packet between the prefix and
5124  *   suffix flows (see the illustrative sketch after this function).
5125  *
5126  * @param dev
5127  *   Pointer to Ethernet device.
5128  * @param[in] flow
5129  *   Parent flow structure pointer.
5130  * @param[in] attr
5131  *   Flow rule attributes.
5132  * @param[in] items
5133  *   Pattern specification (list terminated by the END pattern item).
5134  * @param[in] actions
5135  *   Associated actions (list terminated by the END action).
5136  * @param[in] flow_split_info
5137  *   Pointer to flow split info structure.
5138  * @param[out] error
5139  *   Perform verbose error reporting if not NULL.
5140  * @return
5141  *   0 on success, negative value otherwise
5142  */
5143 static int
5144 flow_create_split_meter(struct rte_eth_dev *dev,
5145                         struct rte_flow *flow,
5146                         const struct rte_flow_attr *attr,
5147                         const struct rte_flow_item items[],
5148                         const struct rte_flow_action actions[],
5149                         struct mlx5_flow_split_info *flow_split_info,
5150                         struct rte_flow_error *error)
5151 {
5152         struct mlx5_priv *priv = dev->data->dev_private;
5153         struct rte_flow_action *sfx_actions = NULL;
5154         struct rte_flow_action *pre_actions = NULL;
5155         struct rte_flow_item *sfx_items = NULL;
5156         struct mlx5_flow *dev_flow = NULL;
5157         struct rte_flow_attr sfx_attr = *attr;
5158         uint32_t mtr = 0;
5159         uint32_t mtr_tag_id = 0;
5160         size_t act_size;
5161         size_t item_size;
5162         int actions_n = 0;
5163         int ret;
5164
5165         if (priv->mtr_en)
5166                 actions_n = flow_check_meter_action(actions, &mtr);
5167         if (mtr) {
5168                 /* The five prefix actions: meter, decap, encap, tag, end. */
5169                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
5170                            sizeof(struct mlx5_rte_flow_action_set_tag);
5171                 /* tag, vlan, port id, end. */
5172 #define METER_SUFFIX_ITEM 4
5173                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5174                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5175                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5176                                           0, SOCKET_ID_ANY);
5177                 if (!sfx_actions)
5178                         return rte_flow_error_set(error, ENOMEM,
5179                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5180                                                   NULL, "no memory to split "
5181                                                   "meter flow");
5182                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5183                              act_size);
5184                 pre_actions = sfx_actions + actions_n;
5185                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
5186                                                    actions, sfx_actions,
5187                                                    pre_actions);
5188                 if (!mtr_tag_id) {
5189                         ret = -rte_errno;
5190                         goto exit;
5191                 }
5192                 /* Add the prefix subflow. */
5193                 flow_split_info->prefix_mark = 0;
5194                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5195                                               attr, items, pre_actions,
5196                                               flow_split_info, error);
5197                 if (ret) {
5198                         ret = -rte_errno;
5199                         goto exit;
5200                 }
5201                 dev_flow->handle->split_flow_id = mtr_tag_id;
5202                 /* Set the suffix group attribute. */
5203                 sfx_attr.group = sfx_attr.transfer ?
5204                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
5205                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
5206                 flow_split_info->prefix_layers =
5207                                 flow_get_prefix_layer_flags(dev_flow);
5208                 flow_split_info->prefix_mark = dev_flow->handle->mark;
5209         }
5210         /* Add the suffix subflow (or the original flow if no meter). */
5211         ret = flow_create_split_metadata(dev, flow,
5212                                          &sfx_attr, sfx_items ?
5213                                          sfx_items : items,
5214                                          sfx_actions ? sfx_actions : actions,
5215                                          flow_split_info, error);
5216 exit:
5217         if (sfx_actions)
5218                 mlx5_free(sfx_actions);
5219         return ret;
5220 }
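/*
 * Illustrative sketch of the meter split above (assumed example, not taken
 * from a real rule): a rule with actions { METER, QUEUE, END } is rebuilt as
 *   prefix: original items + { METER, set_tag(mtr_tag_id), END }
 *   suffix: tag(mtr_tag_id) item in the meter suffix table + { QUEUE, END }
 * The suffix part is then handed to flow_create_split_metadata() for a
 * possible further Q/RSS split.
 */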
5221
5222 /**
5223  * The splitting for sample feature.
5224  *
5225  * Once a Sample action is detected in the action list, the flow actions are
5226  * split into a prefix sub flow and a suffix sub flow.
5227  *
5228  * The original items remain in the prefix sub flow. All actions preceding the
5229  * sample action, and the sample action itself, are copied to the prefix
5230  * sub flow, while the actions following the sample action are copied to the
5231  * suffix sub flow. The Queue action is always located in the suffix sub flow.
5232  *
5233  * In order to make packets from the prefix sub flow match the suffix sub
5234  * flow, an extra tag action is added to the prefix sub flow, and the suffix
5235  * sub flow uses a tag item with the unique flow ID (see the sketch below).
5236  *
5237  * @param dev
5238  *   Pointer to Ethernet device.
5239  * @param[in] flow
5240  *   Parent flow structure pointer.
5241  * @param[in] attr
5242  *   Flow rule attributes.
5243  * @param[in] items
5244  *   Pattern specification (list terminated by the END pattern item).
5245  * @param[in] actions
5246  *   Associated actions (list terminated by the END action).
5247  * @param[in] flow_split_info
5248  *   Pointer to flow split info structure.
5249  * @param[out] error
5250  *   Perform verbose error reporting if not NULL.
5251  * @return
5252  *   0 on success, negative value otherwise
5253  */
5254 static int
5255 flow_create_split_sample(struct rte_eth_dev *dev,
5256                          struct rte_flow *flow,
5257                          const struct rte_flow_attr *attr,
5258                          const struct rte_flow_item items[],
5259                          const struct rte_flow_action actions[],
5260                          struct mlx5_flow_split_info *flow_split_info,
5261                          struct rte_flow_error *error)
5262 {
5263         struct mlx5_priv *priv = dev->data->dev_private;
5264         struct rte_flow_action *sfx_actions = NULL;
5265         struct rte_flow_action *pre_actions = NULL;
5266         struct rte_flow_item *sfx_items = NULL;
5267         struct mlx5_flow *dev_flow = NULL;
5268         struct rte_flow_attr sfx_attr = *attr;
5269 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5270         struct mlx5_flow_dv_sample_resource *sample_res;
5271         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
5272         struct mlx5_flow_tbl_resource *sfx_tbl;
5273         union mlx5_flow_tbl_key sfx_table_key;
5274 #endif
5275         size_t act_size;
5276         size_t item_size;
5277         uint32_t fdb_tx = 0;
5278         int32_t tag_id = 0;
5279         int actions_n = 0;
5280         int sample_action_pos;
5281         int qrss_action_pos;
5282         int ret = 0;
5283
5284         if (priv->sampler_en)
5285                 actions_n = flow_check_match_action(actions, attr,
5286                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
5287                                         &sample_action_pos, &qrss_action_pos);
5288         if (actions_n) {
5289                 /* The prefix actions must include sample, tag, end. */
5290                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
5291                            + sizeof(struct mlx5_rte_flow_action_set_tag);
5292                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
5293                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5294                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
5295                                           item_size), 0, SOCKET_ID_ANY);
5296                 if (!sfx_actions)
5297                         return rte_flow_error_set(error, ENOMEM,
5298                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5299                                                   NULL, "no memory to split "
5300                                                   "sample flow");
5301                 /* The representor_id is -1 for uplink. */
5302                 fdb_tx = (attr->transfer && priv->representor_id != -1);
5303                 if (!fdb_tx)
5304                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
5305                                         + act_size);
5306                 pre_actions = sfx_actions + actions_n;
5307                 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items,
5308                                                 actions, sfx_actions,
5309                                                 pre_actions, actions_n,
5310                                                 sample_action_pos,
5311                                                 qrss_action_pos, error);
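                /*
                 * On FDB Tx (transfer from a representor) no suffix tag is
                 * used, so a zero tag_id is valid there; otherwise a zero or
                 * negative tag_id means the split preparation failed.
                 */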
5312                 if (tag_id < 0 || (!fdb_tx && !tag_id)) {
5313                         ret = -rte_errno;
5314                         goto exit;
5315                 }
5316                 /* Add the prefix subflow. */
5317                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5318                                               items, pre_actions,
5319                                               flow_split_info, error);
5320                 if (ret) {
5321                         ret = -rte_errno;
5322                         goto exit;
5323                 }
5324                 dev_flow->handle->split_flow_id = tag_id;
5325 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5326                 /* Set the sfx group attr. */
5327                 sample_res = (struct mlx5_flow_dv_sample_resource *)
5328                                         dev_flow->dv.sample_res;
5329                 sfx_tbl = (struct mlx5_flow_tbl_resource *)
5330                                         sample_res->normal_path_tbl;
5331                 sfx_tbl_data = container_of(sfx_tbl,
5332                                         struct mlx5_flow_tbl_data_entry, tbl);
5333                 sfx_table_key.v64 = sfx_tbl_data->entry.key;
5334                 sfx_attr.group = sfx_attr.transfer ?
5335                                         (sfx_table_key.table_id - 1) :
5336                                          sfx_table_key.table_id;
5337                 flow_split_info->prefix_layers =
5338                                 flow_get_prefix_layer_flags(dev_flow);
5339                 flow_split_info->prefix_mark = dev_flow->handle->mark;
5340                 /* The suffix group level is already scaled with the factor;
5341                  * set skip_scale to 1 to avoid scaling again in translation.
5342                  */
5343                 flow_split_info->skip_scale = 1;
5344 #endif
5345         }
5346         /* Add the suffix subflow. */
5347         ret = flow_create_split_meter(dev, flow, &sfx_attr,
5348                                       sfx_items ? sfx_items : items,
5349                                       sfx_actions ? sfx_actions : actions,
5350                                       flow_split_info, error);
5351 exit:
5352         if (sfx_actions)
5353                 mlx5_free(sfx_actions);
5354         return ret;
5355 }
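/*
 * Illustrative sketch of the sample split above (assumed example, not taken
 * from a real rule): a rule with actions { COUNT, SAMPLE, QUEUE, END } is
 * rebuilt as
 *   prefix: original items + { COUNT, SAMPLE, set_tag(tag_id), END }
 *   suffix: tag(tag_id) item in the sample suffix table + { QUEUE, END }
 * matching the prefix/suffix layout described in the comment above
 * flow_create_split_sample().
 */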
5356
5357 /**
5358  * Split the flow into a subflow set. The splitters might be linked
5359  * in a chain, like this:
5360  * flow_create_split_outer() calls:
5361  *   flow_create_split_meter() calls:
5362  *     flow_create_split_metadata(meter_subflow_0) calls:
5363  *       flow_create_split_inner(metadata_subflow_0)
5364  *       flow_create_split_inner(metadata_subflow_1)
5365  *       flow_create_split_inner(metadata_subflow_2)
5366  *     flow_create_split_metadata(meter_subflow_1) calls:
5367  *       flow_create_split_inner(metadata_subflow_0)
5368  *       flow_create_split_inner(metadata_subflow_1)
5369  *       flow_create_split_inner(metadata_subflow_2)
5370  *
5371  * This provides a flexible way to add new levels of flow splitting.
5372  * All successfully created subflows are included in the parent
5373  * flow dev_flow list.
5374  *
5375  * @param dev
5376  *   Pointer to Ethernet device.
5377  * @param[in] flow
5378  *   Parent flow structure pointer.
5379  * @param[in] attr
5380  *   Flow rule attributes.
5381  * @param[in] items
5382  *   Pattern specification (list terminated by the END pattern item).
5383  * @param[in] actions
5384  *   Associated actions (list terminated by the END action).
5385  * @param[in] flow_split_info
5386  *   Pointer to flow split info structure.
5387  * @param[out] error
5388  *   Perform verbose error reporting if not NULL.
5389  * @return
5390  *   0 on success, negative value otherwise
5391  */
5392 static int
5393 flow_create_split_outer(struct rte_eth_dev *dev,
5394                         struct rte_flow *flow,
5395                         const struct rte_flow_attr *attr,
5396                         const struct rte_flow_item items[],
5397                         const struct rte_flow_action actions[],
5398                         struct mlx5_flow_split_info *flow_split_info,
5399                         struct rte_flow_error *error)
5400 {
5401         int ret;
5402
5403         ret = flow_create_split_sample(dev, flow, attr, items,
5404                                        actions, flow_split_info, error);
5405         MLX5_ASSERT(ret <= 0);
5406         return ret;
5407 }
5408
5409 static struct mlx5_flow_tunnel *
5410 flow_tunnel_from_rule(struct rte_eth_dev *dev,
5411                       const struct rte_flow_attr *attr,
5412                       const struct rte_flow_item items[],
5413                       const struct rte_flow_action actions[])
5414 {
5415         struct mlx5_flow_tunnel *tunnel;
5416
5417 #pragma GCC diagnostic push
5418 #pragma GCC diagnostic ignored "-Wcast-qual"
5419         if (is_flow_tunnel_match_rule(dev, attr, items, actions))
5420                 tunnel = (struct mlx5_flow_tunnel *)items[0].spec;
5421         else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
5422                 tunnel = (struct mlx5_flow_tunnel *)actions[0].conf;
5423         else
5424                 tunnel = NULL;
5425 #pragma GCC diagnostic pop
5426
5427         return tunnel;
5428 }
5429
5430 /**
5431  * Adjust flow RSS workspace if needed.
5432  *
5433  * @param wks
5434  *   Pointer to thread flow work space.
5435  * @param rss_desc
5436  *   Pointer to RSS descriptor.
5437  * @param[in] nrssq_num
5438  *   New RSS queue number.
5439  *
5440  * @return
5441  *   0 on success, -1 otherwise and rte_errno is set.
5442  */
5443 static int
5444 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
5445                           struct mlx5_flow_rss_desc *rss_desc,
5446                           uint32_t nrssq_num)
5447 {
5448         bool fidx = !!wks->flow_idx;
5449
5450         if (likely(nrssq_num <= wks->rssq_num[fidx]))
5451                 return 0;
5452         rss_desc->queue = realloc(rss_desc->queue,
5453                           sizeof(rss_desc->queue[0]) * RTE_ALIGN(nrssq_num, 2));
5454         if (!rss_desc->queue) {
5455                 rte_errno = ENOMEM;
5456                 return -1;
5457         }
5458         wks->rssq_num[fidx] = RTE_ALIGN(nrssq_num, 2);
5459         return 0;
5460 }
5461
5462 /**
5463  * Create a flow and add it to @p list.
5464  *
5465  * @param dev
5466  *   Pointer to Ethernet device.
5467  * @param list
5468  *   Pointer to a TAILQ flow list. If this parameter is NULL,
5469  *   no list insertion occurs; the flow is just created and
5470  *   it is the caller's responsibility to track the
5471  *   created flow.
5472  * @param[in] attr
5473  *   Flow rule attributes.
5474  * @param[in] items
5475  *   Pattern specification (list terminated by the END pattern item).
5476  * @param[in] actions
5477  *   Associated actions (list terminated by the END action).
5478  * @param[in] external
5479  *   This flow rule is created by a request external to the PMD.
5480  * @param[out] error
5481  *   Perform verbose error reporting if not NULL.
5482  *
5483  * @return
5484  *   A flow index on success, 0 otherwise and rte_errno is set.
5485  */
5486 static uint32_t
5487 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
5488                  const struct rte_flow_attr *attr,
5489                  const struct rte_flow_item items[],
5490                  const struct rte_flow_action original_actions[],
5491                  bool external, struct rte_flow_error *error)
5492 {
5493         struct mlx5_priv *priv = dev->data->dev_private;
5494         struct rte_flow *flow = NULL;
5495         struct mlx5_flow *dev_flow;
5496         const struct rte_flow_action_rss *rss;
5497         struct mlx5_translated_shared_action
5498                 shared_actions[MLX5_MAX_SHARED_ACTIONS];
5499         int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5500         union {
5501                 struct mlx5_flow_expand_rss buf;
5502                 uint8_t buffer[2048];
5503         } expand_buffer;
5504         union {
5505                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5506                 uint8_t buffer[2048];
5507         } actions_rx;
5508         union {
5509                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5510                 uint8_t buffer[2048];
5511         } actions_hairpin_tx;
5512         union {
5513                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
5514                 uint8_t buffer[2048];
5515         } items_tx;
5516         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
5517         struct mlx5_flow_rss_desc *rss_desc;
5518         const struct rte_flow_action *p_actions_rx;
5519         uint32_t i;
5520         uint32_t idx = 0;
5521         int hairpin_flow;
5522         struct rte_flow_attr attr_tx = { .priority = 0 };
5523         const struct rte_flow_action *actions;
5524         struct rte_flow_action *translated_actions = NULL;
5525         struct mlx5_flow_tunnel *tunnel;
5526         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
5527         struct mlx5_flow_split_info flow_split_info = {
5528                 .external = !!external,
5529                 .skip_scale = 0,
5530                 .flow_idx = 0,
5531                 .prefix_mark = 0,
5532                 .prefix_layers = 0
5533         };
5534         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
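        /*
         * wks->flow_idx is non-zero while another flow is being created on
         * this thread (nested call), so the second ("ping-pong") RSS
         * descriptor slot is selected in that case.
         */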
5535         bool fidx = !!wks->flow_idx;
5536         int ret;
5537
5538         MLX5_ASSERT(wks);
5539         rss_desc = &wks->rss_desc[fidx];
5540         ret = flow_shared_actions_translate(dev, original_actions,
5541                                             shared_actions,
5542                                             &shared_actions_n,
5543                                             &translated_actions, error);
5544         if (ret < 0) {
5545                 MLX5_ASSERT(translated_actions == NULL);
5546                 return 0;
5547         }
5548         actions = translated_actions ? translated_actions : original_actions;
5549         p_actions_rx = actions;
5550         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5551         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
5552                                 external, hairpin_flow, error);
5553         if (ret < 0)
5554                 goto error_before_hairpin_split;
5555         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
5556         if (!flow) {
5557                 rte_errno = ENOMEM;
5558                 goto error_before_hairpin_split;
5559         }
5560         if (hairpin_flow > 0) {
5561                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
5562                         rte_errno = EINVAL;
5563                         goto error_before_hairpin_split;
5564                 }
5565                 flow_hairpin_split(dev, actions, actions_rx.actions,
5566                                    actions_hairpin_tx.actions, items_tx.items,
5567                                    idx);
5568                 p_actions_rx = actions_rx.actions;
5569         }
5570         flow_split_info.flow_idx = idx;
5571         flow->drv_type = flow_get_drv_type(dev, attr);
5572         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
5573                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
5574         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
5575         rss = flow_get_rss_action(p_actions_rx);
5576         if (rss) {
5577                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
5578                         return 0;
5579                 /*
5580                  * The following information is required by
5581                  * mlx5_flow_hashfields_adjust() in advance.
5582                  */
5583                 rss_desc->level = rss->level;
5584                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
5585                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
5586         }
5587         flow->dev_handles = 0;
5588         if (rss && rss->types) {
5589                 unsigned int graph_root;
5590
5591                 graph_root = find_graph_root(items, rss->level);
5592                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
5593                                            items, rss->types,
5594                                            mlx5_support_expansion, graph_root);
5595                 MLX5_ASSERT(ret > 0 &&
5596                        (unsigned int)ret < sizeof(expand_buffer.buffer));
5597         } else {
5598                 buf->entries = 1;
5599                 buf->entry[0].pattern = (void *)(uintptr_t)items;
5600         }
5601         rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
5602                                                       shared_actions_n);
5603         /*
5604          * Record the start index when there is a nested call. All sub-flows
5605          * need to be translated before another call is made.
5606          * No ping-pong buffer is needed here, which saves memory.
5607          */
5608         if (fidx) {
5609                 MLX5_ASSERT(!wks->flow_nested_idx);
5610                 wks->flow_nested_idx = fidx;
5611         }
5612         for (i = 0; i < buf->entries; ++i) {
5613                 /* Initialize flow split data. */
5614                 flow_split_info.prefix_layers = 0;
5615                 flow_split_info.prefix_mark = 0;
5616                 flow_split_info.skip_scale = 0;
5617                 /*
5618                  * The splitter may create multiple dev_flows,
5619                  * depending on configuration. In the simplest
5620                  * case it just creates the unmodified original flow.
5621                  */
5622                 ret = flow_create_split_outer(dev, flow, attr,
5623                                               buf->entry[i].pattern,
5624                                               p_actions_rx, &flow_split_info,
5625                                               error);
5626                 if (ret < 0)
5627                         goto error;
5628                 if (is_flow_tunnel_steer_rule(dev, attr,
5629                                               buf->entry[i].pattern,
5630                                               p_actions_rx)) {
5631                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
5632                                                            p_actions_rx,
5633                                                            idx,
5634                                                            &default_miss_ctx,
5635                                                            error);
5636                         if (ret < 0) {
5637                                 mlx5_free(default_miss_ctx.queue);
5638                                 goto error;
5639                         }
5640                 }
5641         }
5642         /* Create the tx flow. */
5643         if (hairpin_flow) {
5644                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
5645                 attr_tx.ingress = 0;
5646                 attr_tx.egress = 1;
5647                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
5648                                          actions_hairpin_tx.actions,
5649                                          idx, error);
5650                 if (!dev_flow)
5651                         goto error;
5652                 dev_flow->flow = flow;
5653                 dev_flow->external = 0;
5654                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5655                               dev_flow->handle, next);
5656                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
5657                                          items_tx.items,
5658                                          actions_hairpin_tx.actions, error);
5659                 if (ret < 0)
5660                         goto error;
5661         }
5662         /*
5663          * Update the metadata register copy table. If the extensive
5664          * metadata feature is enabled and registers are supported,
5665          * an extra rte_flow may be created for each unique
5666          * MARK/FLAG action ID.
5667          *
5668          * The table is updated for ingress flows only, because
5669          * the egress flows belong to a different device and the
5670          * copy table should be updated in the peer NIC Rx domain.
5671          */
5672         if (attr->ingress &&
5673             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
5674                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
5675                 if (ret)
5676                         goto error;
5677         }
5678         /*
5679          * If the flow is external (from the application) or the device is
5680          * started, the flow will be applied immediately.
5681          */
5682         if (external || dev->data->dev_started) {
5683                 ret = flow_drv_apply(dev, flow, error);
5684                 if (ret < 0)
5685                         goto error;
5686         }
5687         if (list) {
5688                 rte_spinlock_lock(&priv->flow_list_lock);
5689                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
5690                              flow, next);
5691                 rte_spinlock_unlock(&priv->flow_list_lock);
5692         }
5693         flow_rxq_flags_set(dev, flow);
5694         rte_free(translated_actions);
5695         /* Nested flow creation index recovery. */
5696         wks->flow_idx = wks->flow_nested_idx;
5697         if (wks->flow_nested_idx)
5698                 wks->flow_nested_idx = 0;
5699         tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
5700         if (tunnel) {
5701                 flow->tunnel = 1;
5702                 flow->tunnel_id = tunnel->tunnel_id;
5703                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
5704                 mlx5_free(default_miss_ctx.queue);
5705         }
5706         return idx;
5707 error:
5708         MLX5_ASSERT(flow);
5709         ret = rte_errno; /* Save rte_errno before cleanup. */
5710         flow_mreg_del_copy_action(dev, flow);
5711         flow_drv_destroy(dev, flow);
5712         if (rss_desc->shared_rss)
5713                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
5714                         mlx5_ipool_get
5715                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
5716                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
5717         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
5718                 rte_errno = ret; /* Restore rte_errno. */
5721         wks->flow_idx = wks->flow_nested_idx;
5722         if (wks->flow_nested_idx)
5723                 wks->flow_nested_idx = 0;
5724 error_before_hairpin_split:
5725         rte_free(translated_actions);
5726         return 0;
5727 }
5728
5729 /**
5730  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
5731  * incoming packets to table 1.
5732  *
5733  * Other flow rules, requested for group n, will be created in
5734  * e-switch table n+1.
5735  * A jump action targeting e-switch group n will jump to table n+1.
5736  *
5737  * Used when working in switchdev mode, to utilise advantages of table 1
5738  * and above.
5739  *
5740  * @param dev
5741  *   Pointer to Ethernet device.
5742  *
5743  * @return
5744  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
5745  */
5746 struct rte_flow *
5747 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
5748 {
5749         const struct rte_flow_attr attr = {
5750                 .group = 0,
5751                 .priority = 0,
5752                 .ingress = 1,
5753                 .egress = 0,
5754                 .transfer = 1,
5755         };
5756         const struct rte_flow_item pattern = {
5757                 .type = RTE_FLOW_ITEM_TYPE_END,
5758         };
5759         struct rte_flow_action_jump jump = {
5760                 .group = 1,
5761         };
5762         const struct rte_flow_action actions[] = {
5763                 {
5764                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
5765                         .conf = &jump,
5766                 },
5767                 {
5768                         .type = RTE_FLOW_ACTION_TYPE_END,
5769                 },
5770         };
5771         struct mlx5_priv *priv = dev->data->dev_private;
5772         struct rte_flow_error error;
5773
5774         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
5775                                                    &attr, &pattern,
5776                                                    actions, false, &error);
5777 }
5778
5779 /**
5780  * Validate a flow supported by the NIC.
5781  *
5782  * @see rte_flow_validate()
5783  * @see rte_flow_ops
5784  */
5785 int
5786 mlx5_flow_validate(struct rte_eth_dev *dev,
5787                    const struct rte_flow_attr *attr,
5788                    const struct rte_flow_item items[],
5789                    const struct rte_flow_action original_actions[],
5790                    struct rte_flow_error *error)
5791 {
5792         int hairpin_flow;
5793         struct mlx5_translated_shared_action
5794                 shared_actions[MLX5_MAX_SHARED_ACTIONS];
5795         int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5796         const struct rte_flow_action *actions;
5797         struct rte_flow_action *translated_actions = NULL;
5798         int ret = flow_shared_actions_translate(dev, original_actions,
5799                                                 shared_actions,
5800                                                 &shared_actions_n,
5801                                                 &translated_actions, error);
5802
5803         if (ret)
5804                 return ret;
5805         actions = translated_actions ? translated_actions : original_actions;
5806         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5807         ret = flow_drv_validate(dev, attr, items, actions,
5808                                 true, hairpin_flow, error);
5809         rte_free(translated_actions);
5810         return ret;
5811 }
5812
5813 /**
5814  * Create a flow.
5815  *
5816  * @see rte_flow_create()
5817  * @see rte_flow_ops
5818  */
5819 struct rte_flow *
5820 mlx5_flow_create(struct rte_eth_dev *dev,
5821                  const struct rte_flow_attr *attr,
5822                  const struct rte_flow_item items[],
5823                  const struct rte_flow_action actions[],
5824                  struct rte_flow_error *error)
5825 {
5826         struct mlx5_priv *priv = dev->data->dev_private;
5827
5828         /*
5829          * If the device is not started yet, it is not allowed to create a
5830          * flow from the application. PMD default flows and traffic control flows
5831          * are not affected.
5832          */
5833         if (unlikely(!dev->data->dev_started)) {
5834                 DRV_LOG(DEBUG, "port %u is not started when "
5835                         "inserting a flow", dev->data->port_id);
5836                 rte_flow_error_set(error, ENODEV,
5837                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5838                                    NULL,
5839                                    "port not started");
5840                 return NULL;
5841         }
5842
5843         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
5844                                   attr, items, actions, true, error);
5845 }
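/*
 * Illustrative application-side sketch (not part of this driver): flows
 * reach mlx5_flow_create() through the generic rte_flow API, for example
 * (port_id is the application's port number):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = NULL;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *           f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * The port must already be started, as checked above.
 */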
5846
5847 /**
5848  * Destroy a flow in a list.
5849  *
5850  * @param dev
5851  *   Pointer to Ethernet device.
5852  * @param list
5853  *   Pointer to the indexed flow list. If this parameter is NULL,
5854  *   the flow is not removed from any list. Note that since flows
5855  *   are added to the indexed list, the memory the list points to
5856  *   may change as flows are destroyed.
5857  * @param[in] flow_idx
5858  *   Index of flow to destroy.
5859  */
5860 static void
5861 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
5862                   uint32_t flow_idx)
5863 {
5864         struct mlx5_priv *priv = dev->data->dev_private;
5865         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5866                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
5867
5868         if (!flow)
5869                 return;
5870         /*
5871          * Update RX queue flags only if port is started, otherwise it is
5872          * already clean.
5873          */
5874         if (dev->data->dev_started)
5875                 flow_rxq_flags_trim(dev, flow);
5876         flow_drv_destroy(dev, flow);
5877         if (list) {
5878                 rte_spinlock_lock(&priv->flow_list_lock);
5879                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
5880                              flow_idx, flow, next);
5881                 rte_spinlock_unlock(&priv->flow_list_lock);
5882         }
5883         if (flow->tunnel) {
5884                 struct mlx5_flow_tunnel *tunnel;
5885
5886                 rte_spinlock_lock(&mlx5_tunnel_hub(dev)->sl);
5887                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
5888                 RTE_VERIFY(tunnel);
5889                 LIST_REMOVE(tunnel, chain);
5890                 rte_spinlock_unlock(&mlx5_tunnel_hub(dev)->sl);
5891                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
5892                         mlx5_flow_tunnel_free(dev, tunnel);
5893         }
5894         flow_mreg_del_copy_action(dev, flow);
5895         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5896 }
5897
5898 /**
5899  * Destroy all flows.
5900  *
5901  * @param dev
5902  *   Pointer to Ethernet device.
5903  * @param list
5904  *   Pointer to the Indexed flow list.
5905  * @param active
5906  *   Set when flushing is invoked actively (e.g. before port stop); the
 *   number of flushed flows is then logged.
5907  */
5908 void
5909 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
5910 {
5911         uint32_t num_flushed = 0;
5912
5913         while (*list) {
5914                 flow_list_destroy(dev, list, *list);
5915                 num_flushed++;
5916         }
5917         if (active) {
5918                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
5919                         dev->data->port_id, num_flushed);
5920         }
5921 }
5922
5923 /**
5924  * Stop all default actions for flows.
5925  *
5926  * @param dev
5927  *   Pointer to Ethernet device.
5928  */
5929 void
5930 mlx5_flow_stop_default(struct rte_eth_dev *dev)
5931 {
5932         flow_mreg_del_default_copy_action(dev);
5933         flow_rxq_flags_clear(dev);
5934 }
5935
5936 /**
5937  * Start all default actions for flows.
5938  *
5939  * @param dev
5940  *   Pointer to Ethernet device.
5941  * @return
5942  *   0 on success, a negative errno value otherwise and rte_errno is set.
5943  */
5944 int
5945 mlx5_flow_start_default(struct rte_eth_dev *dev)
5946 {
5947         struct rte_flow_error error;
5948
5949         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
5950         return flow_mreg_add_default_copy_action(dev, &error);
5951 }
5952
5953 /**
5954  * Release key of thread specific flow workspace data.
5955  */
5956 static void
5957 flow_release_workspace(void *data)
5958 {
5959         struct mlx5_flow_workspace *wks = data;
5960
5961         if (!wks)
5962                 return;
5963         free(wks->rss_desc[0].queue);
5964         free(wks->rss_desc[1].queue);
5965         free(wks);
5966 }
5967
5968 /**
5969  * Initialize key of thread specific flow workspace data.
5970  */
5971 static void
5972 flow_alloc_workspace(void)
5973 {
5974         if (pthread_key_create(&key_workspace, flow_release_workspace))
5975                 DRV_LOG(ERR, "Can't create flow workspace data thread key.");
5976 }
5977
5978 /**
5979  * Get thread specific flow workspace.
5980  *
5981  * @return pointer to thread specific flow workspace data, NULL on error.
5982  */
5983 struct mlx5_flow_workspace*
5984 mlx5_flow_get_thread_workspace(void)
5985 {
5986         struct mlx5_flow_workspace *data;
5987
5988         if (pthread_once(&key_workspace_init, flow_alloc_workspace)) {
5989                 DRV_LOG(ERR, "Failed to init flow workspace data thread key.");
5990                 return NULL;
5991         }
5992         data = pthread_getspecific(key_workspace);
5993         if (!data) {
5994                 data = calloc(1, sizeof(*data));
5995                 if (!data) {
5996                         DRV_LOG(ERR, "Failed to allocate flow workspace "
5997                                 "memory.");
5998                         return NULL;
5999                 }
6000                 data->rss_desc[0].queue = calloc(1,
6001                                 sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6002                 if (!data->rss_desc[0].queue)
6003                         goto err;
6004                 data->rss_desc[1].queue = calloc(1,
6005                                 sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6006                 if (!data->rss_desc[1].queue)
6007                         goto err;
6008                 data->rssq_num[0] = MLX5_RSSQ_DEFAULT_NUM;
6009                 data->rssq_num[1] = MLX5_RSSQ_DEFAULT_NUM;
6010                 if (pthread_setspecific(key_workspace, data)) {
6011                         DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6012                         goto err;
6013                 }
6014         }
6015         return data;
6016 err:
6017         if (data->rss_desc[0].queue)
6018                 free(data->rss_desc[0].queue);
6019         if (data->rss_desc[1].queue)
6020                 free(data->rss_desc[1].queue);
6021         free(data);
6022         return NULL;
6023 }
6024
6025 /**
6026  * Verify the flow list is empty
6027  *
6028  * @param dev
6029  *  Pointer to Ethernet device.
6030  *
6031  * @return the number of flows not released.
6032  */
6033 int
6034 mlx5_flow_verify(struct rte_eth_dev *dev)
6035 {
6036         struct mlx5_priv *priv = dev->data->dev_private;
6037         struct rte_flow *flow;
6038         uint32_t idx;
6039         int ret = 0;
6040
6041         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
6042                       flow, next) {
6043                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
6044                         dev->data->port_id, (void *)flow);
6045                 ++ret;
6046         }
6047         return ret;
6048 }
6049
6050 /**
6051  * Enable default hairpin egress flow.
6052  *
6053  * @param dev
6054  *   Pointer to Ethernet device.
6055  * @param queue
6056  *   The queue index.
6057  *
6058  * @return
6059  *   0 on success, a negative errno value otherwise and rte_errno is set.
6060  */
6061 int
6062 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
6063                             uint32_t queue)
6064 {
6065         struct mlx5_priv *priv = dev->data->dev_private;
6066         const struct rte_flow_attr attr = {
6067                 .egress = 1,
6068                 .priority = 0,
6069         };
6070         struct mlx5_rte_flow_item_tx_queue queue_spec = {
6071                 .queue = queue,
6072         };
6073         struct mlx5_rte_flow_item_tx_queue queue_mask = {
6074                 .queue = UINT32_MAX,
6075         };
6076         struct rte_flow_item items[] = {
6077                 {
6078                         .type = (enum rte_flow_item_type)
6079                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6080                         .spec = &queue_spec,
6081                         .last = NULL,
6082                         .mask = &queue_mask,
6083                 },
6084                 {
6085                         .type = RTE_FLOW_ITEM_TYPE_END,
6086                 },
6087         };
6088         struct rte_flow_action_jump jump = {
6089                 .group = MLX5_HAIRPIN_TX_TABLE,
6090         };
6091         struct rte_flow_action actions[2];
6092         uint32_t flow_idx;
6093         struct rte_flow_error error;
6094
6095         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
6096         actions[0].conf = &jump;
6097         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
6098         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6099                                 &attr, items, actions, false, &error);
6100         if (!flow_idx) {
6101                 DRV_LOG(DEBUG,
6102                         "Failed to create ctrl flow: rte_errno(%d),"
6103                         " type(%d), message(%s)",
6104                         rte_errno, error.type,
6105                         error.message ? error.message : " (no stated reason)");
6106                 return -rte_errno;
6107         }
6108         return 0;
6109 }
6110
6111 /**
6112  * Enable a control flow configured from the control plane.
6113  *
6114  * @param dev
6115  *   Pointer to Ethernet device.
6116  * @param eth_spec
6117  *   An Ethernet flow spec to apply.
6118  * @param eth_mask
6119  *   An Ethernet flow mask to apply.
6120  * @param vlan_spec
6121  *   A VLAN flow spec to apply.
6122  * @param vlan_mask
6123  *   A VLAN flow mask to apply.
6124  *
6125  * @return
6126  *   0 on success, a negative errno value otherwise and rte_errno is set.
6127  */
6128 int
6129 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
6130                     struct rte_flow_item_eth *eth_spec,
6131                     struct rte_flow_item_eth *eth_mask,
6132                     struct rte_flow_item_vlan *vlan_spec,
6133                     struct rte_flow_item_vlan *vlan_mask)
6134 {
6135         struct mlx5_priv *priv = dev->data->dev_private;
6136         const struct rte_flow_attr attr = {
6137                 .ingress = 1,
6138                 .priority = MLX5_FLOW_PRIO_RSVD,
6139         };
6140         struct rte_flow_item items[] = {
6141                 {
6142                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6143                         .spec = eth_spec,
6144                         .last = NULL,
6145                         .mask = eth_mask,
6146                 },
6147                 {
6148                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
6149                                               RTE_FLOW_ITEM_TYPE_END,
6150                         .spec = vlan_spec,
6151                         .last = NULL,
6152                         .mask = vlan_mask,
6153                 },
6154                 {
6155                         .type = RTE_FLOW_ITEM_TYPE_END,
6156                 },
6157         };
6158         uint16_t queue[priv->reta_idx_n];
6159         struct rte_flow_action_rss action_rss = {
6160                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
6161                 .level = 0,
6162                 .types = priv->rss_conf.rss_hf,
6163                 .key_len = priv->rss_conf.rss_key_len,
6164                 .queue_num = priv->reta_idx_n,
6165                 .key = priv->rss_conf.rss_key,
6166                 .queue = queue,
6167         };
6168         struct rte_flow_action actions[] = {
6169                 {
6170                         .type = RTE_FLOW_ACTION_TYPE_RSS,
6171                         .conf = &action_rss,
6172                 },
6173                 {
6174                         .type = RTE_FLOW_ACTION_TYPE_END,
6175                 },
6176         };
6177         uint32_t flow_idx;
6178         struct rte_flow_error error;
6179         unsigned int i;
6180
6181         if (!priv->reta_idx_n || !priv->rxqs_n) {
6182                 return 0;
6183         }
6184         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
6185                 action_rss.types = 0;
6186         for (i = 0; i != priv->reta_idx_n; ++i)
6187                 queue[i] = (*priv->reta_idx)[i];
6188         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6189                                 &attr, items, actions, false, &error);
6190         if (!flow_idx)
6191                 return -rte_errno;
6192         return 0;
6193 }
6194
6195 /**
6196  * Enable a control flow configured from the control plane.
6197  *
6198  * @param dev
6199  *   Pointer to Ethernet device.
6200  * @param eth_spec
6201  *   An Ethernet flow spec to apply.
6202  * @param eth_mask
6203  *   An Ethernet flow mask to apply.
6204  *
6205  * @return
6206  *   0 on success, a negative errno value otherwise and rte_errno is set.
6207  */
6208 int
6209 mlx5_ctrl_flow(struct rte_eth_dev *dev,
6210                struct rte_flow_item_eth *eth_spec,
6211                struct rte_flow_item_eth *eth_mask)
6212 {
6213         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
6214 }
6215
6216 /**
6217  * Create a default miss flow rule matching LACP traffic.
6218  *
6219  * @param dev
6220  *   Pointer to Ethernet device.
6223  *
6224  * @return
6225  *   0 on success, a negative errno value otherwise and rte_errno is set.
6226  */
6227 int
6228 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
6229 {
6230         struct mlx5_priv *priv = dev->data->dev_private;
6231         /*
6232          * The LACP matching is done using only the ether type, since a
6233          * multicast dst MAC causes the kernel to give this flow low priority.
6234          */
6235         static const struct rte_flow_item_eth lacp_spec = {
6236                 .type = RTE_BE16(0x8809),
6237         };
6238         static const struct rte_flow_item_eth lacp_mask = {
6239                 .type = 0xffff,
6240         };
6241         const struct rte_flow_attr attr = {
6242                 .ingress = 1,
6243         };
6244         struct rte_flow_item items[] = {
6245                 {
6246                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6247                         .spec = &lacp_spec,
6248                         .mask = &lacp_mask,
6249                 },
6250                 {
6251                         .type = RTE_FLOW_ITEM_TYPE_END,
6252                 },
6253         };
6254         struct rte_flow_action actions[] = {
6255                 {
6256                         .type = (enum rte_flow_action_type)
6257                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
6258                 },
6259                 {
6260                         .type = RTE_FLOW_ACTION_TYPE_END,
6261                 },
6262         };
6263         struct rte_flow_error error;
6264         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6265                                 &attr, items, actions, false, &error);
6266
6267         if (!flow_idx)
6268                 return -rte_errno;
6269         return 0;
6270 }
6271
6272 /**
6273  * Destroy a flow.
6274  *
6275  * @see rte_flow_destroy()
6276  * @see rte_flow_ops
6277  */
6278 int
6279 mlx5_flow_destroy(struct rte_eth_dev *dev,
6280                   struct rte_flow *flow,
6281                   struct rte_flow_error *error __rte_unused)
6282 {
6283         struct mlx5_priv *priv = dev->data->dev_private;
6284
6285         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
6286         return 0;
6287 }
6288
6289 /**
6290  * Destroy all flows.
6291  *
6292  * @see rte_flow_flush()
6293  * @see rte_flow_ops
6294  */
6295 int
6296 mlx5_flow_flush(struct rte_eth_dev *dev,
6297                 struct rte_flow_error *error __rte_unused)
6298 {
6299         struct mlx5_priv *priv = dev->data->dev_private;
6300
6301         mlx5_flow_list_flush(dev, &priv->flows, false);
6302         return 0;
6303 }
6304
6305 /**
6306  * Isolated mode.
6307  *
6308  * @see rte_flow_isolate()
6309  * @see rte_flow_ops
6310  */
6311 int
6312 mlx5_flow_isolate(struct rte_eth_dev *dev,
6313                   int enable,
6314                   struct rte_flow_error *error)
6315 {
6316         struct mlx5_priv *priv = dev->data->dev_private;
6317
6318         if (dev->data->dev_started) {
6319                 rte_flow_error_set(error, EBUSY,
6320                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6321                                    NULL,
6322                                    "port must be stopped first");
6323                 return -rte_errno;
6324         }
6325         priv->isolated = !!enable;
6326         if (enable)
6327                 dev->dev_ops = &mlx5_os_dev_ops_isolate;
6328         else
6329                 dev->dev_ops = &mlx5_os_dev_ops;
6330
6331         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
6332         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
6333
6334         return 0;
6335 }
6336
6337 /**
6338  * Query a flow.
6339  *
6340  * @see rte_flow_query()
6341  * @see rte_flow_ops
6342  */
6343 static int
6344 flow_drv_query(struct rte_eth_dev *dev,
6345                uint32_t flow_idx,
6346                const struct rte_flow_action *actions,
6347                void *data,
6348                struct rte_flow_error *error)
6349 {
6350         struct mlx5_priv *priv = dev->data->dev_private;
6351         const struct mlx5_flow_driver_ops *fops;
6352         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
6353                                                [MLX5_IPOOL_RTE_FLOW],
6354                                                flow_idx);
6355         enum mlx5_flow_drv_type ftype;
6356
6357         if (!flow) {
6358                 return rte_flow_error_set(error, ENOENT,
6359                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6360                           NULL,
6361                           "invalid flow handle");
6362         }
6363         ftype = flow->drv_type;
6364         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
6365         fops = flow_get_drv_ops(ftype);
6366
6367         return fops->query(dev, flow, actions, data, error);
6368 }
6369
6370 /**
6371  * Query a flow.
6372  *
6373  * @see rte_flow_query()
6374  * @see rte_flow_ops
6375  */
6376 int
6377 mlx5_flow_query(struct rte_eth_dev *dev,
6378                 struct rte_flow *flow,
6379                 const struct rte_flow_action *actions,
6380                 void *data,
6381                 struct rte_flow_error *error)
6382 {
6383         int ret;
6384
6385         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
6386                              error);
6387         if (ret < 0)
6388                 return ret;
6389         return 0;
6390 }
6391
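/*
 * Illustrative sketch (not part of the driver): querying the COUNT action of
 * a rule through rte_flow_query(), which lands in mlx5_flow_query() above.
 * port_id and flow are placeholders.
 *
 *	struct rte_flow_query_count count = { .reset = 1 };
 *	struct rte_flow_action action[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *
 *	if (!rte_flow_query(port_id, flow, action, &count, &error))
 *		printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *		       count.hits, count.bytes);
 */
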
6392 /**
6393  * Manage filter operations.
6394  *
6395  * @param dev
6396  *   Pointer to Ethernet device structure.
6397  * @param filter_type
6398  *   Filter type.
6399  * @param filter_op
6400  *   Operation to perform.
6401  * @param arg
6402  *   Pointer to operation-specific structure.
6403  *
6404  * @return
6405  *   0 on success, a negative errno value otherwise and rte_errno is set.
6406  */
6407 int
6408 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
6409                      enum rte_filter_type filter_type,
6410                      enum rte_filter_op filter_op,
6411                      void *arg)
6412 {
6413         switch (filter_type) {
6414         case RTE_ETH_FILTER_GENERIC:
6415                 if (filter_op != RTE_ETH_FILTER_GET) {
6416                         rte_errno = EINVAL;
6417                         return -rte_errno;
6418                 }
6419                 *(const void **)arg = &mlx5_flow_ops;
6420                 return 0;
6421         default:
6422                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
6423                         dev->data->port_id, filter_type);
6424                 rte_errno = ENOTSUP;
6425                 return -rte_errno;
6426         }
6427         return 0;
6428 }
6429
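/*
 * Illustrative sketch (not part of the driver): the ethdev layer obtains the
 * rte_flow ops through the legacy filter API handled above. Given a device
 * pointer "dev", the lookup boils down to:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (!mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *				  RTE_ETH_FILTER_GET, &ops))
 *		assert(ops == &mlx5_flow_ops);
 */
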
6430 /**
6431  * Create the needed meter and suffix tables.
6432  *
6433  * @param[in] dev
6434  *   Pointer to Ethernet device.
6435  * @param[in] fm
6436  *   Pointer to the flow meter.
6437  *
6438  * @return
6439  *   Pointer to table set on success, NULL otherwise.
6440  */
6441 struct mlx5_meter_domains_infos *
6442 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
6443                           const struct mlx5_flow_meter *fm)
6444 {
6445         const struct mlx5_flow_driver_ops *fops;
6446
6447         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6448         return fops->create_mtr_tbls(dev, fm);
6449 }
6450
6451 /**
6452  * Destroy the meter table set.
6453  *
6454  * @param[in] dev
6455  *   Pointer to Ethernet device.
6456  * @param[in] tbl
6457  *   Pointer to the meter table set.
6458  *
6459  * @return
6460  *   0 on success.
6461  */
6462 int
6463 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
6464                            struct mlx5_meter_domains_infos *tbls)
6465 {
6466         const struct mlx5_flow_driver_ops *fops;
6467
6468         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6469         return fops->destroy_mtr_tbls(dev, tbls);
6470 }
6471
6472 /**
6473  * Create policer rules.
6474  *
6475  * @param[in] dev
6476  *   Pointer to Ethernet device.
6477  * @param[in] fm
6478  *   Pointer to flow meter structure.
6479  * @param[in] attr
6480  *   Pointer to flow attributes.
6481  *
6482  * @return
6483  *   0 on success, -1 otherwise.
6484  */
6485 int
6486 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
6487                                struct mlx5_flow_meter *fm,
6488                                const struct rte_flow_attr *attr)
6489 {
6490         const struct mlx5_flow_driver_ops *fops;
6491
6492         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6493         return fops->create_policer_rules(dev, fm, attr);
6494 }
6495
6496 /**
6497  * Destroy policer rules.
6498  *
6499  * @param[in] fm
6500  *   Pointer to flow meter structure.
6501  * @param[in] attr
6502  *   Pointer to flow attributes.
6503  *
6504  * @return
6505  *   0 on success, -1 otherwise.
6506  */
6507 int
6508 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
6509                                 struct mlx5_flow_meter *fm,
6510                                 const struct rte_flow_attr *attr)
6511 {
6512         const struct mlx5_flow_driver_ops *fops;
6513
6514         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6515         return fops->destroy_policer_rules(dev, fm, attr);
6516 }
6517
6518 /**
6519  * Allocate a counter.
6520  *
6521  * @param[in] dev
6522  *   Pointer to Ethernet device structure.
6523  *
6524  * @return
6525  *   Index to the allocated counter on success, 0 otherwise.
6526  */
6527 uint32_t
6528 mlx5_counter_alloc(struct rte_eth_dev *dev)
6529 {
6530         const struct mlx5_flow_driver_ops *fops;
6531         struct rte_flow_attr attr = { .transfer = 0 };
6532
6533         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6534                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6535                 return fops->counter_alloc(dev);
6536         }
6537         DRV_LOG(ERR,
6538                 "port %u counter allocate is not supported.",
6539                  dev->data->port_id);
6540         return 0;
6541 }
6542
6543 /**
6544  * Free a counter.
6545  *
6546  * @param[in] dev
6547  *   Pointer to Ethernet device structure.
6548  * @param[in] cnt
6549  *   Index to the counter to be freed.
6550  */
6551 void
6552 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
6553 {
6554         const struct mlx5_flow_driver_ops *fops;
6555         struct rte_flow_attr attr = { .transfer = 0 };
6556
6557         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6558                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6559                 fops->counter_free(dev, cnt);
6560                 return;
6561         }
6562         DRV_LOG(ERR,
6563                 "port %u counter free is not supported.",
6564                  dev->data->port_id);
6565 }
6566
6567 /**
6568  * Query counter statistics.
6569  *
6570  * @param[in] dev
6571  *   Pointer to Ethernet device structure.
6572  * @param[in] cnt
6573  *   Index to the counter to query.
6574  * @param[in] clear
6575  *   Set to clear the counter statistics.
6576  * @param[out] pkts
6577  *   Where to store the number of packets that hit the counter.
6578  * @param[out] bytes
6579  *   Where to store the number of bytes that hit the counter.
6580  *
6581  * @return
6582  *   0 on success, a negative errno value otherwise.
6583  */
6584 int
6585 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
6586                    bool clear, uint64_t *pkts, uint64_t *bytes)
6587 {
6588         const struct mlx5_flow_driver_ops *fops;
6589         struct rte_flow_attr attr = { .transfer = 0 };
6590
6591         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6592                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6593                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
6594         }
6595         DRV_LOG(ERR,
6596                 "port %u counter query is not supported.",
6597                  dev->data->port_id);
6598         return -ENOTSUP;
6599 }
6600
6601 /**
6602  * Allocate new memory for the counter values, wrapped by all the needed
6603  * management structures.
6604  *
6605  * @param[in] sh
6606  *   Pointer to mlx5_dev_ctx_shared object.
6607  *
6608  * @return
6609  *   0 on success, a negative errno value otherwise.
6610  */
6611 static int
6612 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
6613 {
6614         struct mlx5_devx_mkey_attr mkey_attr;
6615         struct mlx5_counter_stats_mem_mng *mem_mng;
6616         volatile struct flow_counter_stats *raw_data;
6617         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
6618         int size = (sizeof(struct flow_counter_stats) *
6619                         MLX5_COUNTERS_PER_POOL +
6620                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
6621                         sizeof(struct mlx5_counter_stats_mem_mng);
6622         size_t pgsize = rte_mem_page_size();
6623         uint8_t *mem;
6624         int i;
6625
6626         if (pgsize == (size_t)-1) {
6627                 DRV_LOG(ERR, "Failed to get mem page size");
6628                 rte_errno = ENOMEM;
6629                 return -ENOMEM;
6630         }
6631         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
6632         if (!mem) {
6633                 rte_errno = ENOMEM;
6634                 return -ENOMEM;
6635         }
6636         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
6637         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
6638         mem_mng->umem = mlx5_glue->devx_umem_reg(sh->ctx, mem, size,
6639                                                  IBV_ACCESS_LOCAL_WRITE);
6640         if (!mem_mng->umem) {
6641                 rte_errno = errno;
6642                 mlx5_free(mem);
6643                 return -rte_errno;
6644         }
6645         mkey_attr.addr = (uintptr_t)mem;
6646         mkey_attr.size = size;
6647         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
6648         mkey_attr.pd = sh->pdn;
6649         mkey_attr.log_entity_size = 0;
6650         mkey_attr.pg_access = 0;
6651         mkey_attr.klm_array = NULL;
6652         mkey_attr.klm_num = 0;
6653         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
6654         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
6655         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
6656         if (!mem_mng->dm) {
6657                 mlx5_glue->devx_umem_dereg(mem_mng->umem);
6658                 rte_errno = errno;
6659                 mlx5_free(mem);
6660                 return -rte_errno;
6661         }
6662         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
6663         raw_data = (volatile struct flow_counter_stats *)mem;
6664         for (i = 0; i < raws_n; ++i) {
6665                 mem_mng->raws[i].mem_mng = mem_mng;
6666                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
6667         }
6668         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
6669                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
6670                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
6671                                  next);
6672         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
6673         sh->cmng.mem_mng = mem_mng;
6674         return 0;
6675 }
6676
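/*
 * Descriptive note (not part of the original comments): the single
 * allocation above is laid out as
 *
 *	[ raw counter data: raws_n * MLX5_COUNTERS_PER_POOL stats entries ]
 *	[ raws[]: raws_n struct mlx5_counter_stats_raw descriptors        ]
 *	[ struct mlx5_counter_stats_mem_mng (carved from the tail)        ]
 *
 * Only the raw counter data area is registered as a DevX umem/mkey. The
 * first MLX5_CNT_CONTAINER_RESIZE raw descriptors back the counter pools,
 * while the last MLX5_MAX_PENDING_QUERIES descriptors are queued on
 * free_stat_raws for in-flight asynchronous queries.
 */
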
6677 /**
6678  * Set the statistic memory to the new counter pool.
6679  *
6680  * @param[in] sh
6681  *   Pointer to mlx5_dev_ctx_shared object.
6682  * @param[in] pool
6683  *   Pointer to the pool to set the statistic memory.
6684  *
6685  * @return
6686  *   0 on success, a negative errno value otherwise.
6687  */
6688 static int
6689 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
6690                                struct mlx5_flow_counter_pool *pool)
6691 {
6692         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6693         /* Resize the statistics memory once the current chunk is used up. */
6694         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
6695             mlx5_flow_create_counter_stat_mem_mng(sh)) {
6696                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
6697                 return -1;
6698         }
6699         rte_spinlock_lock(&pool->sl);
6700         pool->raw = cmng->mem_mng->raws + pool->index %
6701                     MLX5_CNT_CONTAINER_RESIZE;
6702         rte_spinlock_unlock(&pool->sl);
6703         pool->raw_hw = NULL;
6704         return 0;
6705 }
6706
6707 #define MLX5_POOL_QUERY_FREQ_US 1000000
6708
6709 /**
6710  * Set the periodic procedure for triggering asynchronous batch queries for all
6711  * the counter pools.
6712  *
6713  * @param[in] sh
6714  *   Pointer to mlx5_dev_ctx_shared object.
6715  */
6716 void
6717 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6718 {
6719         uint32_t pools_n, us;
6720
6721         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
6722         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
6723         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6724         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6725                 sh->cmng.query_thread_on = 0;
6726                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
6727         } else {
6728                 sh->cmng.query_thread_on = 1;
6729         }
6730 }
6731
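/*
 * Descriptive note: the alarm budget of MLX5_POOL_QUERY_FREQ_US (1 second)
 * is split evenly across the valid pools, so each pool is queried roughly
 * once per second. For example, with 4 valid pools the alarm fires every
 * 1000000 / 4 = 250000 us and normally advances pool_index by one each time.
 */
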
6732 /**
6733  * The periodic procedure for triggering asynchronous batch queries for all the
6734  * counter pools. This function is probably called by the host thread.
6735  *
6736  * @param[in] arg
6737  *   The parameter for the alarm process.
6738  */
6739 void
6740 mlx5_flow_query_alarm(void *arg)
6741 {
6742         struct mlx5_dev_ctx_shared *sh = arg;
6743         int ret;
6744         uint16_t pool_index = sh->cmng.pool_index;
6745         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6746         struct mlx5_flow_counter_pool *pool;
6747         uint16_t n_valid;
6748
6749         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6750                 goto set_alarm;
6751         rte_spinlock_lock(&cmng->pool_update_sl);
6752         pool = cmng->pools[pool_index];
6753         n_valid = cmng->n_valid;
6754         rte_spinlock_unlock(&cmng->pool_update_sl);
6755         /* Set the statistics memory for the newly created pool. */
6756         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
6757                 goto set_alarm;
6758         if (pool->raw_hw)
6759                 /* There is a pool query in progress. */
6760                 goto set_alarm;
6761         pool->raw_hw =
6762                 LIST_FIRST(&sh->cmng.free_stat_raws);
6763         if (!pool->raw_hw)
6764                 /* No free counter statistics raw memory. */
6765                 goto set_alarm;
6766         /*
6767          * Identify the counters released between the query trigger and the
6768          * query handler more efficiently. Counters released in this gap
6769          * period should wait for a new round of query, as the newly arrived
6770          * packets will not be taken into account.
6771          */
6772         pool->query_gen++;
6773         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
6774                                                MLX5_COUNTERS_PER_POOL,
6775                                                NULL, NULL,
6776                                                pool->raw_hw->mem_mng->dm->id,
6777                                                (void *)(uintptr_t)
6778                                                pool->raw_hw->data,
6779                                                sh->devx_comp,
6780                                                (uint64_t)(uintptr_t)pool);
6781         if (ret) {
6782                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6783                         " %d", pool->min_dcs->id);
6784                 pool->raw_hw = NULL;
6785                 goto set_alarm;
6786         }
6787         LIST_REMOVE(pool->raw_hw, next);
6788         sh->cmng.pending_queries++;
6789         pool_index++;
6790         if (pool_index >= n_valid)
6791                 pool_index = 0;
6792 set_alarm:
6793         sh->cmng.pool_index = pool_index;
6794         mlx5_set_query_alarm(sh);
6795 }
6796
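/*
 * Descriptive note: each alarm tick triggers one asynchronous DevX batch
 * query on the pool at pool_index, using a raw buffer taken from
 * free_stat_raws. The DevX completion is handled by
 * mlx5_flow_async_pool_query_handle() below, which swaps the pool's raw
 * buffer with the freshly filled one and returns the old buffer to
 * free_stat_raws.
 */
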
6797 /**
6798  * Check the counter pool for newly aged-out flows and raise the age event.
6799  *
6800  * @param[in] sh
6801  *   Pointer to mlx5_dev_ctx_shared object.
6802  * @param[in] pool
6803  *   Pointer to the current counter pool.
6804  */
6805 static void
6806 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6807                    struct mlx5_flow_counter_pool *pool)
6808 {
6809         struct mlx5_priv *priv;
6810         struct mlx5_flow_counter *cnt;
6811         struct mlx5_age_info *age_info;
6812         struct mlx5_age_param *age_param;
6813         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6814         struct mlx5_counter_stats_raw *prev = pool->raw;
6815         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
6816         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
6817         uint16_t expected = AGE_CANDIDATE;
6818         uint32_t i;
6819
6820         pool->time_of_last_age_check = curr_time;
6821         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6822                 cnt = MLX5_POOL_GET_CNT(pool, i);
6823                 age_param = MLX5_CNT_TO_AGE(cnt);
6824                 if (__atomic_load_n(&age_param->state,
6825                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
6826                         continue;
6827                 if (cur->data[i].hits != prev->data[i].hits) {
6828                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
6829                                          __ATOMIC_RELAXED);
6830                         continue;
6831                 }
6832                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
6833                                        time_delta,
6834                                        __ATOMIC_RELAXED) <= age_param->timeout)
6835                         continue;
6836                 /*
6837                  * Hold the lock first; otherwise, if the release happens
6838                  * between setting the state to AGE_TMOUT and the tailq
6839                  * operation, the release procedure may delete a
6840                  * non-existent tailq node.
6841                  */
6842                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6843                 age_info = GET_PORT_AGE_INFO(priv);
6844                 rte_spinlock_lock(&age_info->aged_sl);
6845                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
6846                                                 AGE_TMOUT, false,
6847                                                 __ATOMIC_RELAXED,
6848                                                 __ATOMIC_RELAXED)) {
6849                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6850                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6851                 }
6852                 rte_spinlock_unlock(&age_info->aged_sl);
6853         }
6854         mlx5_age_event_prepare(sh);
6855 }
6856
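/*
 * Descriptive note: a counter is considered aged out when its hit count did
 * not change between two consecutive query rounds and the accumulated
 * sec_since_last_hit exceeds the configured timeout. For example, assuming a
 * 10-second timeout and query rounds roughly every second, a counter whose
 * hits stay constant long enough for the accumulated time deltas to exceed
 * 10 seconds is moved to the port's aged_counters list and the
 * MLX5_AGE_EVENT_NEW event is raised.
 */
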
6857 /**
6858  * Handler for the HW response with the ready values of an asynchronous batch
6859  * query. This function is probably called by the host thread.
6860  *
6861  * @param[in] sh
6862  *   The pointer to the shared device context.
6863  * @param[in] async_id
6864  *   The Devx async ID.
6865  * @param[in] status
6866  *   The status of the completion.
6867  */
6868 void
6869 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6870                                   uint64_t async_id, int status)
6871 {
6872         struct mlx5_flow_counter_pool *pool =
6873                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6874         struct mlx5_counter_stats_raw *raw_to_free;
6875         uint8_t query_gen = pool->query_gen ^ 1;
6876         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6877         enum mlx5_counter_type cnt_type =
6878                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
6879                                 MLX5_COUNTER_TYPE_ORIGIN;
6880
6881         if (unlikely(status)) {
6882                 raw_to_free = pool->raw_hw;
6883         } else {
6884                 raw_to_free = pool->raw;
6885                 if (pool->is_aged)
6886                         mlx5_flow_aging_check(sh, pool);
6887                 rte_spinlock_lock(&pool->sl);
6888                 pool->raw = pool->raw_hw;
6889                 rte_spinlock_unlock(&pool->sl);
6890                 /* Be sure the new raw counters data is updated in memory. */
6891                 rte_io_wmb();
6892                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6893                         rte_spinlock_lock(&cmng->csl[cnt_type]);
6894                         TAILQ_CONCAT(&cmng->counters[cnt_type],
6895                                      &pool->counters[query_gen], next);
6896                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
6897                 }
6898         }
6899         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6900         pool->raw_hw = NULL;
6901         sh->cmng.pending_queries--;
6902 }
6903
6904 static const struct mlx5_flow_tbl_data_entry  *
6905 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
6906 {
6907         struct mlx5_priv *priv = dev->data->dev_private;
6908         struct mlx5_dev_ctx_shared *sh = priv->sh;
6909         struct mlx5_hlist_entry *he;
6910         union tunnel_offload_mark mbits = { .val = mark };
6911         union mlx5_flow_tbl_key table_key = {
6912                 {
6913                         .table_id = tunnel_id_to_flow_tbl(mbits.table_id),
6914                         .dummy = 0,
6915                         .domain = !!mbits.transfer,
6916                         .direction = 0,
6917                 }
6918         };
6919         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL);
6920         return he ?
6921                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
6922 }
6923
6924 static void
6925 mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
6926                                    struct mlx5_hlist_entry *entry)
6927 {
6928         struct mlx5_dev_ctx_shared *sh = list->ctx;
6929         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
6930
6931         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
6932                         tunnel_flow_tbl_to_id(tte->flow_table));
6933         mlx5_free(tte);
6934 }
6935
6936 static struct mlx5_hlist_entry *
6937 mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list,
6938                                    uint64_t key __rte_unused,
6939                                    void *ctx __rte_unused)
6940 {
6941         struct mlx5_dev_ctx_shared *sh = list->ctx;
6942         struct tunnel_tbl_entry *tte;
6943
6944         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
6945                           sizeof(*tte), 0,
6946                           SOCKET_ID_ANY);
6947         if (!tte)
6948                 goto err;
6949         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
6950                           &tte->flow_table);
6951         if (tte->flow_table >= MLX5_MAX_TABLES) {
6952                 DRV_LOG(ERR, "Tunnel TBL ID %d exceed max limit.",
6953                         tte->flow_table);
6954                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
6955                                 tte->flow_table);
6956                 goto err;
6957         } else if (!tte->flow_table) {
6958                 goto err;
6959         }
6960         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
6961         return &tte->hash;
6962 err:
6963         if (tte)
6964                 mlx5_free(tte);
6965         return NULL;
6966 }
6967
6968 static uint32_t
6969 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
6970                                 const struct mlx5_flow_tunnel *tunnel,
6971                                 uint32_t group, uint32_t *table,
6972                                 struct rte_flow_error *error)
6973 {
6974         struct mlx5_hlist_entry *he;
6975         struct tunnel_tbl_entry *tte;
6976         union tunnel_tbl_key key = {
6977                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
6978                 .group = group
6979         };
6980         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
6981         struct mlx5_hlist *group_hash;
6982
6983         group_hash = tunnel ? tunnel->groups : thub->groups;
6984         he = mlx5_hlist_register(group_hash, key.val, NULL);
6985         if (!he)
6986                 return rte_flow_error_set(error, EINVAL,
6987                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6988                                           NULL,
6989                                           "tunnel group index not supported");
6990         tte = container_of(he, typeof(*tte), hash);
6991         *table = tte->flow_table;
6992         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
6993                 dev->data->port_id, key.tunnel_id, group, *table);
6994         return 0;
6995 }
6996
6997 static int
6998 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
6999                     struct flow_grp_info grp_info, struct rte_flow_error *error)
7000 {
7001         if (grp_info.transfer && grp_info.external && grp_info.fdb_def_rule) {
7002                 if (group == UINT32_MAX)
7003                         return rte_flow_error_set
7004                                                 (error, EINVAL,
7005                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7006                                                  NULL,
7007                                                  "group index not supported");
7008                 *table = group + 1;
7009         } else {
7010                 *table = group;
7011         }
7012         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
7013         return 0;
7014 }
7015
7016 /**
7017  * Translate the rte_flow group index to HW table value.
7018  *
7019  * If tunnel offload is disabled, all group ids are converted to flow
7020  * table ids using the standard method.
7021  * If tunnel offload is enabled, a group id can be converted using either
7022  * the standard or the tunnel conversion method. The conversion method
7023  * selection depends on the flags in the `grp_info` parameter:
7024  * - Internal (grp_info.external == 0) groups are converted with the
7025  *   standard method.
7026  * - Group ids in the JUMP action are converted with the tunnel method.
7027  * - Conversion of a group id in a rule attribute depends on the rule type
7028  *   and the group id value:
7029  *   ** non-zero group attributes are converted with the tunnel method
7030  *   ** a zero group attribute in a non-tunnel rule is converted with the
7031  *      standard method - there's only one root table
7032  *   ** a zero group attribute in a tunnel steer rule is converted with the
7033  *      standard method - single root table
7034  *   ** a zero group attribute in a tunnel match rule is a special OvS
7035  *      case: that value is used for portability reasons. That group
7036  *      id is converted with the tunnel conversion method.
7037  *
7038  * @param[in] dev
7039  *   Port device
7040  * @param[in] tunnel
7041  *   PMD tunnel offload object
7042  * @param[in] group
7043  *   rte_flow group index value.
7044  * @param[out] table
7045  *   HW table value.
7046  * @param[in] grp_info
7047  *   flags used for conversion
7048  * @param[out] error
7049  *   Pointer to error structure.
7050  *
7051  * @return
7052  *   0 on success, a negative errno value otherwise and rte_errno is set.
7053  */
7054 int
7055 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
7056                          const struct mlx5_flow_tunnel *tunnel,
7057                          uint32_t group, uint32_t *table,
7058                          struct flow_grp_info grp_info,
7059                          struct rte_flow_error *error)
7060 {
7061         int ret;
7062         bool standard_translation;
7063
7064         if (!grp_info.skip_scale && grp_info.external &&
7065             group < MLX5_MAX_TABLES_EXTERNAL)
7066                 group *= MLX5_FLOW_TABLE_FACTOR;
7067         if (is_tunnel_offload_active(dev)) {
7068                 standard_translation = !grp_info.external ||
7069                                         grp_info.std_tbl_fix;
7070         } else {
7071                 standard_translation = true;
7072         }
7073         DRV_LOG(DEBUG,
7074                 "port %u group=%#x transfer=%d external=%d fdb_def_rule=%d translate=%s",
7075                 dev->data->port_id, group, grp_info.transfer,
7076                 grp_info.external, grp_info.fdb_def_rule,
7077                 standard_translation ? "STANDARD" : "TUNNEL");
7078         if (standard_translation)
7079                 ret = flow_group_to_table(dev->data->port_id, group, table,
7080                                           grp_info, error);
7081         else
7082                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
7083                                                       table, error);
7084
7085         return ret;
7086 }
7087
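/*
 * Worked examples for the translation above (descriptive only; the concrete
 * numbers depend on MLX5_FLOW_TABLE_FACTOR and the rule attributes):
 * - An external ingress rule with group 1 and skip_scale == 0 first has the
 *   group scaled to 1 * MLX5_FLOW_TABLE_FACTOR, then table = group because
 *   the transfer/fdb_def_rule condition does not hold.
 * - An external transfer rule with fdb_def_rule enabled maps the (possibly
 *   scaled) group N to table N + 1, reserving table 0 for the FDB default
 *   rules.
 * - With tunnel offload active and std_tbl_fix == 0 on an external rule,
 *   the group is resolved through tunnel_flow_group_to_flow_table(), which
 *   allocates a table id from the tunnel (or hub) group hash.
 */
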
7088 /**
7089  * Discover availability of metadata reg_c's.
7090  *
7091  * Iteratively use test flows to check availability.
7092  *
7093  * @param[in] dev
7094  *   Pointer to the Ethernet device structure.
7095  *
7096  * @return
7097  *   0 on success, a negative errno value otherwise and rte_errno is set.
7098  */
7099 int
7100 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
7101 {
7102         struct mlx5_priv *priv = dev->data->dev_private;
7103         struct mlx5_dev_config *config = &priv->config;
7104         enum modify_reg idx;
7105         int n = 0;
7106
7107         /* reg_c[0] and reg_c[1] are reserved. */
7108         config->flow_mreg_c[n++] = REG_C_0;
7109         config->flow_mreg_c[n++] = REG_C_1;
7110         /* Discover availability of other reg_c's. */
7111         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
7112                 struct rte_flow_attr attr = {
7113                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
7114                         .priority = MLX5_FLOW_PRIO_RSVD,
7115                         .ingress = 1,
7116                 };
7117                 struct rte_flow_item items[] = {
7118                         [0] = {
7119                                 .type = RTE_FLOW_ITEM_TYPE_END,
7120                         },
7121                 };
7122                 struct rte_flow_action actions[] = {
7123                         [0] = {
7124                                 .type = (enum rte_flow_action_type)
7125                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
7126                                 .conf = &(struct mlx5_flow_action_copy_mreg){
7127                                         .src = REG_C_1,
7128                                         .dst = idx,
7129                                 },
7130                         },
7131                         [1] = {
7132                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
7133                                 .conf = &(struct rte_flow_action_jump){
7134                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
7135                                 },
7136                         },
7137                         [2] = {
7138                                 .type = RTE_FLOW_ACTION_TYPE_END,
7139                         },
7140                 };
7141                 uint32_t flow_idx;
7142                 struct rte_flow *flow;
7143                 struct rte_flow_error error;
7144
7145                 if (!config->dv_flow_en)
7146                         break;
7147                 /* Create internal flow, validation skips copy action. */
7148                 flow_idx = flow_list_create(dev, NULL, &attr, items,
7149                                             actions, false, &error);
7150                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
7151                                       flow_idx);
7152                 if (!flow)
7153                         continue;
7154                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
7155                         config->flow_mreg_c[n++] = idx;
7156                 flow_list_destroy(dev, NULL, flow_idx);
7157         }
7158         for (; n < MLX5_MREG_C_NUM; ++n)
7159                 config->flow_mreg_c[n] = REG_NON;
7160         return 0;
7161 }
7162
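/*
 * Descriptive note: after the probing above, and only when DV flow is
 * enabled, config->flow_mreg_c[] starts with REG_C_0 and REG_C_1 (reserved),
 * followed by every REG_C_2..REG_C_7 register for which the test copy/jump
 * flow could be applied, and is padded with REG_NON up to MLX5_MREG_C_NUM
 * entries.
 */
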
7163 /**
7164  * Dump flow raw HW data to a file.
7165  *
7166  * @param[in] dev
7167  *    The pointer to Ethernet device.
7168  * @param[in] file
7169  *   A pointer to a file for output.
7170  * @param[out] error
7171  *   Perform verbose error reporting if not NULL. PMDs initialize this
7172  *   structure in case of error only.
7173  * @return
7174  *   0 on success, a negative value otherwise.
7175  */
7176 int
7177 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
7178                    FILE *file,
7179                    struct rte_flow_error *error __rte_unused)
7180 {
7181         struct mlx5_priv *priv = dev->data->dev_private;
7182         struct mlx5_dev_ctx_shared *sh = priv->sh;
7183
7184         if (!priv->config.dv_flow_en) {
7185                 if (fputs("device dv flow disabled\n", file) <= 0)
7186                         return -errno;
7187                 return -ENOTSUP;
7188         }
7189         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
7190                                        sh->tx_domain, file);
7191 }
7192
7193 /**
7194  * Get aged-out flows.
7195  *
7196  * @param[in] dev
7197  *   Pointer to the Ethernet device structure.
7198  * @param[in] contexts
7199  *   The address of an array of pointers to the aged-out flow contexts.
7200  * @param[in] nb_contexts
7201  *   The length of the context array.
7202  * @param[out] error
7203  *   Perform verbose error reporting if not NULL. Initialized in case of
7204  *   error only.
7205  *
7206  * @return
7207  *   The number of contexts returned on success, a negative errno value
7208  *   otherwise. If nb_contexts is 0, return the total number of aged
7209  *   contexts. If nb_contexts is not 0, return the number of aged flows
7210  *   reported in the context array.
7211  */
7212 int
7213 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
7214                         uint32_t nb_contexts, struct rte_flow_error *error)
7215 {
7216         const struct mlx5_flow_driver_ops *fops;
7217         struct rte_flow_attr attr = { .transfer = 0 };
7218
7219         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7220                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7221                 return fops->get_aged_flows(dev, contexts, nb_contexts,
7222                                                     error);
7223         }
7224         DRV_LOG(ERR,
7225                 "port %u get aged flows is not supported.",
7226                  dev->data->port_id);
7227         return -ENOTSUP;
7228 }
7229
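/*
 * Illustrative sketch (not part of the driver): the two-step pattern an
 * application may use with rte_flow_get_aged_flows(), which lands in
 * mlx5_flow_get_aged_flows() above; port_id is a placeholder.
 *
 *	struct rte_flow_error error;
 *	int n = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
 *
 *	if (n > 0) {
 *		void **ctx = calloc(n, sizeof(*ctx));
 *
 *		if (ctx != NULL) {
 *			n = rte_flow_get_aged_flows(port_id, ctx, n, &error);
 *			free(ctx);
 *		}
 *	}
 */
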
7230 /* Wrapper for driver action_validate op callback */
7231 static int
7232 flow_drv_action_validate(struct rte_eth_dev *dev,
7233                          const struct rte_flow_shared_action_conf *conf,
7234                          const struct rte_flow_action *action,
7235                          const struct mlx5_flow_driver_ops *fops,
7236                          struct rte_flow_error *error)
7237 {
7238         static const char err_msg[] = "shared action validation unsupported";
7239
7240         if (!fops->action_validate) {
7241                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7242                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7243                                    NULL, err_msg);
7244                 return -rte_errno;
7245         }
7246         return fops->action_validate(dev, conf, action, error);
7247 }
7248
7249 /**
7250  * Destroys the shared action by handle.
7251  *
7252  * @param dev
7253  *   Pointer to Ethernet device structure.
7254  * @param[in] action
7255  *   Handle for the shared action to be destroyed.
7256  * @param[out] error
7257  *   Perform verbose error reporting if not NULL. PMDs initialize this
7258  *   structure in case of error only.
7259  *
7260  * @return
7261  *   0 on success, a negative errno value otherwise and rte_errno is set.
7262  *
7263  * @note: wrapper for driver action_destroy op callback.
7264  */
7265 static int
7266 mlx5_shared_action_destroy(struct rte_eth_dev *dev,
7267                            struct rte_flow_shared_action *action,
7268                            struct rte_flow_error *error)
7269 {
7270         static const char err_msg[] = "shared action destruction unsupported";
7271         struct rte_flow_attr attr = { .transfer = 0 };
7272         const struct mlx5_flow_driver_ops *fops =
7273                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7274
7275         if (!fops->action_destroy) {
7276                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7277                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7278                                    NULL, err_msg);
7279                 return -rte_errno;
7280         }
7281         return fops->action_destroy(dev, action, error);
7282 }
7283
7284 /* Wrapper for driver action_update op callback */
7285 static int
7286 flow_drv_action_update(struct rte_eth_dev *dev,
7287                        struct rte_flow_shared_action *action,
7288                        const void *action_conf,
7289                        const struct mlx5_flow_driver_ops *fops,
7290                        struct rte_flow_error *error)
7291 {
7292         static const char err_msg[] = "shared action update unsupported";
7293
7294         if (!fops->action_update) {
7295                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7296                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7297                                    NULL, err_msg);
7298                 return -rte_errno;
7299         }
7300         return fops->action_update(dev, action, action_conf, error);
7301 }
7302
7303 /* Wrapper for driver action_query op callback */
7304 static int
7305 flow_drv_action_query(struct rte_eth_dev *dev,
7306                       const struct rte_flow_shared_action *action,
7307                       void *data,
7308                       const struct mlx5_flow_driver_ops *fops,
7309                       struct rte_flow_error *error)
7310 {
7311         static const char err_msg[] = "shared action query unsupported";
7312
7313         if (!fops->action_query) {
7314                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7315                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7316                                    NULL, err_msg);
7317                 return -rte_errno;
7318         }
7319         return fops->action_query(dev, action, data, error);
7320 }
7321
7322 /**
7323  * Create shared action for reuse in multiple flow rules.
7324  *
7325  * @param dev
7326  *   Pointer to Ethernet device structure.
7327  * @param[in] action
7328  *   Action configuration for shared action creation.
7329  * @param[out] error
7330  *   Perform verbose error reporting if not NULL. PMDs initialize this
7331  *   structure in case of error only.
7332  * @return
7333  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
7334  */
7335 static struct rte_flow_shared_action *
7336 mlx5_shared_action_create(struct rte_eth_dev *dev,
7337                           const struct rte_flow_shared_action_conf *conf,
7338                           const struct rte_flow_action *action,
7339                           struct rte_flow_error *error)
7340 {
7341         static const char err_msg[] = "shared action creation unsupported";
7342         struct rte_flow_attr attr = { .transfer = 0 };
7343         const struct mlx5_flow_driver_ops *fops =
7344                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7345
7346         if (flow_drv_action_validate(dev, conf, action, fops, error))
7347                 return NULL;
7348         if (!fops->action_create) {
7349                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7350                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7351                                    NULL, err_msg);
7352                 return NULL;
7353         }
7354         return fops->action_create(dev, conf, action, error);
7355 }
7356
7357 /**
7358  * Updates in place the shared action configuration pointed to by the *action*
7359  * handle with the configuration provided as the *action* argument.
7360  * The update of the shared action configuration affects all flow rules reusing
7361  * the action via its handle.
7362  *
7363  * @param dev
7364  *   Pointer to Ethernet device structure.
7365  * @param[in] shared_action
7366  *   Handle for the shared action to be updated.
7367  * @param[in] action
7368  *   Action specification used to modify the action pointed by handle.
7369  *   *action* should be of the same type as the action pointed to by the
7370  *   *action* handle argument, otherwise it is considered invalid.
7371  * @param[out] error
7372  *   Perform verbose error reporting if not NULL. PMDs initialize this
7373  *   structure in case of error only.
7374  *
7375  * @return
7376  *   0 on success, a negative errno value otherwise and rte_errno is set.
7377  */
7378 static int
7379 mlx5_shared_action_update(struct rte_eth_dev *dev,
7380                 struct rte_flow_shared_action *shared_action,
7381                 const struct rte_flow_action *action,
7382                 struct rte_flow_error *error)
7383 {
7384         struct rte_flow_attr attr = { .transfer = 0 };
7385         const struct mlx5_flow_driver_ops *fops =
7386                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7387         int ret;
7388
7389         ret = flow_drv_action_validate(dev, NULL, action, fops, error);
7390         if (ret)
7391                 return ret;
7392         return flow_drv_action_update(dev, shared_action, action->conf, fops,
7393                                       error);
7394 }
7395
7396 /**
7397  * Query the shared action by handle.
7398  *
7399  * This function allows retrieving action-specific data such as counters.
7400  * Data is gathered by special action which may be present/referenced in
7401  * more than one flow rule definition.
7402  *
7403  * \see RTE_FLOW_ACTION_TYPE_COUNT
7404  *
7405  * @param dev
7406  *   Pointer to Ethernet device structure.
7407  * @param[in] action
7408  *   Handle for the shared action to query.
7409  * @param[in, out] data
7410  *   Pointer to storage for the associated query data type.
7411  * @param[out] error
7412  *   Perform verbose error reporting if not NULL. PMDs initialize this
7413  *   structure in case of error only.
7414  *
7415  * @return
7416  *   0 on success, a negative errno value otherwise and rte_errno is set.
7417  */
7418 static int
7419 mlx5_shared_action_query(struct rte_eth_dev *dev,
7420                          const struct rte_flow_shared_action *action,
7421                          void *data,
7422                          struct rte_flow_error *error)
7423 {
7424         struct rte_flow_attr attr = { .transfer = 0 };
7425         const struct mlx5_flow_driver_ops *fops =
7426                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7427
7428         return flow_drv_action_query(dev, action, data, fops, error);
7429 }
7430
7431 /**
7432  * Destroy all shared actions.
7433  *
7434  * @param dev
7435  *   Pointer to Ethernet device.
7436  *
7437  * @return
7438  *   0 on success, a negative errno value otherwise and rte_errno is set.
7439  */
7440 int
7441 mlx5_shared_action_flush(struct rte_eth_dev *dev)
7442 {
7443         struct rte_flow_error error;
7444         struct mlx5_priv *priv = dev->data->dev_private;
7445         struct mlx5_shared_action_rss *action;
7446         int ret = 0;
7447         uint32_t idx;
7448
7449         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7450                       priv->rss_shared_actions, idx, action, next) {
7451                 ret |= mlx5_shared_action_destroy(dev,
7452                        (struct rte_flow_shared_action *)(uintptr_t)idx, &error);
7453         }
7454         return ret;
7455 }
7456
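/*
 * Illustrative sketch (not part of the driver): the shared action life cycle
 * as seen from an application, backed by the wrappers above. RSS is the
 * shared action type tracked by mlx5_shared_action_flush() above; port_id
 * and the queue list are placeholders.
 *
 *	uint16_t queues[] = { 0, 1 };
 *	struct rte_flow_error error;
 *	struct rte_flow_shared_action_conf conf = { .ingress = 1 };
 *	struct rte_flow_action_rss rss = {
 *		.types = ETH_RSS_IP,
 *		.queue = queues,
 *		.queue_num = RTE_DIM(queues),
 *	};
 *	struct rte_flow_action action = {
 *		.type = RTE_FLOW_ACTION_TYPE_RSS,
 *		.conf = &rss,
 *	};
 *	struct rte_flow_shared_action *handle;
 *
 *	handle = rte_flow_shared_action_create(port_id, &conf, &action, &error);
 *	... reference the handle from rules via RTE_FLOW_ACTION_TYPE_SHARED ...
 *	rte_flow_shared_action_update(port_id, handle, &action, &error);
 *	rte_flow_shared_action_destroy(port_id, handle, &error);
 */
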
7457 static void
7458 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
7459                       struct mlx5_flow_tunnel *tunnel)
7460 {
7461         struct mlx5_priv *priv = dev->data->dev_private;
7462
7463         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
7464                 dev->data->port_id, tunnel->tunnel_id);
7465         RTE_VERIFY(!__atomic_load_n(&tunnel->refctn, __ATOMIC_RELAXED));
7466         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID],
7467                         tunnel->tunnel_id);
7468         mlx5_hlist_destroy(tunnel->groups);
7469         mlx5_free(tunnel);
7470 }
7471
7472 static struct mlx5_flow_tunnel *
7473 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
7474 {
7475         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7476         struct mlx5_flow_tunnel *tun;
7477
7478         LIST_FOREACH(tun, &thub->tunnels, chain) {
7479                 if (tun->tunnel_id == id)
7480                         break;
7481         }
7482
7483         return tun;
7484 }
7485
7486 static struct mlx5_flow_tunnel *
7487 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
7488                           const struct rte_flow_tunnel *app_tunnel)
7489 {
7490         struct mlx5_priv *priv = dev->data->dev_private;
7491         struct mlx5_flow_tunnel *tunnel;
7492         uint32_t id;
7493
7494         mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
7495                           &id);
7496         if (id >= MLX5_MAX_TUNNELS) {
7497                 mlx5_ipool_free(priv->sh->ipool
7498                                 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], id);
7499                 DRV_LOG(ERR, "Tunnel ID %d exceed max limit.", id);
7500                 return NULL;
7501         } else if (!id) {
7502                 return NULL;
7503         }
7504         /*
7505          * The mlx5 flow tunnel is an auxiliary data structure.
7506          * It is not part of the IO path, so there is no need to allocate
7507          * it from the huge page pools dedicated to IO.
7508          */
7509         tunnel = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*tunnel),
7510                              0, SOCKET_ID_ANY);
7511         if (!tunnel) {
7512                 mlx5_ipool_free(priv->sh->ipool
7513                                 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], id);
7514                 return NULL;
7515         }
7516         tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
7517                                            mlx5_flow_tunnel_grp2tbl_create_cb,
7518                                            NULL,
7519                                            mlx5_flow_tunnel_grp2tbl_remove_cb);
7520         if (!tunnel->groups) {
7521                 mlx5_ipool_free(priv->sh->ipool
7522                                 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], id);
7523                 mlx5_free(tunnel);
7524                 return NULL;
7525         }
7526         tunnel->groups->ctx = priv->sh;
7527         /* initiate new PMD tunnel */
7528         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
7529         tunnel->tunnel_id = id;
7530         tunnel->action.type = (typeof(tunnel->action.type))
7531                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
7532         tunnel->action.conf = tunnel;
7533         tunnel->item.type = (typeof(tunnel->item.type))
7534                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
7535         tunnel->item.spec = tunnel;
7536         tunnel->item.last = NULL;
7537         tunnel->item.mask = NULL;
7538
7539         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
7540                 dev->data->port_id, tunnel->tunnel_id);
7541
7542         return tunnel;
7543 }
7544
7545 static int
7546 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
7547                      const struct rte_flow_tunnel *app_tunnel,
7548                      struct mlx5_flow_tunnel **tunnel)
7549 {
7550         int ret = 0;
7551         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7552         struct mlx5_flow_tunnel *tun;
7553
7554         rte_spinlock_lock(&thub->sl);
7555         LIST_FOREACH(tun, &thub->tunnels, chain) {
7556                 if (!memcmp(app_tunnel, &tun->app_tunnel,
7557                             sizeof(*app_tunnel))) {
7558                         *tunnel = tun;
7559                         ret = 0;
7560                         break;
7561                 }
7562         }
7563         if (!tun) {
7564                 tun = mlx5_flow_tunnel_allocate(dev, app_tunnel);
7565                 if (tun) {
7566                         LIST_INSERT_HEAD(&thub->tunnels, tun, chain);
7567                         *tunnel = tun;
7568                 } else {
7569                         ret = -ENOMEM;
7570                 }
7571         }
7572         rte_spinlock_unlock(&thub->sl);
7573         if (tun)
7574                 __atomic_add_fetch(&tun->refctn, 1, __ATOMIC_RELAXED);
7575
7576         return ret;
7577 }
7578
7579 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
7580 {
7581         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
7582
7583         if (!thub)
7584                 return;
7585         if (!LIST_EMPTY(&thub->tunnels))
7586                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
7587         mlx5_hlist_destroy(thub->groups);
7588         mlx5_free(thub);
7589 }
7590
7591 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
7592 {
7593         int err;
7594         struct mlx5_flow_tunnel_hub *thub;
7595
7596         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
7597                            0, SOCKET_ID_ANY);
7598         if (!thub)
7599                 return -ENOMEM;
7600         LIST_INIT(&thub->tunnels);
7601         rte_spinlock_init(&thub->sl);
7602         thub->groups = mlx5_hlist_create("flow groups", MLX5_MAX_TABLES, 0,
7603                                          0, mlx5_flow_tunnel_grp2tbl_create_cb,
7604                                          NULL,
7605                                          mlx5_flow_tunnel_grp2tbl_remove_cb);
7606         if (!thub->groups) {
7607                 err = -rte_errno;
7608                 goto err;
7609         }
7610         thub->groups->ctx = sh;
7611         sh->tunnel_hub = thub;
7612
7613         return 0;
7614
7615 err:
7616         if (thub->groups)
7617                 mlx5_hlist_destroy(thub->groups);
7618         if (thub)
7619                 mlx5_free(thub);
7620         return err;
7621 }
7622
7623 #ifndef HAVE_MLX5DV_DR
7624 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
7625 #else
7626 #define MLX5_DOMAIN_SYNC_FLOW \
7627         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
7628 #endif
7629
7630 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
7631 {
7632         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
7633         const struct mlx5_flow_driver_ops *fops;
7634         int ret;
7635         struct rte_flow_attr attr = { .transfer = 0 };
7636
7637         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7638         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
7639         if (ret > 0)
7640                 ret = -ret;
7641         return ret;
7642 }
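
/*
 * Illustrative sketch (not part of the driver): a caller of the mlx5-specific
 * flow synchronization API implemented above. The domain bit names are
 * assumed to come from rte_pmd_mlx5.h; port_id is a placeholder.
 *
 *	int ret = rte_pmd_mlx5_sync_flow(port_id,
 *					 MLX5_DOMAIN_BIT_NIC_RX |
 *					 MLX5_DOMAIN_BIT_NIC_TX |
 *					 MLX5_DOMAIN_BIT_FDB);
 *
 *	if (ret)
 *		printf("flow sync failed: %d\n", ret);
 */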