/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <ethdev_driver.h>
#include <rte_eal_paging.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"
#include "mlx5_common_os.h"
#include "rte_pmd_mlx5.h"

struct tunnel_default_miss_ctx {
	uint16_t *queue;
	__extension__
	union {
		struct rte_flow_action_rss action_rss;
		struct rte_flow_action_queue miss_queue;
		struct rte_flow_action_jump miss_jump;
		uint8_t raw[0];
	};
};

static int
flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
			     struct rte_flow *flow,
			     const struct rte_flow_attr *attr,
			     const struct rte_flow_action *app_actions,
			     uint32_t flow_idx,
			     struct tunnel_default_miss_ctx *ctx,
			     struct rte_flow_error *error);
static struct mlx5_flow_tunnel *
mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
static void
mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
static uint32_t
tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
				const struct mlx5_flow_tunnel *tunnel,
				uint32_t group, uint32_t *table,
				struct rte_flow_error *error);

static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
static void mlx5_flow_pop_thread_workspace(void);


/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
	(const int []){ \
		__VA_ARGS__, 0, \
	}
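
/*
 * Illustrative note (not part of the original sources): a node list such as
 * MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 * expands to the compound literal
 * (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, },
 * where the trailing 0 is the terminator that mlx5_flow_expand_rss() stops
 * on when walking a node's next[] list.
 */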

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
	const int *const next;
	/**<
	 * List of next node indexes. Index 0 is interpreted as a terminator.
	 */
	const enum rte_flow_item_type type;
	/**< Pattern item type of current node. */
	uint64_t rss_types;
	/**<
	 * RSS types bit-field associated with this node
	 * (see ETH_RSS_* definitions).
	 */
};

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
	uint32_t entries;
	/**< Number of entries in @p entry[]. */
	struct {
		struct rte_flow_item *pattern; /**< Expanded pattern array. */
		uint32_t priority; /**< Priority offset for each expansion. */
	} entry[];
};

static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
	enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
	uint16_t ether_type = 0;
	uint16_t ether_type_m;
	uint8_t ip_next_proto = 0;
	uint8_t ip_next_proto_m;

	if (item == NULL || item->spec == NULL)
		return ret;
	switch (item->type) {
	case RTE_FLOW_ITEM_TYPE_ETH:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_eth *)
						(item->mask))->type;
		else
			ether_type_m = rte_flow_item_eth_mask.type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_eth *)
				(item->spec))->type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_VLAN:
		if (item->mask)
			ether_type_m = ((const struct rte_flow_item_vlan *)
						(item->mask))->inner_type;
		else
			ether_type_m = rte_flow_item_vlan_mask.inner_type;
		if (ether_type_m != RTE_BE16(0xFFFF))
			break;
		ether_type = ((const struct rte_flow_item_vlan *)
				(item->spec))->inner_type;
		if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
			ret = RTE_FLOW_ITEM_TYPE_VLAN;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV4:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
					(item->mask))->hdr.next_proto_id;
		else
			ip_next_proto_m =
				rte_flow_item_ipv4_mask.hdr.next_proto_id;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv4 *)
				(item->spec))->hdr.next_proto_id;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	case RTE_FLOW_ITEM_TYPE_IPV6:
		if (item->mask)
			ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
						(item->mask))->hdr.proto;
		else
			ip_next_proto_m =
				rte_flow_item_ipv6_mask.hdr.proto;
		if (ip_next_proto_m != 0xFF)
			break;
		ip_next_proto = ((const struct rte_flow_item_ipv6 *)
				(item->spec))->hdr.proto;
		if (ip_next_proto == IPPROTO_UDP)
			ret = RTE_FLOW_ITEM_TYPE_UDP;
		else if (ip_next_proto == IPPROTO_TCP)
			ret = RTE_FLOW_ITEM_TYPE_TCP;
		else if (ip_next_proto == IPPROTO_IP)
			ret = RTE_FLOW_ITEM_TYPE_IPV4;
		else if (ip_next_proto == IPPROTO_IPV6)
			ret = RTE_FLOW_ITEM_TYPE_IPV6;
		else
			ret = RTE_FLOW_ITEM_TYPE_END;
		break;
	default:
		ret = RTE_FLOW_ITEM_TYPE_VOID;
		break;
	}
	return ret;
}
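
/*
 * Example (illustrative only): given an ETH item whose spec sets
 * type = RTE_BE16(RTE_ETHER_TYPE_IPV4) and whose mask fully covers the type
 * field, mlx5_flow_expand_rss_item_complete() returns RTE_FLOW_ITEM_TYPE_IPV4,
 * i.e. the item type the user pattern implies next. A partially masked type
 * field yields VOID instead, which suppresses the "missed item" expansion
 * performed below.
 */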

#define MLX5_RSS_EXP_ELT_N 8

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set, the following errors are defined:
 *
 *   -E2BIG: graph-depth @p graph is too deep.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
		     const struct rte_flow_item *pattern, uint64_t types,
		     const struct mlx5_flow_expand_node graph[],
		     int graph_root_index)
{
	const struct rte_flow_item *item;
	const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
	const int *next_node;
	const int *stack[MLX5_RSS_EXP_ELT_N];
	int stack_pos = 0;
	struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
	unsigned int i;
	size_t lsize;
	size_t user_pattern_size = 0;
	void *addr = NULL;
	const struct mlx5_flow_expand_node *next = NULL;
	struct rte_flow_item missed_item;
	int missed = 0;
	int elt = 0;
	const struct rte_flow_item *last_item = NULL;

	memset(&missed_item, 0, sizeof(missed_item));
	lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
		MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
	if (lsize <= size) {
		buf->entry[0].priority = 0;
		buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
		buf->entries = 0;
		addr = buf->entry[0].pattern;
	}
	for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
		if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
			last_item = item;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == item->type)
				break;
		}
		if (next)
			node = next;
		user_pattern_size += sizeof(*item);
	}
	user_pattern_size += sizeof(*item); /* Handle END item. */
	lsize += user_pattern_size;
	/* Copy the user pattern in the first entry of the buffer. */
	if (lsize <= size) {
		rte_memcpy(addr, pattern, user_pattern_size);
		addr = (void *)(((uintptr_t)addr) + user_pattern_size);
		buf->entries = 1;
	}
	/* Start expanding. */
	memset(flow_items, 0, sizeof(flow_items));
	user_pattern_size -= sizeof(*item);
	/*
	 * Check if the last valid item has spec set; if so, complete the
	 * pattern so that it can be used for expansion.
	 */
	missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
	if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
		/* Item type END indicates expansion is not required. */
		return lsize;
	}
	if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
		next = NULL;
		missed = 1;
		for (i = 0; node->next && node->next[i]; ++i) {
			next = &graph[node->next[i]];
			if (next->type == missed_item.type) {
				flow_items[0].type = missed_item.type;
				flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
				break;
			}
			next = NULL;
		}
	}
	if (next && missed) {
		elt = 2; /* missed item + item end. */
		node = next;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if ((node->rss_types & types) && lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
			addr = (void *)(((uintptr_t)addr) +
					elt * sizeof(*item));
		}
	}
	memset(flow_items, 0, sizeof(flow_items));
	next_node = node->next;
	stack[stack_pos] = next_node;
	node = next_node ? &graph[*next_node] : NULL;
	while (node) {
		flow_items[stack_pos].type = node->type;
		if (node->rss_types & types) {
			/*
			 * Compute the number of items to copy from the
			 * expansion and copy them.
			 * When stack_pos is 0 there is one element in it,
			 * plus the additional END item.
			 */
			elt = stack_pos + 2;
			flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
			lsize += elt * sizeof(*item) + user_pattern_size;
			if (lsize <= size) {
				size_t n = elt * sizeof(*item);

				buf->entry[buf->entries].priority =
					stack_pos + 1 + missed;
				buf->entry[buf->entries].pattern = addr;
				buf->entries++;
				rte_memcpy(addr, buf->entry[0].pattern,
					   user_pattern_size);
				addr = (void *)(((uintptr_t)addr) +
						user_pattern_size);
				rte_memcpy(addr, &missed_item,
					   missed * sizeof(*item));
				addr = (void *)(((uintptr_t)addr) +
					missed * sizeof(*item));
				rte_memcpy(addr, flow_items, n);
				addr = (void *)(((uintptr_t)addr) + n);
			}
		}
		/* Go deeper. */
		if (node->next) {
			next_node = node->next;
			/*
			 * stack[] holds MLX5_RSS_EXP_ELT_N entries; reject
			 * before the write below would run past its end.
			 */
			if (stack_pos++ == MLX5_RSS_EXP_ELT_N - 1) {
				rte_errno = E2BIG;
				return -rte_errno;
			}
			stack[stack_pos] = next_node;
		} else if (*(next_node + 1)) {
			/* Follow up with the next possibility. */
			++next_node;
		} else {
			/* Move to the next path. */
			if (stack_pos)
				next_node = stack[--stack_pos];
			next_node++;
			stack[stack_pos] = next_node;
		}
		node = *next_node ? &graph[*next_node] : NULL;
	}
	/* No expanded flows but we have missed item, create one rule for it. */
	if (buf->entries == 1 && missed != 0) {
		elt = 2;
		lsize += elt * sizeof(*item) + user_pattern_size;
		if (lsize <= size) {
			buf->entry[buf->entries].priority = 1;
			buf->entry[buf->entries].pattern = addr;
			buf->entries++;
			flow_items[0].type = missed_item.type;
			flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
			rte_memcpy(addr, buf->entry[0].pattern,
				   user_pattern_size);
			addr = (void *)(((uintptr_t)addr) + user_pattern_size);
			rte_memcpy(addr, flow_items, elt * sizeof(*item));
		}
	}
	return lsize;
}
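
/*
 * Sketch of the expected calling pattern (an assumption based on the API
 * contract above, not code from this file): since the return value is the
 * required size regardless of @p size, a caller can use a fixed buffer and
 * detect overflow, e.g.:
 *
 *	union {
 *		struct mlx5_flow_expand_rss buf;
 *		uint8_t buffer[2048];
 *	} expand_buffer;
 *	int ret = mlx5_flow_expand_rss(&expand_buffer.buf,
 *				       sizeof(expand_buffer.buffer),
 *				       items, rss->types,
 *				       mlx5_support_expansion,
 *				       MLX5_EXPANSION_ROOT);
 *
 * On success each buf->entry[i].pattern is one expanded pattern to create a
 * device flow from, with entry[i].priority as its priority offset.
 */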

enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						  MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
						(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6,
						  MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						  MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						  MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						  MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						  MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						  MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						  MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};
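
/*
 * Illustrative walk of the graph above (for documentation only): a user
 * pattern ETH / IPV4 / END expanded with types = ETH_RSS_UDP under
 * MLX5_EXPANSION_ROOT keeps the user pattern as entry[0] and adds
 * ETH / IPV4 / UDP via MLX5_EXPANSION_IPV4 -> MLX5_EXPANSION_IPV4_UDP,
 * because that node's rss_types (ETH_RSS_NONFRAG_IPV4_UDP) intersects the
 * requested types; the MLX5_EXPANSION_IPV4_TCP branch contributes nothing.
 */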

static struct rte_flow_shared_action *
mlx5_shared_action_create(struct rte_eth_dev *dev,
			  const struct rte_flow_shared_action_conf *conf,
			  const struct rte_flow_action *action,
			  struct rte_flow_error *error);
static int mlx5_shared_action_destroy
				(struct rte_eth_dev *dev,
				 struct rte_flow_shared_action *shared_action,
				 struct rte_flow_error *error);
static int mlx5_shared_action_update
				(struct rte_eth_dev *dev,
				 struct rte_flow_shared_action *shared_action,
				 const struct rte_flow_action *action,
				 struct rte_flow_error *error);
static int mlx5_shared_action_query
				(struct rte_eth_dev *dev,
				 const struct rte_flow_shared_action *action,
				 void *data,
				 struct rte_flow_error *error);
static int
mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
		    struct rte_flow_tunnel *app_tunnel,
		    struct rte_flow_action **actions,
		    uint32_t *num_of_actions,
		    struct rte_flow_error *error);
static int
mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
		       struct rte_flow_tunnel *app_tunnel,
		       struct rte_flow_item **items,
		       uint32_t *num_of_items,
		       struct rte_flow_error *error);
static int
mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
			      struct rte_flow_item *pmd_items,
			      uint32_t num_items, struct rte_flow_error *err);
static int
mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
				struct rte_flow_action *pmd_actions,
				uint32_t num_actions,
				struct rte_flow_error *err);
static int
mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
				  struct rte_mbuf *m,
				  struct rte_flow_restore_info *info,
				  struct rte_flow_error *err);

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
	.dev_dump = mlx5_flow_dev_dump,
	.get_aged_flows = mlx5_flow_get_aged_flows,
	.shared_action_create = mlx5_shared_action_create,
	.shared_action_destroy = mlx5_shared_action_destroy,
	.shared_action_update = mlx5_shared_action_update,
	.shared_action_query = mlx5_shared_action_query,
	.tunnel_decap_set = mlx5_flow_tunnel_decap_set,
	.tunnel_match = mlx5_flow_tunnel_match,
	.tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
	.tunnel_item_release = mlx5_flow_tunnel_item_release,
	.get_restore_info = mlx5_flow_tunnel_get_restore_info,
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GTP,
		.ptype = RTE_PTYPE_TUNNEL_GTPU,
	},
};


/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;
	bool skip_mtr_reg = false;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NON;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_MTR_SFX:
		/*
		 * If meter color and flow match share one register, flow match
		 * should use the meter color register for match.
		 */
		if (priv->mtr_reg_share)
			return priv->mtr_color_reg;
		else
			return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			       REG_C_3;
	case MLX5_MTR_COLOR:
	case MLX5_ASO_FLOW_HIT: /* Both features use the same REG_C. */
		MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
		return priv->mtr_color_reg;
	case MLX5_COPY_MARK:
		/*
		 * The COPY_MARK metadata register is used in the meter suffix
		 * sub-flow when a meter is present. It's safe to share the
		 * same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_APP_TAG:
		/*
		 * If meter is enabled, it engages registers for both color
		 * match and flow match. If meter color match does not use
		 * REG_C_2, the REG_C_x used by meter color match must be
		 * skipped.
		 * If meter is disabled, all available registers can be used.
		 */
		start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
			    (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
		skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
		if (id > (uint32_t)(REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than
		 * REG_C_2. Take care not to conflict with the meter color
		 * REG_C_x: if the available index REG_C_y >= REG_C_x, skip
		 * the color register.
		 */
		if (skip_mtr_reg && config->flow_mreg_c
		    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
			if (id >= (uint32_t)(REG_C_7 - start_reg))
				return rte_flow_error_set(error, EINVAL,
						       RTE_FLOW_ERROR_TYPE_ITEM,
							NULL, "invalid tag id");
			if (config->flow_mreg_c
			    [id + 1 + start_reg - REG_C_0] != REG_NON)
				return config->flow_mreg_c
					       [id + 1 + start_reg - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	MLX5_ASSERT(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}
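
/*
 * Usage sketch (illustrative, assuming legacy dv_xmeta_en mode):
 *
 *	int reg = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, &error);
 *	if (reg < 0)
 *		return reg; // rte_errno already set by the helper
 *
 * returns REG_B here, while MLX5_APP_TAG requests walk the flow_mreg_c[]
 * table and may fail with EINVAL/ENOTSUP for ids beyond the available
 * REG_C_x registers.
 */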

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having an available reg_c can be regarded as support for the
	 * extensive flow metadata register, which implies:
	 * - metadata register copy action by modify header.
	 * - 16 modify header actions are supported.
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NON;
}

/**
 * Get the lowest priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 *
 * @return
 *   The value of the lowest priority of the flow.
 */
uint32_t
mlx5_get_lowest_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr)
{
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!attr->group && !attr->transfer)
		return priv->config.flow_prio - 2;
	return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
}

/**
 * Calculate matcher priority of the flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Pointer to device flow rule attributes.
 * @param[in] subpriority
 *   The priority based on the items.
 * @return
 *   The matcher priority of the flow.
 */
uint16_t
mlx5_get_matcher_priority(struct rte_eth_dev *dev,
			  const struct rte_flow_attr *attr,
			  uint32_t subpriority)
{
	uint16_t priority = (uint16_t)attr->priority;
	struct mlx5_priv *priv = dev->data->dev_private;

	if (!attr->group && !attr->transfer) {
		if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
			priority = priv->config.flow_prio - 1;
		return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
	}
	if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
		priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
	return priority * 3 + subpriority;
}
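
/*
 * Worked example (illustrative): for a non-root flow (attr->group != 0) with
 * attr->priority == 2 and an item-based subpriority of 1, the returned
 * matcher priority is 2 * 3 + 1 = 7; the factor of 3 leaves room for the
 * per-layer subpriorities between consecutive user priorities.
 */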

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  bool range_accepted,
			  struct rte_flow_error *error)
{
	unsigned int i;

	MLX5_ASSERT(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last && !range_accepted) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}
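
/*
 * Typical use (a sketch; MLX5_ITEM_RANGE_NOT_ACCEPTED is assumed to be the
 * range_accepted flag defined alongside this helper): item validators build
 * a default nic_mask and call
 *
 *	ret = mlx5_flow_item_acceptable(item, (const uint8_t *)item->mask,
 *					(const uint8_t *)&nic_mask,
 *					sizeof(nic_mask),
 *					MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
 *
 * so that a user mask requesting bits outside nic_mask fails with ENOTSUP
 * and a spec/last pair differing under the mask fails with EINVAL.
 */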

/**
 * Adjust the hash fields according to the @p rss_desc information.
 *
 * @param[in] rss_desc
 *   Pointer to the mlx5 RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = rss_desc->level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(rss_desc->types & layer_types))
		return 0;
	return hash_fields;
}
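
/*
 * Example (illustrative, when tunnel offload support is compiled in): with
 * rss_desc->level = 2 (inner RSS) and tunnel = 1, the given hash_fields gain
 * IBV_RX_HASH_INNER; with level < 2 on the same tunnel item the function
 * returns 0 and no hash is applied for that layer.
 */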
1007
1008 /**
1009  * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
1010  * if several tunnel rules are used on this queue, the tunnel ptype will be
1011  * cleared.
1012  *
1013  * @param rxq_ctrl
1014  *   Rx queue to update.
1015  */
1016 static void
1017 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1018 {
1019         unsigned int i;
1020         uint32_t tunnel_ptype = 0;
1021
1022         /* Look up for the ptype to use. */
1023         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1024                 if (!rxq_ctrl->flow_tunnels_n[i])
1025                         continue;
1026                 if (!tunnel_ptype) {
1027                         tunnel_ptype = tunnels_info[i].ptype;
1028                 } else {
1029                         tunnel_ptype = 0;
1030                         break;
1031                 }
1032         }
1033         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1034 }
1035
1036 /**
1037  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive
1038  * flow.
1039  *
1040  * @param[in] dev
1041  *   Pointer to the Ethernet device structure.
1042  * @param[in] dev_handle
1043  *   Pointer to device flow handle structure.
1044  */
1045 static void
1046 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1047                        struct mlx5_flow_handle *dev_handle)
1048 {
1049         struct mlx5_priv *priv = dev->data->dev_private;
1050         const int mark = dev_handle->mark;
1051         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1052         struct mlx5_ind_table_obj *ind_tbl = NULL;
1053         unsigned int i;
1054
1055         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1056                 struct mlx5_hrxq *hrxq;
1057
1058                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1059                               dev_handle->rix_hrxq);
1060                 if (hrxq)
1061                         ind_tbl = hrxq->ind_table;
1062         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1063                 struct mlx5_shared_action_rss *shared_rss;
1064
1065                 shared_rss = mlx5_ipool_get
1066                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1067                          dev_handle->rix_srss);
1068                 if (shared_rss)
1069                         ind_tbl = shared_rss->ind_tbl;
1070         }
1071         if (!ind_tbl)
1072                 return;
1073         for (i = 0; i != ind_tbl->queues_n; ++i) {
1074                 int idx = ind_tbl->queues[i];
1075                 struct mlx5_rxq_ctrl *rxq_ctrl =
1076                         container_of((*priv->rxqs)[idx],
1077                                      struct mlx5_rxq_ctrl, rxq);
1078
1079                 /*
1080                  * To support metadata register copy on Tx loopback,
1081                  * this must be always enabled (metadata may arive
1082                  * from other port - not from local flows only.
1083                  */
1084                 if (priv->config.dv_flow_en &&
1085                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1086                     mlx5_flow_ext_mreg_supported(dev)) {
1087                         rxq_ctrl->rxq.mark = 1;
1088                         rxq_ctrl->flow_mark_n = 1;
1089                 } else if (mark) {
1090                         rxq_ctrl->rxq.mark = 1;
1091                         rxq_ctrl->flow_mark_n++;
1092                 }
1093                 if (tunnel) {
1094                         unsigned int j;
1095
1096                         /* Increase the counter matching the flow. */
1097                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1098                                 if ((tunnels_info[j].tunnel &
1099                                      dev_handle->layers) ==
1100                                     tunnels_info[j].tunnel) {
1101                                         rxq_ctrl->flow_tunnels_n[j]++;
1102                                         break;
1103                                 }
1104                         }
1105                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1106                 }
1107         }
1108 }
1109
1110 /**
1111  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1112  *
1113  * @param[in] dev
1114  *   Pointer to the Ethernet device structure.
1115  * @param[in] flow
1116  *   Pointer to flow structure.
1117  */
1118 static void
1119 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1120 {
1121         struct mlx5_priv *priv = dev->data->dev_private;
1122         uint32_t handle_idx;
1123         struct mlx5_flow_handle *dev_handle;
1124
1125         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1126                        handle_idx, dev_handle, next)
1127                 flow_drv_rxq_flags_set(dev, dev_handle);
1128 }
1129
1130 /**
1131  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1132  * device flow if no other flow uses it with the same kind of request.
1133  *
1134  * @param dev
1135  *   Pointer to Ethernet device.
1136  * @param[in] dev_handle
1137  *   Pointer to the device flow handle structure.
1138  */
1139 static void
1140 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1141                         struct mlx5_flow_handle *dev_handle)
1142 {
1143         struct mlx5_priv *priv = dev->data->dev_private;
1144         const int mark = dev_handle->mark;
1145         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1146         struct mlx5_ind_table_obj *ind_tbl = NULL;
1147         unsigned int i;
1148
1149         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1150                 struct mlx5_hrxq *hrxq;
1151
1152                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1153                               dev_handle->rix_hrxq);
1154                 if (hrxq)
1155                         ind_tbl = hrxq->ind_table;
1156         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1157                 struct mlx5_shared_action_rss *shared_rss;
1158
1159                 shared_rss = mlx5_ipool_get
1160                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1161                          dev_handle->rix_srss);
1162                 if (shared_rss)
1163                         ind_tbl = shared_rss->ind_tbl;
1164         }
1165         if (!ind_tbl)
1166                 return;
1167         MLX5_ASSERT(dev->data->dev_started);
1168         for (i = 0; i != ind_tbl->queues_n; ++i) {
1169                 int idx = ind_tbl->queues[i];
1170                 struct mlx5_rxq_ctrl *rxq_ctrl =
1171                         container_of((*priv->rxqs)[idx],
1172                                      struct mlx5_rxq_ctrl, rxq);
1173
1174                 if (priv->config.dv_flow_en &&
1175                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1176                     mlx5_flow_ext_mreg_supported(dev)) {
1177                         rxq_ctrl->rxq.mark = 1;
1178                         rxq_ctrl->flow_mark_n = 1;
1179                 } else if (mark) {
1180                         rxq_ctrl->flow_mark_n--;
1181                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1182                 }
1183                 if (tunnel) {
1184                         unsigned int j;
1185
1186                         /* Decrease the counter matching the flow. */
1187                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1188                                 if ((tunnels_info[j].tunnel &
1189                                      dev_handle->layers) ==
1190                                     tunnels_info[j].tunnel) {
1191                                         rxq_ctrl->flow_tunnels_n[j]--;
1192                                         break;
1193                                 }
1194                         }
1195                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1196                 }
1197         }
1198 }
1199
1200 /**
1201  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1202  * @p flow if no other flow uses it with the same kind of request.
1203  *
1204  * @param dev
1205  *   Pointer to Ethernet device.
1206  * @param[in] flow
1207  *   Pointer to the flow.
1208  */
1209 static void
1210 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1211 {
1212         struct mlx5_priv *priv = dev->data->dev_private;
1213         uint32_t handle_idx;
1214         struct mlx5_flow_handle *dev_handle;
1215
1216         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1217                        handle_idx, dev_handle, next)
1218                 flow_drv_rxq_flags_trim(dev, dev_handle);
1219 }
1220
1221 /**
1222  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1223  *
1224  * @param dev
1225  *   Pointer to Ethernet device.
1226  */
1227 static void
1228 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1229 {
1230         struct mlx5_priv *priv = dev->data->dev_private;
1231         unsigned int i;
1232
1233         for (i = 0; i != priv->rxqs_n; ++i) {
1234                 struct mlx5_rxq_ctrl *rxq_ctrl;
1235                 unsigned int j;
1236
1237                 if (!(*priv->rxqs)[i])
1238                         continue;
1239                 rxq_ctrl = container_of((*priv->rxqs)[i],
1240                                         struct mlx5_rxq_ctrl, rxq);
1241                 rxq_ctrl->flow_mark_n = 0;
1242                 rxq_ctrl->rxq.mark = 0;
1243                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1244                         rxq_ctrl->flow_tunnels_n[j] = 0;
1245                 rxq_ctrl->rxq.tunnel = 0;
1246         }
1247 }
1248
1249 /**
1250  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1251  *
1252  * @param[in] dev
1253  *   Pointer to the Ethernet device structure.
1254  */
1255 void
1256 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1257 {
1258         struct mlx5_priv *priv = dev->data->dev_private;
1259         struct mlx5_rxq_data *data;
1260         unsigned int i;
1261
1262         for (i = 0; i != priv->rxqs_n; ++i) {
1263                 if (!(*priv->rxqs)[i])
1264                         continue;
1265                 data = (*priv->rxqs)[i];
1266                 if (!rte_flow_dynf_metadata_avail()) {
1267                         data->dynf_meta = 0;
1268                         data->flow_meta_mask = 0;
1269                         data->flow_meta_offset = -1;
1270                 } else {
1271                         data->dynf_meta = 1;
1272                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1273                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1274                 }
1275         }
1276 }
1277
1278 /*
1279  * return a pointer to the desired action in the list of actions.
1280  *
1281  * @param[in] actions
1282  *   The list of actions to search the action in.
1283  * @param[in] action
1284  *   The action to find.
1285  *
1286  * @return
1287  *   Pointer to the action in the list, if found. NULL otherwise.
1288  */
1289 const struct rte_flow_action *
1290 mlx5_flow_find_action(const struct rte_flow_action *actions,
1291                       enum rte_flow_action_type action)
1292 {
1293         if (actions == NULL)
1294                 return NULL;
1295         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1296                 if (actions->type == action)
1297                         return actions;
1298         return NULL;
1299 }
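
/*
 * Minimal usage sketch (hypothetical helper, not part of the driver):
 * detect whether an action list carries RSS before deciding how to
 * expand a flow.
 */
static inline bool
mlx5_example_actions_have_rss(const struct rte_flow_action *actions)
{
        return mlx5_flow_find_action(actions,
                                     RTE_FLOW_ACTION_TYPE_RSS) != NULL;
}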
1300
/**
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1306  * @param[in] attr
1307  *   Attributes of flow that includes this action.
1308  * @param[out] error
1309  *   Pointer to error structure.
1310  *
1311  * @return
1312  *   0 on success, a negative errno value otherwise and rte_errno is set.
1313  */
1314 int
1315 mlx5_flow_validate_action_flag(uint64_t action_flags,
1316                                const struct rte_flow_attr *attr,
1317                                struct rte_flow_error *error)
1318 {
1319         if (action_flags & MLX5_FLOW_ACTION_MARK)
1320                 return rte_flow_error_set(error, EINVAL,
1321                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1322                                           "can't mark and flag in same flow");
1323         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1324                 return rte_flow_error_set(error, EINVAL,
1325                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1326                                           "can't have 2 flag"
1327                                           " actions in same flow");
1328         if (attr->egress)
1329                 return rte_flow_error_set(error, ENOTSUP,
1330                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1331                                           "flag action not supported for "
1332                                           "egress");
1333         return 0;
1334 }
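
/*
 * Sketch of the contract shared by the validators in this file: on
 * failure they return -rte_errno and fill the rte_flow_error structure
 * with a static message. The reporting helper below is hypothetical.
 */
static inline void
mlx5_example_log_validation(int ret, const struct rte_flow_error *error)
{
        if (ret < 0)
                DRV_LOG(DEBUG, "validation failed (%d): %s", rte_errno,
                        error->message ? error->message : "no message");
}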
1335
/**
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1343  * @param[in] attr
1344  *   Attributes of flow that includes this action.
1345  * @param[out] error
1346  *   Pointer to error structure.
1347  *
1348  * @return
1349  *   0 on success, a negative errno value otherwise and rte_errno is set.
1350  */
1351 int
1352 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1353                                uint64_t action_flags,
1354                                const struct rte_flow_attr *attr,
1355                                struct rte_flow_error *error)
1356 {
1357         const struct rte_flow_action_mark *mark = action->conf;
1358
1359         if (!mark)
1360                 return rte_flow_error_set(error, EINVAL,
1361                                           RTE_FLOW_ERROR_TYPE_ACTION,
1362                                           action,
1363                                           "configuration cannot be null");
1364         if (mark->id >= MLX5_FLOW_MARK_MAX)
1365                 return rte_flow_error_set(error, EINVAL,
1366                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1367                                           &mark->id,
                                          "mark id must be in 0 <= id < "
1369                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1370         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1371                 return rte_flow_error_set(error, EINVAL,
1372                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1373                                           "can't flag and mark in same flow");
1374         if (action_flags & MLX5_FLOW_ACTION_MARK)
1375                 return rte_flow_error_set(error, EINVAL,
1376                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1377                                           "can't have 2 mark actions in same"
1378                                           " flow");
1379         if (attr->egress)
1380                 return rte_flow_error_set(error, ENOTSUP,
1381                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1382                                           "mark action not supported for "
1383                                           "egress");
1384         return 0;
1385 }
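
/*
 * Illustrative only (hypothetical helper): a mark configuration accepted
 * by the checks above on ingress when no mark/flag action was seen yet
 * (action_flags == 0).
 */
static inline int
mlx5_example_validate_mark(struct rte_flow_error *error)
{
        static const struct rte_flow_action_mark conf = { .id = 0xcafe };
        const struct rte_flow_action action = {
                .type = RTE_FLOW_ACTION_TYPE_MARK,
                .conf = &conf,
        };
        const struct rte_flow_attr attr = { .ingress = 1 };

        return mlx5_flow_validate_action_mark(&action, 0, &attr, error);
}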
1386
/**
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1392  * @param[in] attr
1393  *   Attributes of flow that includes this action.
1394  * @param[out] error
1395  *   Pointer to error structure.
1396  *
1397  * @return
1398  *   0 on success, a negative errno value otherwise and rte_errno is set.
1399  */
1400 int
1401 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1402                                const struct rte_flow_attr *attr,
1403                                struct rte_flow_error *error)
1404 {
1405         if (attr->egress)
1406                 return rte_flow_error_set(error, ENOTSUP,
1407                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1408                                           "drop action not supported for "
1409                                           "egress");
1410         return 0;
1411 }
1412
/**
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1420  * @param[in] dev
1421  *   Pointer to the Ethernet device structure.
1422  * @param[in] attr
1423  *   Attributes of flow that includes this action.
1424  * @param[out] error
1425  *   Pointer to error structure.
1426  *
1427  * @return
1428  *   0 on success, a negative errno value otherwise and rte_errno is set.
1429  */
1430 int
1431 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1432                                 uint64_t action_flags,
1433                                 struct rte_eth_dev *dev,
1434                                 const struct rte_flow_attr *attr,
1435                                 struct rte_flow_error *error)
1436 {
1437         struct mlx5_priv *priv = dev->data->dev_private;
1438         const struct rte_flow_action_queue *queue = action->conf;
1439
1440         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1441                 return rte_flow_error_set(error, EINVAL,
1442                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1443                                           "can't have 2 fate actions in"
1444                                           " same flow");
1445         if (!priv->rxqs_n)
1446                 return rte_flow_error_set(error, EINVAL,
1447                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1448                                           NULL, "No Rx queues configured");
1449         if (queue->index >= priv->rxqs_n)
1450                 return rte_flow_error_set(error, EINVAL,
1451                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1452                                           &queue->index,
1453                                           "queue index out of range");
1454         if (!(*priv->rxqs)[queue->index])
1455                 return rte_flow_error_set(error, EINVAL,
1456                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1457                                           &queue->index,
1458                                           "queue is not configured");
1459         if (attr->egress)
1460                 return rte_flow_error_set(error, ENOTSUP,
1461                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1462                                           "queue action not supported for "
1463                                           "egress");
1464         return 0;
1465 }
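
/*
 * Illustrative only (hypothetical helper): a queue action targeting Rx
 * queue 0 passes the checks above on ingress once at least one Rx queue
 * is configured.
 */
static inline int
mlx5_example_validate_queue(struct rte_eth_dev *dev,
                            struct rte_flow_error *error)
{
        static const struct rte_flow_action_queue conf = { .index = 0 };
        const struct rte_flow_action action = {
                .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                .conf = &conf,
        };
        const struct rte_flow_attr attr = { .ingress = 1 };

        return mlx5_flow_validate_action_queue(&action, 0, dev, &attr,
                                               error);
}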
1466
/**
 * Validate the RSS action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] action
 *   Pointer to the RSS action.
1474  * @param[out] error
1475  *   Pointer to error structure.
1476  *
1477  * @return
1478  *   0 on success, a negative errno value otherwise and rte_errno is set.
1479  */
1480 int
1481 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1482                          const struct rte_flow_action *action,
1483                          struct rte_flow_error *error)
1484 {
1485         struct mlx5_priv *priv = dev->data->dev_private;
1486         const struct rte_flow_action_rss *rss = action->conf;
1487         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1488         unsigned int i;
1489
1490         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1491             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1492                 return rte_flow_error_set(error, ENOTSUP,
1493                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1494                                           &rss->func,
1495                                           "RSS hash function not supported");
1496 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1497         if (rss->level > 2)
1498 #else
1499         if (rss->level > 1)
1500 #endif
1501                 return rte_flow_error_set(error, ENOTSUP,
1502                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1503                                           &rss->level,
1504                                           "tunnel RSS is not supported");
        /* Allow RSS key_len 0 in case of NULL (default) RSS key. */
1506         if (rss->key_len == 0 && rss->key != NULL)
1507                 return rte_flow_error_set(error, ENOTSUP,
1508                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1509                                           &rss->key_len,
1510                                           "RSS hash key length 0");
1511         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1512                 return rte_flow_error_set(error, ENOTSUP,
1513                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1514                                           &rss->key_len,
1515                                           "RSS hash key too small");
1516         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1517                 return rte_flow_error_set(error, ENOTSUP,
1518                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1519                                           &rss->key_len,
1520                                           "RSS hash key too large");
1521         if (rss->queue_num > priv->config.ind_table_max_size)
1522                 return rte_flow_error_set(error, ENOTSUP,
1523                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1524                                           &rss->queue_num,
1525                                           "number of queues too large");
1526         if (rss->types & MLX5_RSS_HF_MASK)
1527                 return rte_flow_error_set(error, ENOTSUP,
1528                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1529                                           &rss->types,
1530                                           "some RSS protocols are not"
1531                                           " supported");
1532         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1533             !(rss->types & ETH_RSS_IP))
1534                 return rte_flow_error_set(error, EINVAL,
1535                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1536                                           "L3 partial RSS requested but L3 RSS"
1537                                           " type not specified");
1538         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1539             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1540                 return rte_flow_error_set(error, EINVAL,
1541                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1542                                           "L4 partial RSS requested but L4 RSS"
1543                                           " type not specified");
1544         if (!priv->rxqs_n)
1545                 return rte_flow_error_set(error, EINVAL,
1546                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1547                                           NULL, "No Rx queues configured");
1548         if (!rss->queue_num)
1549                 return rte_flow_error_set(error, EINVAL,
1550                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1551                                           NULL, "No queues configured");
1552         for (i = 0; i != rss->queue_num; ++i) {
1553                 struct mlx5_rxq_ctrl *rxq_ctrl;
1554
1555                 if (rss->queue[i] >= priv->rxqs_n)
1556                         return rte_flow_error_set
1557                                 (error, EINVAL,
1558                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1559                                  &rss->queue[i], "queue index out of range");
1560                 if (!(*priv->rxqs)[rss->queue[i]])
1561                         return rte_flow_error_set
1562                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1563                                  &rss->queue[i], "queue is not configured");
1564                 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1565                                         struct mlx5_rxq_ctrl, rxq);
1566                 if (i == 0)
1567                         rxq_type = rxq_ctrl->type;
1568                 if (rxq_type != rxq_ctrl->type)
1569                         return rte_flow_error_set
1570                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1571                                  &rss->queue[i],
1572                                  "combining hairpin and regular RSS queues is not supported");
1573         }
1574         return 0;
1575 }
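
/*
 * Illustrative sketch only (hypothetical helper, not called by the
 * driver): an RSS configuration satisfying the checks above. A NULL key
 * with key_len 0 selects the default key and level 1 means outer-header
 * hashing; the queue list must name configured Rx queues of one type.
 */
static inline int
mlx5_example_validate_rss(struct rte_eth_dev *dev,
                          struct rte_flow_error *error)
{
        static const uint16_t queues[] = { 0 };
        const struct rte_flow_action_rss conf = {
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                .level = 1,
                .types = ETH_RSS_IP,
                .key_len = 0,
                .key = NULL,
                .queue_num = RTE_DIM(queues),
                .queue = queues,
        };
        const struct rte_flow_action action = {
                .type = RTE_FLOW_ACTION_TYPE_RSS,
                .conf = &conf,
        };

        return mlx5_validate_action_rss(dev, &action, error);
}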
1576
/**
 * Validate the RSS action.
 *
 * @param[in] action
 *   Pointer to the RSS action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1584  * @param[in] dev
1585  *   Pointer to the Ethernet device structure.
1586  * @param[in] attr
1587  *   Attributes of flow that includes this action.
1588  * @param[in] item_flags
1589  *   Items that were detected.
1590  * @param[out] error
1591  *   Pointer to error structure.
1592  *
1593  * @return
1594  *   0 on success, a negative errno value otherwise and rte_errno is set.
1595  */
1596 int
1597 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1598                               uint64_t action_flags,
1599                               struct rte_eth_dev *dev,
1600                               const struct rte_flow_attr *attr,
1601                               uint64_t item_flags,
1602                               struct rte_flow_error *error)
1603 {
1604         const struct rte_flow_action_rss *rss = action->conf;
1605         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1606         int ret;
1607
1608         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1609                 return rte_flow_error_set(error, EINVAL,
1610                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1611                                           "can't have 2 fate actions"
1612                                           " in same flow");
1613         ret = mlx5_validate_action_rss(dev, action, error);
1614         if (ret)
1615                 return ret;
1616         if (attr->egress)
1617                 return rte_flow_error_set(error, ENOTSUP,
1618                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1619                                           "rss action not supported for "
1620                                           "egress");
1621         if (rss->level > 1 && !tunnel)
1622                 return rte_flow_error_set(error, EINVAL,
1623                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1624                                           "inner RSS is not supported for "
1625                                           "non-tunnel flows");
1626         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1627             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1628                 return rte_flow_error_set(error, EINVAL,
1629                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
                                          "RSS on eCPRI is not currently"
                                          " supported");
1631         }
1632         return 0;
1633 }
1634
/**
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
1640  * @param[out] error
1641  *   Pointer to error structure.
1642  *
1643  * @return
1644  *   0 on success, a negative errno value otherwise and rte_errno is set.
1645  */
1646 int
1647 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1648                                 const struct rte_flow_attr *attr,
1649                                 struct rte_flow_error *error)
1650 {
1651         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1652                 return rte_flow_error_set(error, EINVAL,
1653                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1654                                           "can't have 2 fate actions in"
1655                                           " same flow");
1656         if (attr->egress)
1657                 return rte_flow_error_set(error, ENOTSUP,
1658                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1659                                           "default miss action not supported "
1660                                           "for egress");
1661         if (attr->group)
1662                 return rte_flow_error_set(error, ENOTSUP,
1663                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1664                                           "only group 0 is supported");
1665         if (attr->transfer)
1666                 return rte_flow_error_set(error, ENOTSUP,
1667                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1668                                           NULL, "transfer is not supported");
1669         return 0;
1670 }
1671
/**
1673  * Validate the count action.
1674  *
1675  * @param[in] dev
1676  *   Pointer to the Ethernet device structure.
1677  * @param[in] attr
1678  *   Attributes of flow that includes this action.
1679  * @param[out] error
1680  *   Pointer to error structure.
1681  *
1682  * @return
1683  *   0 on success, a negative errno value otherwise and rte_errno is set.
1684  */
1685 int
1686 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1687                                 const struct rte_flow_attr *attr,
1688                                 struct rte_flow_error *error)
1689 {
1690         if (attr->egress)
1691                 return rte_flow_error_set(error, ENOTSUP,
1692                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1693                                           "count action not supported for "
1694                                           "egress");
1695         return 0;
1696 }
1697
1698 /**
 * Verify that the @p attributes will be correctly understood by the NIC.
1701  *
1702  * @param[in] dev
1703  *   Pointer to the Ethernet device structure.
1704  * @param[in] attributes
1705  *   Pointer to flow attributes
1706  * @param[out] error
1707  *   Pointer to error structure.
1708  *
1709  * @return
1710  *   0 on success, a negative errno value otherwise and rte_errno is set.
1711  */
1712 int
1713 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1714                               const struct rte_flow_attr *attributes,
1715                               struct rte_flow_error *error)
1716 {
1717         struct mlx5_priv *priv = dev->data->dev_private;
1718         uint32_t priority_max = priv->config.flow_prio - 1;
1719
1720         if (attributes->group)
1721                 return rte_flow_error_set(error, ENOTSUP,
1722                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL, "groups are not supported");
1724         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1725             attributes->priority >= priority_max)
1726                 return rte_flow_error_set(error, ENOTSUP,
1727                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1728                                           NULL, "priority out of range");
1729         if (attributes->egress)
1730                 return rte_flow_error_set(error, ENOTSUP,
1731                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1732                                           "egress is not supported");
1733         if (attributes->transfer && !priv->config.dv_esw_en)
1734                 return rte_flow_error_set(error, ENOTSUP,
1735                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1736                                           NULL, "transfer is not supported");
1737         if (!attributes->ingress)
1738                 return rte_flow_error_set(error, EINVAL,
1739                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1740                                           NULL,
1741                                           "ingress attribute is mandatory");
1742         return 0;
1743 }
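
/*
 * Illustrative sketch (hypothetical helper): the attribute combination
 * accepted by the checks above is ingress in group 0 with an in-range
 * priority; transfer additionally requires dv_esw_en.
 */
static inline int
mlx5_example_validate_attr(struct rte_eth_dev *dev,
                           struct rte_flow_error *error)
{
        const struct rte_flow_attr attr = {
                .group = 0,
                .priority = 0,
                .ingress = 1,
        };

        return mlx5_flow_validate_attributes(dev, &attr, error);
}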
1744
1745 /**
1746  * Validate ICMP6 item.
1747  *
1748  * @param[in] item
1749  *   Item specification.
1750  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1754  * @param[out] error
1755  *   Pointer to error structure.
1756  *
1757  * @return
1758  *   0 on success, a negative errno value otherwise and rte_errno is set.
1759  */
1760 int
1761 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1762                                uint64_t item_flags,
1763                                uint8_t target_protocol,
1764                                struct rte_flow_error *error)
1765 {
1766         const struct rte_flow_item_icmp6 *mask = item->mask;
1767         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1768         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1769                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1770         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1771                                       MLX5_FLOW_LAYER_OUTER_L4;
1772         int ret;
1773
1774         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1775                 return rte_flow_error_set(error, EINVAL,
1776                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1777                                           "protocol filtering not compatible"
1778                                           " with ICMP6 layer");
1779         if (!(item_flags & l3m))
1780                 return rte_flow_error_set(error, EINVAL,
1781                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1782                                           "IPv6 is mandatory to filter on"
1783                                           " ICMP6");
1784         if (item_flags & l4m)
1785                 return rte_flow_error_set(error, EINVAL,
1786                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1787                                           "multiple L4 layers not supported");
1788         if (!mask)
1789                 mask = &rte_flow_item_icmp6_mask;
1790         ret = mlx5_flow_item_acceptable
1791                 (item, (const uint8_t *)mask,
1792                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1793                  sizeof(struct rte_flow_item_icmp6),
1794                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1795         if (ret < 0)
1796                 return ret;
1797         return 0;
1798 }
1799
1800 /**
1801  * Validate ICMP item.
1802  *
1803  * @param[in] item
1804  *   Item specification.
1805  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1807  * @param[out] error
1808  *   Pointer to error structure.
1809  *
1810  * @return
1811  *   0 on success, a negative errno value otherwise and rte_errno is set.
1812  */
1813 int
1814 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1815                              uint64_t item_flags,
1816                              uint8_t target_protocol,
1817                              struct rte_flow_error *error)
1818 {
1819         const struct rte_flow_item_icmp *mask = item->mask;
1820         const struct rte_flow_item_icmp nic_mask = {
1821                 .hdr.icmp_type = 0xff,
1822                 .hdr.icmp_code = 0xff,
1823                 .hdr.icmp_ident = RTE_BE16(0xffff),
1824                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1825         };
1826         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1827         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1828                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1829         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1830                                       MLX5_FLOW_LAYER_OUTER_L4;
1831         int ret;
1832
1833         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1834                 return rte_flow_error_set(error, EINVAL,
1835                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1836                                           "protocol filtering not compatible"
1837                                           " with ICMP layer");
1838         if (!(item_flags & l3m))
1839                 return rte_flow_error_set(error, EINVAL,
1840                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1841                                           "IPv4 is mandatory to filter"
1842                                           " on ICMP");
1843         if (item_flags & l4m)
1844                 return rte_flow_error_set(error, EINVAL,
1845                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1846                                           "multiple L4 layers not supported");
1847         if (!mask)
1848                 mask = &nic_mask;
1849         ret = mlx5_flow_item_acceptable
1850                 (item, (const uint8_t *)mask,
1851                  (const uint8_t *)&nic_mask,
1852                  sizeof(struct rte_flow_item_icmp),
1853                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1854         if (ret < 0)
1855                 return ret;
1856         return 0;
1857 }
1858
1859 /**
1860  * Validate Ethernet item.
1861  *
1862  * @param[in] item
1863  *   Item specification.
1864  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
1866  * @param[out] error
1867  *   Pointer to error structure.
1868  *
1869  * @return
1870  *   0 on success, a negative errno value otherwise and rte_errno is set.
1871  */
1872 int
1873 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1874                             uint64_t item_flags, bool ext_vlan_sup,
1875                             struct rte_flow_error *error)
1876 {
1877         const struct rte_flow_item_eth *mask = item->mask;
1878         const struct rte_flow_item_eth nic_mask = {
1879                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1880                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1881                 .type = RTE_BE16(0xffff),
1882                 .has_vlan = ext_vlan_sup ? 1 : 0,
1883         };
1884         int ret;
1885         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1886         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1887                                        MLX5_FLOW_LAYER_OUTER_L2;
1888
1889         if (item_flags & ethm)
1890                 return rte_flow_error_set(error, ENOTSUP,
1891                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1892                                           "multiple L2 layers not supported");
1893         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1894             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1895                 return rte_flow_error_set(error, EINVAL,
1896                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1897                                           "L2 layer should not follow "
1898                                           "L3 layers");
1899         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1900             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1901                 return rte_flow_error_set(error, EINVAL,
1902                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1903                                           "L2 layer should not follow VLAN");
1904         if (!mask)
1905                 mask = &rte_flow_item_eth_mask;
1906         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1907                                         (const uint8_t *)&nic_mask,
1908                                         sizeof(struct rte_flow_item_eth),
1909                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1910         return ret;
1911 }
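
/*
 * Illustrative only (hypothetical values): match on a unicast
 * destination MAC. The all-ones mask bytes select the fields to match;
 * everything else is wildcarded.
 */
static inline int
mlx5_example_validate_eth(struct rte_flow_error *error)
{
        static const struct rte_flow_item_eth spec = {
                .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
        };
        static const struct rte_flow_item_eth mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_eth(&item, 0, false, error);
}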
1912
1913 /**
1914  * Validate VLAN item.
1915  *
1916  * @param[in] item
1917  *   Item specification.
1918  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
1920  * @param[in] dev
1921  *   Ethernet device flow is being created on.
1922  * @param[out] error
1923  *   Pointer to error structure.
1924  *
1925  * @return
1926  *   0 on success, a negative errno value otherwise and rte_errno is set.
1927  */
1928 int
1929 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1930                              uint64_t item_flags,
1931                              struct rte_eth_dev *dev,
1932                              struct rte_flow_error *error)
1933 {
1934         const struct rte_flow_item_vlan *spec = item->spec;
1935         const struct rte_flow_item_vlan *mask = item->mask;
1936         const struct rte_flow_item_vlan nic_mask = {
1937                 .tci = RTE_BE16(UINT16_MAX),
1938                 .inner_type = RTE_BE16(UINT16_MAX),
1939         };
1940         uint16_t vlan_tag = 0;
1941         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1942         int ret;
1943         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1944                                         MLX5_FLOW_LAYER_INNER_L4) :
1945                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1946                                         MLX5_FLOW_LAYER_OUTER_L4);
1947         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1948                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1949
1950         if (item_flags & vlanm)
1951                 return rte_flow_error_set(error, EINVAL,
1952                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1953                                           "multiple VLAN layers not supported");
1954         else if ((item_flags & l34m) != 0)
1955                 return rte_flow_error_set(error, EINVAL,
1956                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1957                                           "VLAN cannot follow L3/L4 layer");
1958         if (!mask)
1959                 mask = &rte_flow_item_vlan_mask;
1960         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1961                                         (const uint8_t *)&nic_mask,
1962                                         sizeof(struct rte_flow_item_vlan),
1963                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1964         if (ret)
1965                 return ret;
1966         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1967                 struct mlx5_priv *priv = dev->data->dev_private;
1968
1969                 if (priv->vmwa_context) {
1970                         /*
                         * A non-NULL context means a virtual machine with
                         * SR-IOV is enabled: a VLAN interface must be
                         * created to make the hypervisor set up the
                         * E-Switch vport context correctly. Creating
                         * multiple VLAN interfaces is avoided, so a VLAN
                         * tag mask cannot be supported.
1976                          */
1977                         return rte_flow_error_set(error, EINVAL,
1978                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1979                                                   item,
1980                                                   "VLAN tag mask is not"
1981                                                   " supported in virtual"
1982                                                   " environment");
1983                 }
1984         }
1985         if (spec) {
1986                 vlan_tag = spec->tci;
1987                 vlan_tag &= mask->tci;
1988         }
1989         /*
1990          * From verbs perspective an empty VLAN is equivalent
1991          * to a packet without VLAN layer.
1992          */
1993         if (!vlan_tag)
1994                 return rte_flow_error_set(error, EINVAL,
1995                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1996                                           item->spec,
1997                                           "VLAN cannot be empty");
1998         return 0;
1999 }
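
/*
 * Illustrative only (hypothetical values): an outer VLAN item matching
 * VID 100. Under the rule above, the TCI spec masked by the TCI mask
 * must be non-zero.
 */
static inline int
mlx5_example_validate_vlan(struct rte_eth_dev *dev,
                           struct rte_flow_error *error)
{
        static const struct rte_flow_item_vlan spec = {
                .tci = RTE_BE16(100),
        };
        static const struct rte_flow_item_vlan mask = {
                .tci = RTE_BE16(0x0fff),
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_vlan(&item, MLX5_FLOW_LAYER_OUTER_L2,
                                            dev, error);
}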
2000
2001 /**
2002  * Validate IPV4 item.
2003  *
2004  * @param[in] item
2005  *   Item specification.
2006  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2008  * @param[in] last_item
 *   Previously validated item in the pattern.
2010  * @param[in] ether_type
2011  *   Type in the ethernet layer header (including dot1q).
2012  * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
2015  * @param[in] range_accepted
2016  *   True if range of values is accepted for specific fields, false otherwise.
2017  * @param[out] error
2018  *   Pointer to error structure.
2019  *
2020  * @return
2021  *   0 on success, a negative errno value otherwise and rte_errno is set.
2022  */
2023 int
2024 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2025                              uint64_t item_flags,
2026                              uint64_t last_item,
2027                              uint16_t ether_type,
2028                              const struct rte_flow_item_ipv4 *acc_mask,
2029                              bool range_accepted,
2030                              struct rte_flow_error *error)
2031 {
2032         const struct rte_flow_item_ipv4 *mask = item->mask;
2033         const struct rte_flow_item_ipv4 *spec = item->spec;
2034         const struct rte_flow_item_ipv4 nic_mask = {
2035                 .hdr = {
2036                         .src_addr = RTE_BE32(0xffffffff),
2037                         .dst_addr = RTE_BE32(0xffffffff),
2038                         .type_of_service = 0xff,
2039                         .next_proto_id = 0xff,
2040                 },
2041         };
2042         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2043         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2044                                       MLX5_FLOW_LAYER_OUTER_L3;
2045         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2046                                       MLX5_FLOW_LAYER_OUTER_L4;
2047         int ret;
2048         uint8_t next_proto = 0xFF;
2049         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2050                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2051                                   MLX5_FLOW_LAYER_INNER_VLAN);
2052
2053         if ((last_item & l2_vlan) && ether_type &&
2054             ether_type != RTE_ETHER_TYPE_IPV4)
2055                 return rte_flow_error_set(error, EINVAL,
2056                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "IPv4 cannot follow an L2/VLAN layer "
                                          "whose ether type is not IPv4");
2059         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2060                 if (mask && spec)
2061                         next_proto = mask->hdr.next_proto_id &
2062                                      spec->hdr.next_proto_id;
2063                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2064                         return rte_flow_error_set(error, EINVAL,
2065                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2066                                                   item,
2067                                                   "multiple tunnel "
2068                                                   "not supported");
2069         }
2070         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2071                 return rte_flow_error_set(error, EINVAL,
2072                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2073                                           "wrong tunnel type - IPv6 specified "
2074                                           "but IPv4 item provided");
2075         if (item_flags & l3m)
2076                 return rte_flow_error_set(error, ENOTSUP,
2077                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2078                                           "multiple L3 layers not supported");
2079         else if (item_flags & l4m)
2080                 return rte_flow_error_set(error, EINVAL,
2081                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2082                                           "L3 cannot follow an L4 layer.");
2083         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2084                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2085                 return rte_flow_error_set(error, EINVAL,
2086                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2087                                           "L3 cannot follow an NVGRE layer.");
2088         if (!mask)
2089                 mask = &rte_flow_item_ipv4_mask;
2090         else if (mask->hdr.next_proto_id != 0 &&
2091                  mask->hdr.next_proto_id != 0xff)
2092                 return rte_flow_error_set(error, EINVAL,
2093                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2094                                           "partial mask is not supported"
2095                                           " for protocol");
2096         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2097                                         acc_mask ? (const uint8_t *)acc_mask
2098                                                  : (const uint8_t *)&nic_mask,
2099                                         sizeof(struct rte_flow_item_ipv4),
2100                                         range_accepted, error);
2101         if (ret < 0)
2102                 return ret;
2103         return 0;
2104 }
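
/*
 * Illustrative only (hypothetical values): an IPv4 item following the
 * outer L2 layer, matching UDP packets from 10.0.0.0/24. As enforced
 * above, the next_proto_id mask must be either 0 or 0xff.
 */
static inline int
mlx5_example_validate_ipv4(struct rte_flow_error *error)
{
        static const struct rte_flow_item_ipv4 spec = {
                .hdr = {
                        .src_addr = RTE_BE32(0x0a000000), /* 10.0.0.0 */
                        .next_proto_id = IPPROTO_UDP,
                },
        };
        static const struct rte_flow_item_ipv4 mask = {
                .hdr = {
                        .src_addr = RTE_BE32(0xffffff00), /* /24 prefix. */
                        .next_proto_id = 0xff,
                },
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_ipv4(&item, MLX5_FLOW_LAYER_OUTER_L2,
                                            MLX5_FLOW_LAYER_OUTER_L2,
                                            RTE_ETHER_TYPE_IPV4, NULL,
                                            MLX5_ITEM_RANGE_NOT_ACCEPTED,
                                            error);
}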
2105
2106 /**
2107  * Validate IPV6 item.
2108  *
2109  * @param[in] item
2110  *   Item specification.
 *   Bit-fields that hold the items detected until now.
2112  *   Bit-fields that holds the items detected until now.
2113  * @param[in] last_item
 *   Previously validated item in the pattern.
2115  * @param[in] ether_type
2116  *   Type in the ethernet layer header (including dot1q).
2117  * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
 *   will be used to check whether item fields are supported.
2120  * @param[out] error
2121  *   Pointer to error structure.
2122  *
2123  * @return
2124  *   0 on success, a negative errno value otherwise and rte_errno is set.
2125  */
2126 int
2127 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2128                              uint64_t item_flags,
2129                              uint64_t last_item,
2130                              uint16_t ether_type,
2131                              const struct rte_flow_item_ipv6 *acc_mask,
2132                              struct rte_flow_error *error)
2133 {
2134         const struct rte_flow_item_ipv6 *mask = item->mask;
2135         const struct rte_flow_item_ipv6 *spec = item->spec;
2136         const struct rte_flow_item_ipv6 nic_mask = {
2137                 .hdr = {
2138                         .src_addr =
2139                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2140                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2141                         .dst_addr =
2142                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2143                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2144                         .vtc_flow = RTE_BE32(0xffffffff),
2145                         .proto = 0xff,
2146                 },
2147         };
2148         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2149         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2150                                       MLX5_FLOW_LAYER_OUTER_L3;
2151         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2152                                       MLX5_FLOW_LAYER_OUTER_L4;
2153         int ret;
2154         uint8_t next_proto = 0xFF;
2155         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2156                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2157                                   MLX5_FLOW_LAYER_INNER_VLAN);
2158
2159         if ((last_item & l2_vlan) && ether_type &&
2160             ether_type != RTE_ETHER_TYPE_IPV6)
2161                 return rte_flow_error_set(error, EINVAL,
2162                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "IPv6 cannot follow an L2/VLAN layer "
                                          "whose ether type is not IPv6");
2165         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2166                 next_proto = spec->hdr.proto;
2167         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
2168                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2169                         return rte_flow_error_set(error, EINVAL,
2170                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2171                                                   item,
2172                                                   "multiple tunnel "
2173                                                   "not supported");
2174         }
2175         if (next_proto == IPPROTO_HOPOPTS  ||
2176             next_proto == IPPROTO_ROUTING  ||
2177             next_proto == IPPROTO_FRAGMENT ||
2178             next_proto == IPPROTO_ESP      ||
2179             next_proto == IPPROTO_AH       ||
2180             next_proto == IPPROTO_DSTOPTS)
2181                 return rte_flow_error_set(error, EINVAL,
2182                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2183                                           "IPv6 proto (next header) should "
2184                                           "not be set as extension header");
2185         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2186                 return rte_flow_error_set(error, EINVAL,
2187                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2188                                           "wrong tunnel type - IPv4 specified "
2189                                           "but IPv6 item provided");
2190         if (item_flags & l3m)
2191                 return rte_flow_error_set(error, ENOTSUP,
2192                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2193                                           "multiple L3 layers not supported");
2194         else if (item_flags & l4m)
2195                 return rte_flow_error_set(error, EINVAL,
2196                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2197                                           "L3 cannot follow an L4 layer.");
2198         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2199                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2200                 return rte_flow_error_set(error, EINVAL,
2201                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2202                                           "L3 cannot follow an NVGRE layer.");
2203         if (!mask)
2204                 mask = &rte_flow_item_ipv6_mask;
2205         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2206                                         acc_mask ? (const uint8_t *)acc_mask
2207                                                  : (const uint8_t *)&nic_mask,
2208                                         sizeof(struct rte_flow_item_ipv6),
2209                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2210         if (ret < 0)
2211                 return ret;
2212         return 0;
2213 }
2214
2215 /**
2216  * Validate UDP item.
2217  *
2218  * @param[in] item
2219  *   Item specification.
2220  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
2226  * @param[out] error
2227  *   Pointer to error structure.
2228  *
2229  * @return
2230  *   0 on success, a negative errno value otherwise and rte_errno is set.
2231  */
2232 int
2233 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2234                             uint64_t item_flags,
2235                             uint8_t target_protocol,
2236                             struct rte_flow_error *error)
2237 {
2238         const struct rte_flow_item_udp *mask = item->mask;
2239         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2240         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2241                                       MLX5_FLOW_LAYER_OUTER_L3;
2242         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2243                                       MLX5_FLOW_LAYER_OUTER_L4;
2244         int ret;
2245
2246         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2247                 return rte_flow_error_set(error, EINVAL,
2248                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2249                                           "protocol filtering not compatible"
2250                                           " with UDP layer");
2251         if (!(item_flags & l3m))
2252                 return rte_flow_error_set(error, EINVAL,
2253                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2254                                           "L3 is mandatory to filter on L4");
2255         if (item_flags & l4m)
2256                 return rte_flow_error_set(error, EINVAL,
2257                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2258                                           "multiple L4 layers not supported");
2259         if (!mask)
2260                 mask = &rte_flow_item_udp_mask;
2261         ret = mlx5_flow_item_acceptable
2262                 (item, (const uint8_t *)mask,
2263                  (const uint8_t *)&rte_flow_item_udp_mask,
2264                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2265                  error);
2266         if (ret < 0)
2267                 return ret;
2268         return 0;
2269 }
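
/*
 * Illustrative only (hypothetical helper): a UDP item with the default
 * mask, placed after an outer IPv4 layer whose protocol field was
 * either left unspecified (0xff here) or set to UDP.
 */
static inline int
mlx5_example_validate_udp(struct rte_flow_error *error)
{
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .spec = NULL, /* Match any UDP header. */
                .mask = NULL, /* The default mask applies. */
        };
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4;

        return mlx5_flow_validate_item_udp(&item, item_flags, 0xff, error);
}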
2270
2271 /**
2272  * Validate TCP item.
2273  *
2274  * @param[in] item
2275  *   Item specification.
2276  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2280  * @param[out] error
2281  *   Pointer to error structure.
2282  *
2283  * @return
2284  *   0 on success, a negative errno value otherwise and rte_errno is set.
2285  */
2286 int
2287 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2288                             uint64_t item_flags,
2289                             uint8_t target_protocol,
2290                             const struct rte_flow_item_tcp *flow_mask,
2291                             struct rte_flow_error *error)
2292 {
2293         const struct rte_flow_item_tcp *mask = item->mask;
2294         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2295         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2296                                       MLX5_FLOW_LAYER_OUTER_L3;
2297         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2298                                       MLX5_FLOW_LAYER_OUTER_L4;
2299         int ret;
2300
2301         MLX5_ASSERT(flow_mask);
2302         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2303                 return rte_flow_error_set(error, EINVAL,
2304                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2305                                           "protocol filtering not compatible"
2306                                           " with TCP layer");
2307         if (!(item_flags & l3m))
2308                 return rte_flow_error_set(error, EINVAL,
2309                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2310                                           "L3 is mandatory to filter on L4");
2311         if (item_flags & l4m)
2312                 return rte_flow_error_set(error, EINVAL,
2313                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2314                                           "multiple L4 layers not supported");
2315         if (!mask)
2316                 mask = &rte_flow_item_tcp_mask;
2317         ret = mlx5_flow_item_acceptable
2318                 (item, (const uint8_t *)mask,
2319                  (const uint8_t *)flow_mask,
2320                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2321                  error);
2322         if (ret < 0)
2323                 return ret;
2324         return 0;
2325 }
2326
2327 /**
2328  * Validate VXLAN item.
2329  *
2330  * @param[in] item
2331  *   Item specification.
2332  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2336  * @param[out] error
2337  *   Pointer to error structure.
2338  *
2339  * @return
2340  *   0 on success, a negative errno value otherwise and rte_errno is set.
2341  */
2342 int
2343 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2344                               uint64_t item_flags,
2345                               struct rte_flow_error *error)
2346 {
2347         const struct rte_flow_item_vxlan *spec = item->spec;
2348         const struct rte_flow_item_vxlan *mask = item->mask;
2349         int ret;
2350         union vni {
2351                 uint32_t vlan_id;
2352                 uint8_t vni[4];
        } id = { .vlan_id = 0, };

2356         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2357                 return rte_flow_error_set(error, ENOTSUP,
2358                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2359                                           "multiple tunnel layers not"
2360                                           " supported");
2361         /*
2362          * Verify only UDPv4 is present as defined in
2363          * https://tools.ietf.org/html/rfc7348
2364          */
2365         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2366                 return rte_flow_error_set(error, EINVAL,
2367                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2368                                           "no outer UDP layer found");
2369         if (!mask)
2370                 mask = &rte_flow_item_vxlan_mask;
2371         ret = mlx5_flow_item_acceptable
2372                 (item, (const uint8_t *)mask,
2373                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2374                  sizeof(struct rte_flow_item_vxlan),
2375                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2376         if (ret < 0)
2377                 return ret;
        if (spec) {
                unsigned int i;

                /* Apply the mask to the VNI bytes of the spec. */
                memcpy(&id.vni[1], spec->vni, 3);
                for (i = 0; i < 3; ++i)
                        id.vni[1 + i] &= mask->vni[i];
        }
2382         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2383                 return rte_flow_error_set(error, ENOTSUP,
2384                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2385                                           "VXLAN tunnel must be fully defined");
2386         return 0;
2387 }
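
/*
 * Illustrative only (hypothetical values): a fully specified VXLAN
 * match (outer L2/L3/UDP already validated) on the 24-bit VNI 42.
 */
static inline int
mlx5_example_validate_vxlan(struct rte_flow_error *error)
{
        static const struct rte_flow_item_vxlan spec = {
                .vni = "\x00\x00\x2a", /* VNI 42 in network order. */
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
                .spec = &spec,
                .mask = &rte_flow_item_vxlan_mask,
        };
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                                    MLX5_FLOW_LAYER_OUTER_L4_UDP;

        return mlx5_flow_validate_item_vxlan(&item, item_flags, error);
}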
2388
2389 /**
2390  * Validate VXLAN_GPE item.
2391  *
2392  * @param[in] item
2393  *   Item specification.
2394  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
2400  * @param[out] error
2401  *   Pointer to error structure.
2402  *
2403  * @return
2404  *   0 on success, a negative errno value otherwise and rte_errno is set.
2405  */
2406 int
2407 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2408                                   uint64_t item_flags,
2409                                   struct rte_eth_dev *dev,
2410                                   struct rte_flow_error *error)
2411 {
2412         struct mlx5_priv *priv = dev->data->dev_private;
2413         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2414         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2415         int ret;
2416         union vni {
2417                 uint32_t vlan_id;
2418                 uint8_t vni[4];
2419         } id = { .vlan_id = 0, };
2420
2421         if (!priv->config.l3_vxlan_en)
2422                 return rte_flow_error_set(error, ENOTSUP,
2423                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2424                                           "L3 VXLAN is not enabled by device"
2425                                           " parameter and/or not configured in"
2426                                           " firmware");
2427         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2428                 return rte_flow_error_set(error, ENOTSUP,
2429                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2430                                           "multiple tunnel layers not"
2431                                           " supported");
2432         /*
2433          * Verify only UDPv4 is present as defined in
2434          * https://tools.ietf.org/html/rfc7348
2435          */
2436         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2437                 return rte_flow_error_set(error, EINVAL,
2438                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2439                                           "no outer UDP layer found");
2440         if (!mask)
2441                 mask = &rte_flow_item_vxlan_gpe_mask;
2442         ret = mlx5_flow_item_acceptable
2443                 (item, (const uint8_t *)mask,
2444                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2445                  sizeof(struct rte_flow_item_vxlan_gpe),
2446                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2447         if (ret < 0)
2448                 return ret;
        if (spec) {
                unsigned int i;

2450                 if (spec->protocol)
2451                         return rte_flow_error_set(error, ENOTSUP,
2452                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2453                                                   item,
2454                                                   "VxLAN-GPE protocol"
2455                                                   " not supported");
                /* Apply the mask to the VNI bytes of the spec. */
                memcpy(&id.vni[1], spec->vni, 3);
                for (i = 0; i < 3; ++i)
                        id.vni[1 + i] &= mask->vni[i];
2458         }
2459         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2460                 return rte_flow_error_set(error, ENOTSUP,
2461                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2462                                           "VXLAN-GPE tunnel must be fully"
2463                                           " defined");
2464         return 0;
2465 }
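
/*
 * Usage sketch (illustrative only): a VXLAN-GPE item is accepted only when
 * the preceding pattern already provides an outer UDP layer, e.g. for the
 * pattern eth / ipv4 / udp / vxlan-gpe a hypothetical caller would pass:
 *
 *	uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
 *			      MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *			      MLX5_FLOW_LAYER_OUTER_L4_UDP;
 *
 *	ret = mlx5_flow_validate_item_vxlan_gpe(item, item_flags, dev, error);
 */
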
2466 /**
2467  * Validate GRE Key item.
2468  *
2469  * @param[in] item
2470  *   Item specification.
2471  * @param[in] item_flags
2472  *   Bit flags to mark detected items.
2473  * @param[in] gre_item
2474  *   Pointer to the preceding GRE item.
2475  * @param[out] error
2476  *   Pointer to error structure.
2477  *
2478  * @return
2479  *   0 on success, a negative errno value otherwise and rte_errno is set.
2480  */
2481 int
2482 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2483                                 uint64_t item_flags,
2484                                 const struct rte_flow_item *gre_item,
2485                                 struct rte_flow_error *error)
2486 {
2487         const rte_be32_t *mask = item->mask;
2488         int ret = 0;
2489         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2490         const struct rte_flow_item_gre *gre_spec;
2491         const struct rte_flow_item_gre *gre_mask;
2492
2493         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2494                 return rte_flow_error_set(error, ENOTSUP,
2495                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2496                                           "Multiple GRE keys are not supported");
2497         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2498                 return rte_flow_error_set(error, ENOTSUP,
2499                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2500                                           "No preceding GRE header");
2501         if (item_flags & MLX5_FLOW_LAYER_INNER)
2502                 return rte_flow_error_set(error, ENOTSUP,
2503                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2504                                           "GRE key cannot follow an inner layer");
2505         gre_mask = gre_item->mask;
2506         if (!gre_mask)
2507                 gre_mask = &rte_flow_item_gre_mask;
2508         gre_spec = gre_item->spec;
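        /* RTE_BE16(0x2000) is the K (Key Present) bit of the GRE header. */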
2509         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2510                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2511                 return rte_flow_error_set(error, EINVAL,
2512                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2513                                           "Key bit must be on");
2514
2515         if (!mask)
2516                 mask = &gre_key_default_mask;
2517         ret = mlx5_flow_item_acceptable
2518                 (item, (const uint8_t *)mask,
2519                  (const uint8_t *)&gre_key_default_mask,
2520                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2521         return ret;
2522 }
2523
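/*
 * Illustrative example (values are hypothetical): a GRE item that sets the
 * K bit, followed by a gre_key item, satisfies the checks above:
 *
 *	const struct rte_flow_item_gre gre_spec = {
 *		.c_rsvd0_ver = RTE_BE16(0x2000),
 *	};
 *	const rte_be32_t gre_key_spec = RTE_BE32(0x12345678);
 *
 * Pattern: eth / ipv4 / gre (gre_spec) / gre_key (gre_key_spec).
 */
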
2524 /**
2525  * Validate GRE item.
2526  *
2527  * @param[in] item
2528  *   Item specification.
2529  * @param[in] item_flags
2530  *   Bit flags to mark detected items.
2531  * @param[in] target_protocol
2532  *   The next protocol in the previous item.
2533  * @param[out] error
2534  *   Pointer to error structure.
2535  *
2536  * @return
2537  *   0 on success, a negative errno value otherwise and rte_errno is set.
2538  */
2539 int
2540 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2541                             uint64_t item_flags,
2542                             uint8_t target_protocol,
2543                             struct rte_flow_error *error)
2544 {
2545         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2546         const struct rte_flow_item_gre *mask = item->mask;
2547         int ret;
2548         const struct rte_flow_item_gre nic_mask = {
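                /* 0xB000 covers the C, K and S flag bits of the GRE header. */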
2549                 .c_rsvd0_ver = RTE_BE16(0xB000),
2550                 .protocol = RTE_BE16(UINT16_MAX),
2551         };
2552
2553         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2554                 return rte_flow_error_set(error, EINVAL,
2555                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2556                                           "protocol filtering not compatible"
2557                                           " with this GRE layer");
2558         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2559                 return rte_flow_error_set(error, ENOTSUP,
2560                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2561                                           "multiple tunnel layers not"
2562                                           " supported");
2563         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2564                 return rte_flow_error_set(error, ENOTSUP,
2565                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2566                                           "L3 Layer is missing");
2567         if (!mask)
2568                 mask = &rte_flow_item_gre_mask;
2569         ret = mlx5_flow_item_acceptable
2570                 (item, (const uint8_t *)mask,
2571                  (const uint8_t *)&nic_mask,
2572                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2573                  error);
2574         if (ret < 0)
2575                 return ret;
2576 #ifndef HAVE_MLX5DV_DR
2577 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2578         if (spec && (spec->protocol & mask->protocol))
2579                 return rte_flow_error_set(error, ENOTSUP,
2580                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2581                                           "without MPLS support the"
2582                                           " specification cannot be used for"
2583                                           " filtering");
2584 #endif
2585 #endif
2586         return 0;
2587 }
2588
2589 /**
2590  * Validate Geneve item.
2591  *
2592  * @param[in] item
2593  *   Item specification.
2594  * @param[in] item_flags
2595  *   Bit-fields that hold the items detected until now.
2596  * @param[in] dev
2597  *   Pointer to the rte_eth_dev structure.
2598  * @param[out] error
2599  *   Pointer to error structure.
2600  *
2601  * @return
2602  *   0 on success, a negative errno value otherwise and rte_errno is set.
2603  */
2605 int
2606 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2607                                uint64_t item_flags,
2608                                struct rte_eth_dev *dev,
2609                                struct rte_flow_error *error)
2610 {
2611         struct mlx5_priv *priv = dev->data->dev_private;
2612         const struct rte_flow_item_geneve *spec = item->spec;
2613         const struct rte_flow_item_geneve *mask = item->mask;
2614         int ret;
2615         uint16_t gbhdr;
2616         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2617                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2618         const struct rte_flow_item_geneve nic_mask = {
2619                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2620                 .vni = "\xff\xff\xff",
2621                 .protocol = RTE_BE16(UINT16_MAX),
2622         };
2623
2624         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2625                 return rte_flow_error_set(error, ENOTSUP,
2626                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2627                                           "L3 Geneve is not enabled by device"
2628                                           " parameter and/or not configured in"
2629                                           " firmware");
2630         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2631                 return rte_flow_error_set(error, ENOTSUP,
2632                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2633                                           "multiple tunnel layers not"
2634                                           " supported");
2635         /*
2636          * Verify an outer UDP header is present, as required by the
2637          * Geneve encapsulation (https://tools.ietf.org/html/rfc8926).
2638          */
2639         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2640                 return rte_flow_error_set(error, EINVAL,
2641                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2642                                           "no outer UDP layer found");
2643         if (!mask)
2644                 mask = &rte_flow_item_geneve_mask;
2645         ret = mlx5_flow_item_acceptable
2646                                   (item, (const uint8_t *)mask,
2647                                    (const uint8_t *)&nic_mask,
2648                                    sizeof(struct rte_flow_item_geneve),
2649                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2650         if (ret < 0)
2651                 return ret;
2652         if (spec) {
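                /*
                 * ver_opt_len_o_c_rsvd0 layout (RFC 8926):
                 * Ver (2 bits) | Opt Len (6 bits) | O (1) | C (1) | Rsvd (6).
                 */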
2653                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2654                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2655                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2656                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2657                         return rte_flow_error_set(error, ENOTSUP,
2658                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2659                                                   item,
2660                                                   "Geneve protocol unsupported"
2661                                                   " fields are being used");
2662                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2663                         return rte_flow_error_set
2664                                         (error, ENOTSUP,
2665                                          RTE_FLOW_ERROR_TYPE_ITEM,
2666                                          item,
2667                                          "Unsupported Geneve options length");
2668         }
2669         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2670                 return rte_flow_error_set
2671                                     (error, ENOTSUP,
2672                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2673                                      "Geneve tunnel must be fully defined");
2674         return 0;
2675 }
2676
2677 /**
2678  * Validate Geneve TLV option item.
2679  *
2680  * @param[in] item
2681  *   Item specification.
2682  * @param[in] last_item
2683  *   Previous validated item in the pattern items.
2684  * @param[in] geneve_item
2685  *   Previous GENEVE item specification.
2686  * @param[in] dev
2687  *   Pointer to the rte_eth_dev structure.
2688  * @param[out] error
2689  *   Pointer to error structure.
2690  *
2691  * @return
2692  *   0 on success, a negative errno value otherwise and rte_errno is set.
2693  */
2694 int
2695 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2696                                    uint64_t last_item,
2697                                    const struct rte_flow_item *geneve_item,
2698                                    struct rte_eth_dev *dev,
2699                                    struct rte_flow_error *error)
2700 {
2701         struct mlx5_priv *priv = dev->data->dev_private;
2702         struct mlx5_dev_ctx_shared *sh = priv->sh;
2703         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2704         struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
2705         uint8_t data_max_supported =
2706                         hca_attr->max_geneve_tlv_option_data_len * 4;
2707         struct mlx5_dev_config *config = &priv->config;
2708         const struct rte_flow_item_geneve *geneve_spec;
2709         const struct rte_flow_item_geneve *geneve_mask;
2710         const struct rte_flow_item_geneve_opt *spec = item->spec;
2711         const struct rte_flow_item_geneve_opt *mask = item->mask;
2712         unsigned int i;
2713         unsigned int data_len;
2714         uint8_t tlv_option_len;
2715         uint16_t optlen_m, optlen_v;
2716         const struct rte_flow_item_geneve_opt full_mask = {
2717                 .option_class = RTE_BE16(0xffff),
2718                 .option_type = 0xff,
2719                 .option_len = 0x1f,
2720         };
2721
2722         if (!mask)
2723                 mask = &rte_flow_item_geneve_opt_mask;
2724         if (!spec)
2725                 return rte_flow_error_set
2726                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2727                         "Geneve TLV opt class/type/length must be specified");
2728         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2729                 return rte_flow_error_set
2730                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2731                         "Geneve TLV opt length exceeds the limit (31)");
2732         /* Check if class type and length masks are full. */
2733         if (full_mask.option_class != mask->option_class ||
2734             full_mask.option_type != mask->option_type ||
2735             full_mask.option_len != (mask->option_len & full_mask.option_len))
2736                 return rte_flow_error_set
2737                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2738                         "Geneve TLV opt class/type/length masks must be full");
2739         /* Check if length is supported */
2740         if ((uint32_t)spec->option_len >
2741                         config->hca_attr.max_geneve_tlv_option_data_len)
2742                 return rte_flow_error_set
2743                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2744                         "Geneve TLV opt length not supported");
2745         if (config->hca_attr.max_geneve_tlv_options > 1)
2746                 DRV_LOG(DEBUG,
2747                         "the device supports more than one Geneve TLV option");
2748         /* Check GENEVE item preceding. */
2749         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2750                 return rte_flow_error_set
2751                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2752                         "Geneve opt item must be preceded by a Geneve item");
2753         geneve_spec = geneve_item->spec;
2754         geneve_mask = geneve_item->mask ? geneve_item->mask :
2755                                           &rte_flow_item_geneve_mask;
2756         /* Check that the GENEVE header option length covers the TLV option size. */
2757         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2758                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
2759                 tlv_option_len = spec->option_len & mask->option_len;
2760                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2761                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2762                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2763                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2764                 if ((optlen_v & optlen_m) <= tlv_option_len)
2765                         return rte_flow_error_set
2766                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2767                                  "GENEVE TLV option length exceeds optlen");
2768         }
2769         /* Check that the data is present and the length is non-zero. */
2770         if (spec->data == NULL || spec->option_len == 0)
2771                 return rte_flow_error_set
2772                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2773                         "Geneve TLV opt with zero data/length not supported");
2774         /* Check not all data & mask are 0. */
2775         data_len = spec->option_len * 4;
2776         if (mask->data == NULL) {
2777                 for (i = 0; i < data_len; i++)
2778                         if (spec->data[i])
2779                                 break;
2780                 if (i == data_len)
2781                         return rte_flow_error_set(error, ENOTSUP,
2782                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2783                                 "Can't match on Geneve option data 0");
2784         } else {
2785                 for (i = 0; i < data_len; i++)
2786                         if (spec->data[i] & mask->data[i])
2787                                 break;
2788                 if (i == data_len)
2789                         return rte_flow_error_set(error, ENOTSUP,
2790                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2791                                 "Can't match on Geneve option data and mask 0");
2792                 /* Check data mask supported. */
2793                 for (i = data_max_supported; i < data_len; i++)
2794                         if (mask->data[i])
2795                                 return rte_flow_error_set(error, ENOTSUP,
2796                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
2797                                         "Data mask is of unsupported size");
2798         }
2799         /* Check GENEVE option is supported in NIC. */
2800         if (!config->hca_attr.geneve_tlv_opt)
2801                 return rte_flow_error_set
2802                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2803                         "Geneve TLV opt not supported");
2804         /* Check if we already have geneve option with different type/class. */
2805         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
2806         geneve_opt_resource = sh->geneve_tlv_option_resource;
2807         if (geneve_opt_resource != NULL)
2808                 if (geneve_opt_resource->option_class != spec->option_class ||
2809                     geneve_opt_resource->option_type != spec->option_type ||
2810                     geneve_opt_resource->length != spec->option_len) {
2811                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2812                         return rte_flow_error_set(error, ENOTSUP,
2813                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2814                                 "Only one Geneve TLV option supported");
2815                 }
2816         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2817         return 0;
2818 }
2819
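/*
 * Illustrative example (hypothetical values): to pass the checks above, a
 * Geneve TLV option item needs a spec with non-zero data and a mask with
 * fully set class/type/length fields, e.g.:
 *
 *	rte_be32_t opt_data[1] = { RTE_BE32(0xdeadbeef) };
 *	const struct rte_flow_item_geneve_opt opt_spec = {
 *		.option_class = RTE_BE16(0x0102),
 *		.option_type = 0x42,
 *		.option_len = 1,
 *		.data = opt_data,
 *	};
 *	const struct rte_flow_item_geneve_opt opt_mask = {
 *		.option_class = RTE_BE16(0xffff),
 *		.option_type = 0xff,
 *		.option_len = 0x1f,
 *	};
 */
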
2820 /**
2821  * Validate MPLS item.
2822  *
2823  * @param[in] dev
2824  *   Pointer to the rte_eth_dev structure.
2825  * @param[in] item
2826  *   Item specification.
2827  * @param[in] item_flags
2828  *   Bit-fields that hold the items detected until now.
2829  * @param[in] prev_layer
2830  *   The protocol layer indicated by the previous item.
2831  * @param[out] error
2832  *   Pointer to error structure.
2833  *
2834  * @return
2835  *   0 on success, a negative errno value otherwise and rte_errno is set.
2836  */
2837 int
2838 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2839                              const struct rte_flow_item *item __rte_unused,
2840                              uint64_t item_flags __rte_unused,
2841                              uint64_t prev_layer __rte_unused,
2842                              struct rte_flow_error *error)
2843 {
2844 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2845         const struct rte_flow_item_mpls *mask = item->mask;
2846         struct mlx5_priv *priv = dev->data->dev_private;
2847         int ret;
2848
2849         if (!priv->config.mpls_en)
2850                 return rte_flow_error_set(error, ENOTSUP,
2851                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2852                                           "MPLS not supported or"
2853                                           " disabled in firmware"
2854                                           " configuration.");
2855         /* MPLS over IP, UDP, GRE is allowed */
2856         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2857                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2858                             MLX5_FLOW_LAYER_GRE |
2859                             MLX5_FLOW_LAYER_GRE_KEY)))
2860                 return rte_flow_error_set(error, EINVAL,
2861                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2862                                           "protocol filtering not compatible"
2863                                           " with MPLS layer");
2864         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2865         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2866             !(item_flags & MLX5_FLOW_LAYER_GRE))
2867                 return rte_flow_error_set(error, ENOTSUP,
2868                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2869                                           "multiple tunnel layers not"
2870                                           " supported");
2871         if (!mask)
2872                 mask = &rte_flow_item_mpls_mask;
2873         ret = mlx5_flow_item_acceptable
2874                 (item, (const uint8_t *)mask,
2875                  (const uint8_t *)&rte_flow_item_mpls_mask,
2876                  sizeof(struct rte_flow_item_mpls),
2877                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2878         if (ret < 0)
2879                 return ret;
2880         return 0;
2881 #else
2882         return rte_flow_error_set(error, ENOTSUP,
2883                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2884                                   "MPLS is not supported by Verbs, please"
2885                                   " update rdma-core.");
2886 #endif
2887 }
2888
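/*
 * Illustrative patterns for this validator: MPLS over GRE is the single
 * allowed tunnel-in-tunnel case, e.g. eth / ipv4 / gre / mpls, while plain
 * eth / ipv4 / udp / mpls relies on the MLX5_FLOW_LAYER_OUTER_L4_UDP bit
 * set by the preceding UDP item.
 */
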
2889 /**
2890  * Validate NVGRE item.
2891  *
2892  * @param[in] item
2893  *   Item specification.
2894  * @param[in] item_flags
2895  *   Bit flags to mark detected items.
2896  * @param[in] target_protocol
2897  *   The next protocol in the previous item.
2898  * @param[out] error
2899  *   Pointer to error structure.
2900  *
2901  * @return
2902  *   0 on success, a negative errno value otherwise and rte_errno is set.
2903  */
2904 int
2905 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2906                               uint64_t item_flags,
2907                               uint8_t target_protocol,
2908                               struct rte_flow_error *error)
2909 {
2910         const struct rte_flow_item_nvgre *mask = item->mask;
2911         int ret;
2912
2913         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2914                 return rte_flow_error_set(error, EINVAL,
2915                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2916                                           "protocol filtering not compatible"
2917                                           " with this GRE layer");
2918         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2919                 return rte_flow_error_set(error, ENOTSUP,
2920                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2921                                           "multiple tunnel layers not"
2922                                           " supported");
2923         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2924                 return rte_flow_error_set(error, ENOTSUP,
2925                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2926                                           "L3 Layer is missing");
2927         if (!mask)
2928                 mask = &rte_flow_item_nvgre_mask;
2929         ret = mlx5_flow_item_acceptable
2930                 (item, (const uint8_t *)mask,
2931                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2932                  sizeof(struct rte_flow_item_nvgre),
2933                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2934         if (ret < 0)
2935                 return ret;
2936         return 0;
2937 }
2938
2939 /**
2940  * Validate eCPRI item.
2941  *
2942  * @param[in] item
2943  *   Item specification.
2944  * @param[in] item_flags
2945  *   Bit-fields that hold the items detected until now.
2946  * @param[in] last_item
2947  *   Previous validated item in the pattern items.
2948  * @param[in] ether_type
2949  *   Type in the ethernet layer header (including dot1q).
2950  * @param[in] acc_mask
2951  *   Acceptable mask. If NULL, the default internal mask
2952  *   will be used to check whether item fields are supported.
2953  * @param[out] error
2954  *   Pointer to error structure.
2955  *
2956  * @return
2957  *   0 on success, a negative errno value otherwise and rte_errno is set.
2958  */
2959 int
2960 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2961                               uint64_t item_flags,
2962                               uint64_t last_item,
2963                               uint16_t ether_type,
2964                               const struct rte_flow_item_ecpri *acc_mask,
2965                               struct rte_flow_error *error)
2966 {
2967         const struct rte_flow_item_ecpri *mask = item->mask;
2968         const struct rte_flow_item_ecpri nic_mask = {
2969                 .hdr = {
2970                         .common = {
2971                                 .u32 =
2972                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2973                                         .type = 0xFF,
2974                                         }).u32),
2975                         },
2976                         .dummy[0] = 0xFFFFFFFF,
2977                 },
2978         };
2979         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2980                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2981         struct rte_flow_item_ecpri mask_lo;
2982
2983         if (!(last_item & outer_l2_vlan) &&
2984             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
2985                 return rte_flow_error_set(error, EINVAL,
2986                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2987                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
2988         if ((last_item & outer_l2_vlan) && ether_type &&
2989             ether_type != RTE_ETHER_TYPE_ECPRI)
2990                 return rte_flow_error_set(error, EINVAL,
2991                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2992                                           "eCPRI cannot follow an L2/VLAN layer whose Ethertype is not 0xAEFE");
2993         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2994                 return rte_flow_error_set(error, EINVAL,
2995                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2996                                           "eCPRI with tunnel is not supported right now");
2997         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2998                 return rte_flow_error_set(error, ENOTSUP,
2999                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3000                                           "multiple L3 layers not supported");
3001         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3002                 return rte_flow_error_set(error, EINVAL,
3003                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3004                                           "eCPRI cannot coexist with a TCP layer");
3005         /* Per the specification, eCPRI can be carried over a UDP layer. */
3006         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3007                 return rte_flow_error_set(error, EINVAL,
3008                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3009                                           "eCPRI over UDP layer is not supported yet");
3010         /* Mask for type field in common header could be zero. */
3011         if (!mask)
3012                 mask = &rte_flow_item_ecpri_mask;
3013         /* The input mask is in big-endian format. */
3014         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3015         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3016                 return rte_flow_error_set(error, EINVAL,
3017                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3018                                           "partial mask is not supported for protocol");
3019         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3020                 return rte_flow_error_set(error, EINVAL,
3021                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3022                                           "message header mask must be after a type mask");
3023         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3024                                          acc_mask ? (const uint8_t *)acc_mask
3025                                                   : (const uint8_t *)&nic_mask,
3026                                          sizeof(struct rte_flow_item_ecpri),
3027                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3028 }
3029
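/*
 * Usage sketch (illustrative): matching eCPRI type 0 (IQ data) directly over
 * Ethernet, with a fully set type mask as required above:
 *
 *	const struct rte_flow_item_ecpri ecpri_spec = {
 *		.hdr.common.type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
 *	};
 *
 * validated with last_item = MLX5_FLOW_LAYER_OUTER_L2 and a mask whose
 * hdr.common.type field is 0xff.
 */
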
3030 /**
3031  * Release resources related to the QUEUE/RSS action split.
3032  *
3033  * @param dev
3034  *   Pointer to Ethernet device.
3035  * @param flow
3036  *   Flow to release split flow IDs from.
3037  */
3038 static void
3039 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
3040                              struct rte_flow *flow)
3041 {
3042         struct mlx5_priv *priv = dev->data->dev_private;
3043         uint32_t handle_idx;
3044         struct mlx5_flow_handle *dev_handle;
3045
3046         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
3047                        handle_idx, dev_handle, next)
3048                 if (dev_handle->split_flow_id)
3049                         mlx5_ipool_free(priv->sh->ipool
3050                                         [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
3051                                         dev_handle->split_flow_id);
3052 }
3053
3054 static int
3055 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3056                    const struct rte_flow_attr *attr __rte_unused,
3057                    const struct rte_flow_item items[] __rte_unused,
3058                    const struct rte_flow_action actions[] __rte_unused,
3059                    bool external __rte_unused,
3060                    int hairpin __rte_unused,
3061                    struct rte_flow_error *error)
3062 {
3063         return rte_flow_error_set(error, ENOTSUP,
3064                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3065 }
3066
3067 static struct mlx5_flow *
3068 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3069                   const struct rte_flow_attr *attr __rte_unused,
3070                   const struct rte_flow_item items[] __rte_unused,
3071                   const struct rte_flow_action actions[] __rte_unused,
3072                   struct rte_flow_error *error)
3073 {
3074         rte_flow_error_set(error, ENOTSUP,
3075                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3076         return NULL;
3077 }
3078
3079 static int
3080 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3081                     struct mlx5_flow *dev_flow __rte_unused,
3082                     const struct rte_flow_attr *attr __rte_unused,
3083                     const struct rte_flow_item items[] __rte_unused,
3084                     const struct rte_flow_action actions[] __rte_unused,
3085                     struct rte_flow_error *error)
3086 {
3087         return rte_flow_error_set(error, ENOTSUP,
3088                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3089 }
3090
3091 static int
3092 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3093                 struct rte_flow *flow __rte_unused,
3094                 struct rte_flow_error *error)
3095 {
3096         return rte_flow_error_set(error, ENOTSUP,
3097                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3098 }
3099
3100 static void
3101 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3102                  struct rte_flow *flow __rte_unused)
3103 {
3104 }
3105
3106 static void
3107 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3108                   struct rte_flow *flow __rte_unused)
3109 {
3110 }
3111
3112 static int
3113 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3114                 struct rte_flow *flow __rte_unused,
3115                 const struct rte_flow_action *actions __rte_unused,
3116                 void *data __rte_unused,
3117                 struct rte_flow_error *error)
3118 {
3119         return rte_flow_error_set(error, ENOTSUP,
3120                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3121 }
3122
3123 static int
3124 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3125                       uint32_t domains __rte_unused,
3126                       uint32_t flags __rte_unused)
3127 {
3128         return 0;
3129 }
3130
3131 /* Void driver to protect from null pointer reference. */
3132 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3133         .validate = flow_null_validate,
3134         .prepare = flow_null_prepare,
3135         .translate = flow_null_translate,
3136         .apply = flow_null_apply,
3137         .remove = flow_null_remove,
3138         .destroy = flow_null_destroy,
3139         .query = flow_null_query,
3140         .sync_domain = flow_null_sync_domain,
3141 };
3142
3143 /**
3144  * Select flow driver type according to flow attributes and device
3145  * configuration.
3146  *
3147  * @param[in] dev
3148  *   Pointer to the dev structure.
3149  * @param[in] attr
3150  *   Pointer to the flow attributes.
3151  *
3152  * @return
3153  *   The selected flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3154  */
3155 static enum mlx5_flow_drv_type
3156 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3157 {
3158         struct mlx5_priv *priv = dev->data->dev_private;
3159         /* The OS may determine a specific flow type (DV, VERBS) first. */
3160         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3161
3162         if (type != MLX5_FLOW_TYPE_MAX)
3163                 return type;
3164         /* If no OS specific type - continue with DV/VERBS selection */
3165         if (attr->transfer && priv->config.dv_esw_en)
3166                 type = MLX5_FLOW_TYPE_DV;
3167         if (!attr->transfer)
3168                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3169                                                  MLX5_FLOW_TYPE_VERBS;
3170         return type;
3171 }
3172
3173 #define flow_get_drv_ops(type) flow_drv_ops[type]
3174
3175 /**
3176  * Flow driver validation API. This abstracts calling driver specific functions.
3177  * The type of flow driver is determined according to flow attributes.
3178  *
3179  * @param[in] dev
3180  *   Pointer to the dev structure.
3181  * @param[in] attr
3182  *   Pointer to the flow attributes.
3183  * @param[in] items
3184  *   Pointer to the list of items.
3185  * @param[in] actions
3186  *   Pointer to the list of actions.
3187  * @param[in] external
3188  *   This flow rule is created by a request external to the PMD.
3189  * @param[in] hairpin
3190  *   Number of hairpin TX actions, 0 means classic flow.
3191  * @param[out] error
3192  *   Pointer to the error structure.
3193  *
3194  * @return
3195  *   0 on success, a negative errno value otherwise and rte_errno is set.
3196  */
3197 static inline int
3198 flow_drv_validate(struct rte_eth_dev *dev,
3199                   const struct rte_flow_attr *attr,
3200                   const struct rte_flow_item items[],
3201                   const struct rte_flow_action actions[],
3202                   bool external, int hairpin, struct rte_flow_error *error)
3203 {
3204         const struct mlx5_flow_driver_ops *fops;
3205         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3206
3207         fops = flow_get_drv_ops(type);
3208         return fops->validate(dev, attr, items, actions, external,
3209                               hairpin, error);
3210 }
3211
3212 /**
3213  * Flow driver preparation API. This abstracts calling driver specific
3214  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3215  * calculates the size of memory required for device flow, allocates the memory,
3216  * initializes the device flow and returns the pointer.
3217  *
3218  * @note
3219  *   This function initializes device flow structure such as dv or verbs in
3220  *   struct mlx5_flow. However, the caller is responsible for initializing
3221  *   the rest: adding the returned device flow to the flow's device flow
3222  *   list and setting a backward reference to the flow must be done outside
3223  *   of this function. The layers field is not filled either.
3224  *
3225  * @param[in] dev
3226  *   Pointer to the dev structure.
3227  * @param[in] attr
3228  *   Pointer to the flow attributes.
3229  * @param[in] items
3230  *   Pointer to the list of items.
3231  * @param[in] actions
3232  *   Pointer to the list of actions.
3233  * @param[in] flow_idx
3234  *   The memory pool index of this flow.
3235  * @param[out] error
3236  *   Pointer to the error structure.
3237  *
3238  * @return
3239  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3240  */
3241 static inline struct mlx5_flow *
3242 flow_drv_prepare(struct rte_eth_dev *dev,
3243                  const struct rte_flow *flow,
3244                  const struct rte_flow_attr *attr,
3245                  const struct rte_flow_item items[],
3246                  const struct rte_flow_action actions[],
3247                  uint32_t flow_idx,
3248                  struct rte_flow_error *error)
3249 {
3250         const struct mlx5_flow_driver_ops *fops;
3251         enum mlx5_flow_drv_type type = flow->drv_type;
3252         struct mlx5_flow *mlx5_flow = NULL;
3253
3254         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3255         fops = flow_get_drv_ops(type);
3256         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3257         if (mlx5_flow)
3258                 mlx5_flow->flow_idx = flow_idx;
3259         return mlx5_flow;
3260 }
3261
3262 /**
3263  * Flow driver translation API. This abstracts calling driver specific
3264  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3265  * translates a generic flow into a driver flow. flow_drv_prepare() must
3266  * precede.
3267  *
3268  * @note
3269  *   dev_flow->layers could be filled as a result of parsing during translation
3270  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3271  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3272  *   flow->actions could be overwritten even though all the expanded dev_flows
3273  *   have the same actions.
3274  *
3275  * @param[in] dev
3276  *   Pointer to the rte dev structure.
3277  * @param[in, out] dev_flow
3278  *   Pointer to the mlx5 flow.
3279  * @param[in] attr
3280  *   Pointer to the flow attributes.
3281  * @param[in] items
3282  *   Pointer to the list of items.
3283  * @param[in] actions
3284  *   Pointer to the list of actions.
3285  * @param[out] error
3286  *   Pointer to the error structure.
3287  *
3288  * @return
3289  *   0 on success, a negative errno value otherwise and rte_errno is set.
3290  */
3291 static inline int
3292 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3293                    const struct rte_flow_attr *attr,
3294                    const struct rte_flow_item items[],
3295                    const struct rte_flow_action actions[],
3296                    struct rte_flow_error *error)
3297 {
3298         const struct mlx5_flow_driver_ops *fops;
3299         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3300
3301         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3302         fops = flow_get_drv_ops(type);
3303         return fops->translate(dev, dev_flow, attr, items, actions, error);
3304 }
3305
3306 /**
3307  * Flow driver apply API. This abstracts calling driver specific functions.
3308  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3309  * translated driver flows on to device. flow_drv_translate() must precede.
3310  *
3311  * @param[in] dev
3312  *   Pointer to Ethernet device structure.
3313  * @param[in, out] flow
3314  *   Pointer to flow structure.
3315  * @param[out] error
3316  *   Pointer to error structure.
3317  *
3318  * @return
3319  *   0 on success, a negative errno value otherwise and rte_errno is set.
3320  */
3321 static inline int
3322 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3323                struct rte_flow_error *error)
3324 {
3325         const struct mlx5_flow_driver_ops *fops;
3326         enum mlx5_flow_drv_type type = flow->drv_type;
3327
3328         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3329         fops = flow_get_drv_ops(type);
3330         return fops->apply(dev, flow, error);
3331 }
3332
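/*
 * Typical driver-op sequence for a single flow (sketch, error handling and
 * cleanup omitted; flow_drv_destroy() is defined below):
 *
 *	if (flow_drv_validate(dev, attr, items, actions, true, 0, error))
 *		return -rte_errno;
 *	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *				    flow_idx, error);
 *	if (!dev_flow ||
 *	    flow_drv_translate(dev, dev_flow, attr, items, actions, error) ||
 *	    flow_drv_apply(dev, flow, error))
 *		flow_drv_destroy(dev, flow);
 */
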
3333 /**
3334  * Flow driver destroy API. This abstracts calling driver specific functions.
3335  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3336  * on device and releases resources of the flow.
3337  *
3338  * @param[in] dev
3339  *   Pointer to Ethernet device.
3340  * @param[in, out] flow
3341  *   Pointer to flow structure.
3342  */
3343 static inline void
3344 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3345 {
3346         const struct mlx5_flow_driver_ops *fops;
3347         enum mlx5_flow_drv_type type = flow->drv_type;
3348
3349         flow_mreg_split_qrss_release(dev, flow);
3350         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3351         fops = flow_get_drv_ops(type);
3352         fops->destroy(dev, flow);
3353 }
3354
3355 /**
3356  * Get RSS action from the action list.
3357  *
3358  * @param[in] actions
3359  *   Pointer to the list of actions.
3360  *
3361  * @return
3362  *   Pointer to the RSS action if it exists, NULL otherwise.
3363  */
3364 static const struct rte_flow_action_rss*
3365 flow_get_rss_action(const struct rte_flow_action actions[])
3366 {
3367         const struct rte_flow_action_rss *rss = NULL;
3368
3369         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3370                 switch (actions->type) {
3371                 case RTE_FLOW_ACTION_TYPE_RSS:
3372                         rss = actions->conf;
3373                         break;
3374                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3375                 {
3376                         const struct rte_flow_action_sample *sample =
3377                                                                 actions->conf;
3378                         const struct rte_flow_action *act = sample->actions;
3379                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3380                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3381                                         rss = act->conf;
3382                         break;
3383                 }
3384                 default:
3385                         break;
3386                 }
3387         }
3388         return rss;
3389 }
3390
3391 /**
3392  * Get ASO age action by index.
3393  *
3394  * @param[in] dev
3395  *   Pointer to the Ethernet device structure.
3396  * @param[in] age_idx
3397  *   Index to the ASO age action.
3398  *
3399  * @return
3400  *   The specified ASO age action.
3401  */
3402 struct mlx5_aso_age_action*
3403 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3404 {
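        /*
         * age_idx packs the pool index in the low 16 bits and a 1-based
         * action offset in the high 16 bits.
         */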
3405         uint16_t pool_idx = age_idx & UINT16_MAX;
3406         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3407         struct mlx5_priv *priv = dev->data->dev_private;
3408         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3409         struct mlx5_aso_age_pool *pool = mng->pools[pool_idx];
3410
3411         return &pool->actions[offset - 1];
3412 }
3413
3414 /* Maps a shared action to its translated non-shared action in an actions array. */
3415 struct mlx5_translated_shared_action {
3416         struct rte_flow_shared_action *action; /**< Shared action */
3417         int index; /**< Index in related array of rte_flow_action */
3418 };
3419
3420 /**
3421  * Translates actions of type RTE_FLOW_ACTION_TYPE_SHARED to their related
3422  * non-shared actions when translation is possible.
3423  * This functionality is used to run the same execution path for both shared
3424  * and non-shared actions on flow create. All necessary preparations for
3425  * shared action handling should be performed on the *shared* actions list
3426  * returned from this call.
3427  *
3428  * @param[in] dev
3429  *   Pointer to Ethernet device.
3430  * @param[in] actions
3431  *   List of actions to translate.
3432  * @param[out] shared
3433  *   List to store translated shared actions.
3434  * @param[in, out] shared_n
3435  *   Size of the *shared* array. On return, it is updated with the number
3436  *   of shared actions retrieved from the *actions* list.
3437  * @param[out] translated_actions
3438  *   List of actions where all shared actions were translated to non-shared
3439  *   ones if possible. NULL if no translation took place.
3440  * @param[out] error
3441  *   Pointer to the error structure.
3442  *
3443  * @return
3444  *   0 on success, a negative errno value otherwise and rte_errno is set.
3445  */
3446 static int
3447 flow_shared_actions_translate(struct rte_eth_dev *dev,
3448                               const struct rte_flow_action actions[],
3449                               struct mlx5_translated_shared_action *shared,
3450                               int *shared_n,
3451                               struct rte_flow_action **translated_actions,
3452                               struct rte_flow_error *error)
3453 {
3454         struct mlx5_priv *priv = dev->data->dev_private;
3455         struct rte_flow_action *translated = NULL;
3456         size_t actions_size;
3457         int n;
3458         int copied_n = 0;
3459         struct mlx5_translated_shared_action *shared_end = NULL;
3460
3461         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3462                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_SHARED)
3463                         continue;
3464                 if (copied_n == *shared_n) {
3465                         return rte_flow_error_set
3466                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3467                                  NULL, "too many shared actions");
3468                 }
3469                 rte_memcpy(&shared[copied_n].action, &actions[n].conf,
3470                            sizeof(actions[n].conf));
3471                 shared[copied_n].index = n;
3472                 copied_n++;
3473         }
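        /* Include the terminating END action in the count. */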
3474         n++;
3475         *shared_n = copied_n;
3476         if (!copied_n)
3477                 return 0;
3478         actions_size = sizeof(struct rte_flow_action) * n;
3479         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3480         if (!translated) {
3481                 rte_errno = ENOMEM;
3482                 return -ENOMEM;
3483         }
3484         memcpy(translated, actions, actions_size);
3485         for (shared_end = shared + copied_n; shared < shared_end; shared++) {
3486                 struct mlx5_shared_action_rss *shared_rss;
3487                 uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3488                 uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3489                 uint32_t idx = act_idx & ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET)
3490                                                                            - 1);
3491
3492                 switch (type) {
3493                 case MLX5_SHARED_ACTION_TYPE_RSS:
3494                         shared_rss = mlx5_ipool_get
3495                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3496                         translated[shared->index].type =
3497                                 RTE_FLOW_ACTION_TYPE_RSS;
3498                         translated[shared->index].conf =
3499                                 &shared_rss->origin;
3500                         break;
3501                 case MLX5_SHARED_ACTION_TYPE_AGE:
3502                         if (priv->sh->flow_hit_aso_en) {
3503                                 translated[shared->index].type =
3504                                         (enum rte_flow_action_type)
3505                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3506                                 translated[shared->index].conf =
3507                                                          (void *)(uintptr_t)idx;
3508                                 break;
3509                         }
3510                         /* Fall-through */
3511                 default:
3512                         mlx5_free(translated);
3513                         return rte_flow_error_set
3514                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3515                                  NULL, "invalid shared action type");
3516                 }
3517         }
3518         *translated_actions = translated;
3519         return 0;
3520 }
3521
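/*
 * Hypothetical caller sketch: translate first, then use the translated list
 * when one was allocated (the array size below is illustrative):
 *
 *	struct mlx5_translated_shared_action shared[8];
 *	int shared_n = RTE_DIM(shared);
 *	struct rte_flow_action *translated = NULL;
 *
 *	if (!flow_shared_actions_translate(dev, actions, shared, &shared_n,
 *					   &translated, error) && translated)
 *		actions = translated;
 *	...
 *	mlx5_free(translated);
 */
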
3522 /**
3523  * Get Shared RSS action from the action list.
3524  *
3525  * @param[in] dev
3526  *   Pointer to Ethernet device.
3527  * @param[in] shared
3528  *   Pointer to the list of actions.
3529  * @param[in] shared_n
3530  *   Actions list length.
3531  *
3532  * @return
3533  *   The MLX5 shared RSS action ID if it exists, 0 otherwise.
3534  */
3535 static uint32_t
3536 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3537                            struct mlx5_translated_shared_action *shared,
3538                            int shared_n)
3539 {
3540         struct mlx5_translated_shared_action *shared_end;
3541         struct mlx5_priv *priv = dev->data->dev_private;
3542         struct mlx5_shared_action_rss *shared_rss;
3543
3545         for (shared_end = shared + shared_n; shared < shared_end; shared++) {
3546                 uint32_t act_idx = (uint32_t)(uintptr_t)shared->action;
3547                 uint32_t type = act_idx >> MLX5_SHARED_ACTION_TYPE_OFFSET;
3548                 uint32_t idx = act_idx &
3549                                    ((1u << MLX5_SHARED_ACTION_TYPE_OFFSET) - 1);
3550                 switch (type) {
3551                 case MLX5_SHARED_ACTION_TYPE_RSS:
3552                         shared_rss = mlx5_ipool_get
3553                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3554                                                                            idx);
3555                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3556                                            __ATOMIC_RELAXED);
3557                         return idx;
3558                 default:
3559                         break;
3560                 }
3561         }
3562         return 0;
3563 }
3564
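/**
 * Select the root node index of the RSS expansion graph for a pattern.
 *
 * @param[in] pattern
 *   Flow rule pattern items.
 * @param[in] rss_level
 *   RSS encapsulation level from the RSS action.
 *
 * @return
 *   Expansion graph root node index; a VLAN-aware root is chosen when the
 *   pattern contains a VLAN item.
 */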
3565 static unsigned int
3566 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3567 {
3568         const struct rte_flow_item *item;
3569         unsigned int has_vlan = 0;
3570
3571         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3572                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3573                         has_vlan = 1;
3574                         break;
3575                 }
3576         }
3577         if (has_vlan)
3578                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3579                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3580         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3581                                MLX5_EXPANSION_ROOT_OUTER;
3582 }
3583
3584 /**
3585  *  Get layer flags from the prefix flow.
3586  *
3587  *  Some flows may be split into several subflows: the prefix subflow gets
3588  *  the match items and the suffix subflow gets the actions.
3589  *  Some actions need the user-defined match item flags to get the details
3590  *  for the action.
3591  *  This function helps the suffix flow to get the item layer flags from the
3592  *  prefix subflow.
3593  *
3594  * @param[in] dev_flow
3595  *   Pointer to the created prefix subflow.
3596  *
3597  * @return
3598  *   The layers obtained from the prefix subflow.
3599  */
3600 static inline uint64_t
3601 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3602 {
3603         uint64_t layers = 0;
3604
3605         /*
3606          * The layer bits could be cached in a local variable, but usually
3607          * the compiler optimizes this access well enough.
3608          * If no decap actions, use the layers directly.
3609          */
3610         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3611                 return dev_flow->handle->layers;
3612         /* Convert L3 layers with decap action. */
3613         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3614                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3615         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3616                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3617         /* Convert L4 layers with decap action.  */
3618         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3619                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3620         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3621                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3622         return layers;
3623 }
3624
3625 /**
3626  * Get metadata split action information.
3627  *
3628  * @param[in] actions
3629  *   Pointer to the list of actions.
3630  * @param[out] qrss
3631  *   Pointer to the return pointer of the QUEUE/RSS action; left untouched
3632  *   if no QUEUE/RSS action is found.
3635  * @param[out] encap_idx
3636  *   Pointer to the index of the encap action if exists, otherwise the last
3637  *   action index.
3638  *
3639  * @return
3640  *   Total number of actions.
3641  */
3642 static int
3643 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3644                                        const struct rte_flow_action **qrss,
3645                                        int *encap_idx)
3646 {
3647         const struct rte_flow_action_raw_encap *raw_encap;
3648         int actions_n = 0;
3649         int raw_decap_idx = -1;
3650
3651         *encap_idx = -1;
3652         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3653                 switch (actions->type) {
3654                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3655                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3656                         *encap_idx = actions_n;
3657                         break;
3658                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3659                         raw_decap_idx = actions_n;
3660                         break;
3661                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3662                         raw_encap = actions->conf;
3663                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3664                                 *encap_idx = raw_decap_idx != -1 ?
3665                                                       raw_decap_idx : actions_n;
3666                         break;
3667                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3668                 case RTE_FLOW_ACTION_TYPE_RSS:
3669                         *qrss = actions;
3670                         break;
3671                 default:
3672                         break;
3673                 }
3674                 actions_n++;
3675         }
3676         if (*encap_idx == -1)
3677                 *encap_idx = actions_n;
3678         /* Count RTE_FLOW_ACTION_TYPE_END. */
3679         return actions_n + 1;
3680 }
3681
3682 /**
3683  * Check meter action from the action list.
3684  *
3685  * @param[in] actions
3686  *   Pointer to the list of actions.
3687  * @param[out] mtr
3688  *   Pointer to the meter existence flag.
3689  *
3690  * @return
3691  *   Total number of actions.
3692  */
3693 static int
3694 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
3695 {
3696         int actions_n = 0;
3697
3698         MLX5_ASSERT(mtr);
3699         *mtr = 0;
3700         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3701                 switch (actions->type) {
3702                 case RTE_FLOW_ACTION_TYPE_METER:
3703                         *mtr = 1;
3704                         break;
3705                 default:
3706                         break;
3707                 }
3708                 actions_n++;
3709         }
3710         /* Count RTE_FLOW_ACTION_TYPE_END. */
3711         return actions_n + 1;
3712 }
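
/*
 * Usage sketch (illustrative only, assuming a caller-provided action
 * array "actions" terminated by RTE_FLOW_ACTION_TYPE_END):
 *
 *	uint32_t has_mtr = 0;
 *	int n = flow_check_meter_action(actions, &has_mtr);
 *
 * After the call, n counts the END action as well and has_mtr is 1 if
 * and only if a METER action is present in the list.
 */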
3713
3714 /**
3715  * Check if the flow should be split due to hairpin.
3716  * The reason for the split is that current HW can't
3717  * support encap and push-vlan on Rx, so if a flow contains
3718  * these actions we move them to Tx.
3719  *
3720  * @param dev
3721  *   Pointer to Ethernet device.
3722  * @param[in] attr
3723  *   Flow rule attributes.
3724  * @param[in] actions
3725  *   Associated actions (list terminated by the END action).
3726  *
3727  * @return
3728  *   > 0 the number of actions and the flow should be split,
3729  *   0 when no split required.
3730  */
3731 static int
3732 flow_check_hairpin_split(struct rte_eth_dev *dev,
3733                          const struct rte_flow_attr *attr,
3734                          const struct rte_flow_action actions[])
3735 {
3736         int queue_action = 0;
3737         int action_n = 0;
3738         int split = 0;
3739         const struct rte_flow_action_queue *queue;
3740         const struct rte_flow_action_rss *rss;
3741         const struct rte_flow_action_raw_encap *raw_encap;
3742         const struct rte_eth_hairpin_conf *conf;
3743
3744         if (!attr->ingress)
3745                 return 0;
3746         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3747                 switch (actions->type) {
3748                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3749                         queue = actions->conf;
3750                         if (queue == NULL)
3751                                 return 0;
3752                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
3753                         if (conf == NULL || conf->tx_explicit != 0)
3754                                 return 0;
3755                         queue_action = 1;
3756                         action_n++;
3757                         break;
3758                 case RTE_FLOW_ACTION_TYPE_RSS:
3759                         rss = actions->conf;
3760                         if (rss == NULL || rss->queue_num == 0)
3761                                 return 0;
3762                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
3763                         if (conf == NULL || conf->tx_explicit != 0)
3764                                 return 0;
3765                         queue_action = 1;
3766                         action_n++;
3767                         break;
3768                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3769                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3770                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3771                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3772                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3773                         split++;
3774                         action_n++;
3775                         break;
3776                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3777                         raw_encap = actions->conf;
3778                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3779                                 split++;
3780                         action_n++;
3781                         break;
3782                 default:
3783                         action_n++;
3784                         break;
3785                 }
3786         }
3787         if (split && queue_action)
3788                 return action_n;
3789         return 0;
3790 }
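
/*
 * Example (editor's sketch): an ingress flow with the actions
 * QUEUE (hairpin queue with implicit Tx rules) / VXLAN_ENCAP / END makes
 * the function above return 2, since both a queue action and an encap
 * action are present and the flow must be split. A non-hairpin queue, or
 * a hairpin queue configured with explicit Tx rules, yields 0 instead.
 */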
3791
3792 /* Declare flow create/destroy prototype in advance. */
3793 static uint32_t
3794 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3795                  const struct rte_flow_attr *attr,
3796                  const struct rte_flow_item items[],
3797                  const struct rte_flow_action actions[],
3798                  bool external, struct rte_flow_error *error);
3799
3800 static void
3801 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3802                   uint32_t flow_idx);
3803
3804 int
3805 flow_dv_mreg_match_cb(struct mlx5_hlist *list __rte_unused,
3806                       struct mlx5_hlist_entry *entry,
3807                       uint64_t key, void *cb_ctx __rte_unused)
3808 {
3809         struct mlx5_flow_mreg_copy_resource *mcp_res =
3810                 container_of(entry, typeof(*mcp_res), hlist_ent);
3811
3812         return mcp_res->mark_id != key;
3813 }
3814
3815 struct mlx5_hlist_entry *
3816 flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key,
3817                        void *cb_ctx)
3818 {
3819         struct rte_eth_dev *dev = list->ctx;
3820         struct mlx5_priv *priv = dev->data->dev_private;
3821         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
3822         struct mlx5_flow_mreg_copy_resource *mcp_res;
3823         struct rte_flow_error *error = ctx->error;
3824         uint32_t idx = 0;
3825         int ret;
3826         uint32_t mark_id = key;
3827         struct rte_flow_attr attr = {
3828                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3829                 .ingress = 1,
3830         };
3831         struct mlx5_rte_flow_item_tag tag_spec = {
3832                 .data = mark_id,
3833         };
3834         struct rte_flow_item items[] = {
3835                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3836         };
3837         struct rte_flow_action_mark ftag = {
3838                 .id = mark_id,
3839         };
3840         struct mlx5_flow_action_copy_mreg cp_mreg = {
3841                 .dst = REG_B,
3842                 .src = REG_NON,
3843         };
3844         struct rte_flow_action_jump jump = {
3845                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3846         };
3847         struct rte_flow_action actions[] = {
3848                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3849         };
3850
3851         /* Fill the register fields in the flow. */
3852         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3853         if (ret < 0)
3854                 return NULL;
3855         tag_spec.id = ret;
3856         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3857         if (ret < 0)
3858                 return NULL;
3859         cp_mreg.src = ret;
3860         /* Provide the full width of FLAG specific value. */
3861         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3862                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3863         /* Build a new flow. */
3864         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3865                 items[0] = (struct rte_flow_item){
3866                         .type = (enum rte_flow_item_type)
3867                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3868                         .spec = &tag_spec,
3869                 };
3870                 items[1] = (struct rte_flow_item){
3871                         .type = RTE_FLOW_ITEM_TYPE_END,
3872                 };
3873                 actions[0] = (struct rte_flow_action){
3874                         .type = (enum rte_flow_action_type)
3875                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3876                         .conf = &ftag,
3877                 };
3878                 actions[1] = (struct rte_flow_action){
3879                         .type = (enum rte_flow_action_type)
3880                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3881                         .conf = &cp_mreg,
3882                 };
3883                 actions[2] = (struct rte_flow_action){
3884                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3885                         .conf = &jump,
3886                 };
3887                 actions[3] = (struct rte_flow_action){
3888                         .type = RTE_FLOW_ACTION_TYPE_END,
3889                 };
3890         } else {
3891                 /* Default rule, wildcard match. */
3892                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
3893                 items[0] = (struct rte_flow_item){
3894                         .type = RTE_FLOW_ITEM_TYPE_END,
3895                 };
3896                 actions[0] = (struct rte_flow_action){
3897                         .type = (enum rte_flow_action_type)
3898                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3899                         .conf = &cp_mreg,
3900                 };
3901                 actions[1] = (struct rte_flow_action){
3902                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3903                         .conf = &jump,
3904                 };
3905                 actions[2] = (struct rte_flow_action){
3906                         .type = RTE_FLOW_ACTION_TYPE_END,
3907                 };
3908         }
3909         /* Build a new entry. */
3910         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3911         if (!mcp_res) {
3912                 rte_errno = ENOMEM;
3913                 return NULL;
3914         }
3915         mcp_res->idx = idx;
3916         mcp_res->mark_id = mark_id;
3917         /*
3918          * The copy Flows are not included in any list. These
3919          * ones are referenced from other Flows and cannot
3920          * be applied, removed or deleted in arbitrary order
3921          * by list traversing.
3922          */
3923         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3924                                          actions, false, error);
3925         if (!mcp_res->rix_flow) {
3926                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
3927                 return NULL;
3928         }
3929         return &mcp_res->hlist_ent;
3930 }
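
/*
 * For a non-default mark_id the callback above effectively builds the
 * following rule (editor's sketch):
 *
 *	group MLX5_FLOW_MREG_CP_TABLE_GROUP, ingress,
 *	match TAG(mark register) == mark_id,
 *	actions MARK(mark_id) / COPY_MREG(reg_b := metadata Rx register) /
 *	        JUMP(MLX5_FLOW_MREG_ACT_TABLE_GROUP) / END
 *
 * For MLX5_DEFAULT_COPY_ID the match is a wildcard at the lowest
 * priority and the MARK action is omitted.
 */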
3931
3932 /**
3933  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3934  *
3935  * As mark_id is unique, if there's already a registered flow for the mark_id,
3936  * return by increasing the reference counter of the resource. Otherwise, create
3937  * the resource (mcp_res) and flow.
3938  *
3939  * Flow looks like,
3940  *   - If ingress port is ANY and reg_c[1] is mark_id,
3941  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3942  *
3943  * For default flow (zero mark_id), flow is like,
3944  *   - If ingress port is ANY,
3945  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3946  *
3947  * @param dev
3948  *   Pointer to Ethernet device.
3949  * @param mark_id
3950  *   ID of MARK action, zero means default flow for META.
3951  * @param[out] error
3952  *   Perform verbose error reporting if not NULL.
3953  *
3954  * @return
3955  *   Associated resource on success, NULL otherwise and rte_errno is set.
3956  */
3957 static struct mlx5_flow_mreg_copy_resource *
3958 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3959                           struct rte_flow_error *error)
3960 {
3961         struct mlx5_priv *priv = dev->data->dev_private;
3962         struct mlx5_hlist_entry *entry;
3963         struct mlx5_flow_cb_ctx ctx = {
3964                 .dev = dev,
3965                 .error = error,
3966         };
3967
3968         /* Check if already registered. */
3969         MLX5_ASSERT(priv->mreg_cp_tbl);
3970         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
3971         if (!entry)
3972                 return NULL;
3973         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
3974                             hlist_ent);
3975 }
3976
3977 void
3978 flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry)
3979 {
3980         struct mlx5_flow_mreg_copy_resource *mcp_res =
3981                 container_of(entry, typeof(*mcp_res), hlist_ent);
3982         struct rte_eth_dev *dev = list->ctx;
3983         struct mlx5_priv *priv = dev->data->dev_private;
3984
3985         MLX5_ASSERT(mcp_res->rix_flow);
3986         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3987         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3988 }
3989
3990 /**
3991  * Release flow in RX_CP_TBL.
3992  *
3993  * @param dev
3994  *   Pointer to Ethernet device.
3995  * @param flow
3996  *   Parent flow for which copying is provided.
3997  */
3998 static void
3999 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4000                           struct rte_flow *flow)
4001 {
4002         struct mlx5_flow_mreg_copy_resource *mcp_res;
4003         struct mlx5_priv *priv = dev->data->dev_private;
4004
4005         if (!flow->rix_mreg_copy)
4006                 return;
4007         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4008                                  flow->rix_mreg_copy);
4009         if (!mcp_res || !priv->mreg_cp_tbl)
4010                 return;
4011         MLX5_ASSERT(mcp_res->rix_flow);
4012         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4013         flow->rix_mreg_copy = 0;
4014 }
4015
4016 /**
4017  * Remove the default copy action from RX_CP_TBL.
4018  *
4019  * This function is called in mlx5_dev_start(). Thread safety is not
4020  * guaranteed.
4021  *
4022  * @param dev
4023  *   Pointer to Ethernet device.
4024  */
4025 static void
4026 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4027 {
4028         struct mlx5_hlist_entry *entry;
4029         struct mlx5_priv *priv = dev->data->dev_private;
4030
4031         /* Check if default flow is registered. */
4032         if (!priv->mreg_cp_tbl)
4033                 return;
4034         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl,
4035                                   MLX5_DEFAULT_COPY_ID, NULL);
4036         if (!entry)
4037                 return;
4038         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4039 }
4040
4041 /**
4042  * Add the default copy action in RX_CP_TBL.
4043  *
4044  * This function is called in mlx5_dev_start(). Thread safety is not
4045  * guaranteed.
4046  *
4047  * @param dev
4048  *   Pointer to Ethernet device.
4049  * @param[out] error
4050  *   Perform verbose error reporting if not NULL.
4051  *
4052  * @return
4053  *   0 for success, negative value otherwise and rte_errno is set.
4054  */
4055 static int
4056 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4057                                   struct rte_flow_error *error)
4058 {
4059         struct mlx5_priv *priv = dev->data->dev_private;
4060         struct mlx5_flow_mreg_copy_resource *mcp_res;
4061
4062         /* Check whether extensive metadata feature is engaged. */
4063         if (!priv->config.dv_flow_en ||
4064             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4065             !mlx5_flow_ext_mreg_supported(dev) ||
4066             !priv->sh->dv_regc0_mask)
4067                 return 0;
4068         /*
4069          * This function may be called multiple times, while the flow
4070          * is removed only once in stop. Avoid registering it twice.
4071          */
4072         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, MLX5_DEFAULT_COPY_ID, NULL))
4073                 return 0;
4074         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
4075         if (!mcp_res)
4076                 return -rte_errno;
4077         return 0;
4078 }
4079
4080 /**
4081  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4082  *
4083  * All the flows having a Q/RSS action should be split by
4084  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4085  * performs the following,
4086  *   - CQE->flow_tag := reg_c[1] (MARK)
4087  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4088  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4089  * but there should be a flow for each MARK ID set by the MARK action.
4090  *
4091  * For the aforementioned reason, if there's a MARK action in flow's action
4092  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4093  * the MARK ID to CQE's flow_tag like,
4094  *   - If reg_c[1] is mark_id,
4095  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4096  *
4097  * For SET_META action which stores value in reg_c[0], as the destination is
4098  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4099  * MARK ID means the default flow. The default flow looks like,
4100  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4101  *
4102  * @param dev
4103  *   Pointer to Ethernet device.
4104  * @param flow
4105  *   Pointer to flow structure.
4106  * @param[in] actions
4107  *   Pointer to the list of actions.
4108  * @param[out] error
4109  *   Perform verbose error reporting if not NULL.
4110  *
4111  * @return
4112  *   0 on success, negative value otherwise and rte_errno is set.
4113  */
4114 static int
4115 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4116                             struct rte_flow *flow,
4117                             const struct rte_flow_action *actions,
4118                             struct rte_flow_error *error)
4119 {
4120         struct mlx5_priv *priv = dev->data->dev_private;
4121         struct mlx5_dev_config *config = &priv->config;
4122         struct mlx5_flow_mreg_copy_resource *mcp_res;
4123         const struct rte_flow_action_mark *mark;
4124
4125         /* Check whether extensive metadata feature is engaged. */
4126         if (!config->dv_flow_en ||
4127             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4128             !mlx5_flow_ext_mreg_supported(dev) ||
4129             !priv->sh->dv_regc0_mask)
4130                 return 0;
4131         /* Find MARK action. */
4132         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4133                 switch (actions->type) {
4134                 case RTE_FLOW_ACTION_TYPE_FLAG:
4135                         mcp_res = flow_mreg_add_copy_action
4136                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4137                         if (!mcp_res)
4138                                 return -rte_errno;
4139                         flow->rix_mreg_copy = mcp_res->idx;
4140                         return 0;
4141                 case RTE_FLOW_ACTION_TYPE_MARK:
4142                         mark = (const struct rte_flow_action_mark *)
4143                                 actions->conf;
4144                         mcp_res =
4145                                 flow_mreg_add_copy_action(dev, mark->id, error);
4146                         if (!mcp_res)
4147                                 return -rte_errno;
4148                         flow->rix_mreg_copy = mcp_res->idx;
4149                         return 0;
4150                 default:
4151                         break;
4152                 }
4153         }
4154         return 0;
4155 }
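
/*
 * Editor's note, illustrative: when a flow carries MARK(id = 0x1234) and
 * extensive metadata is enabled, the function above registers (or
 * reuses) a copy flow for ID 0x1234 in RX_CP_TBL and records its index
 * in flow->rix_mreg_copy; a FLAG action does the same with
 * MLX5_FLOW_MARK_DEFAULT.
 */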
4156
4157 #define MLX5_MAX_SPLIT_ACTIONS 24
4158 #define MLX5_MAX_SPLIT_ITEMS 24
4159
4160 /**
4161  * Split the hairpin flow.
4162  * Since HW can't support encap and push-vlan on Rx, we move these
4163  * actions to Tx.
4164  * If the count action is after the encap then we also
4165  * move the count action. In this case the count will also measure
4166  * the outer bytes.
4167  *
4168  * @param dev
4169  *   Pointer to Ethernet device.
4170  * @param[in] actions
4171  *   Associated actions (list terminated by the END action).
4172  * @param[out] actions_rx
4173  *   Rx flow actions.
4174  * @param[out] actions_tx
4175  *   Tx flow actions.
4176  * @param[out] pattern_tx
4177  *   The pattern items for the Tx flow.
4178  * @param[out] flow_id
4179  *   The flow ID connected to this flow.
4180  *
4181  * @return
4182  *   0 on success.
4183  */
4184 static int
4185 flow_hairpin_split(struct rte_eth_dev *dev,
4186                    const struct rte_flow_action actions[],
4187                    struct rte_flow_action actions_rx[],
4188                    struct rte_flow_action actions_tx[],
4189                    struct rte_flow_item pattern_tx[],
4190                    uint32_t flow_id)
4191 {
4192         const struct rte_flow_action_raw_encap *raw_encap;
4193         const struct rte_flow_action_raw_decap *raw_decap;
4194         struct mlx5_rte_flow_action_set_tag *set_tag;
4195         struct rte_flow_action *tag_action;
4196         struct mlx5_rte_flow_item_tag *tag_item;
4197         struct rte_flow_item *item;
4198         char *addr;
4199         int encap = 0;
4200
4201         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4202                 switch (actions->type) {
4203                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4204                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4205                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4206                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4207                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4208                         rte_memcpy(actions_tx, actions,
4209                                sizeof(struct rte_flow_action));
4210                         actions_tx++;
4211                         break;
4212                 case RTE_FLOW_ACTION_TYPE_COUNT:
4213                         if (encap) {
4214                                 rte_memcpy(actions_tx, actions,
4215                                            sizeof(struct rte_flow_action));
4216                                 actions_tx++;
4217                         } else {
4218                                 rte_memcpy(actions_rx, actions,
4219                                            sizeof(struct rte_flow_action));
4220                                 actions_rx++;
4221                         }
4222                         break;
4223                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4224                         raw_encap = actions->conf;
4225                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4226                                 memcpy(actions_tx, actions,
4227                                        sizeof(struct rte_flow_action));
4228                                 actions_tx++;
4229                                 encap = 1;
4230                         } else {
4231                                 rte_memcpy(actions_rx, actions,
4232                                            sizeof(struct rte_flow_action));
4233                                 actions_rx++;
4234                         }
4235                         break;
4236                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4237                         raw_decap = actions->conf;
4238                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4239                                 memcpy(actions_tx, actions,
4240                                        sizeof(struct rte_flow_action));
4241                                 actions_tx++;
4242                         } else {
4243                                 rte_memcpy(actions_rx, actions,
4244                                            sizeof(struct rte_flow_action));
4245                                 actions_rx++;
4246                         }
4247                         break;
4248                 default:
4249                         rte_memcpy(actions_rx, actions,
4250                                    sizeof(struct rte_flow_action));
4251                         actions_rx++;
4252                         break;
4253                 }
4254         }
4255         /* Add set meta action and end action for the Rx flow. */
4256         tag_action = actions_rx;
4257         tag_action->type = (enum rte_flow_action_type)
4258                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4259         actions_rx++;
4260         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4261         actions_rx++;
4262         set_tag = (void *)actions_rx;
4263         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
4264         MLX5_ASSERT(set_tag->id > REG_NON);
4265         set_tag->data = flow_id;
4266         tag_action->conf = set_tag;
4267         /* Create Tx item list. */
4268         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4269         addr = (void *)&pattern_tx[2];
4270         item = pattern_tx;
4271         item->type = (enum rte_flow_item_type)
4272                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4273         tag_item = (void *)addr;
4274         tag_item->data = flow_id;
4275         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4276         MLX5_ASSERT(tag_item->id > REG_NON);
4277         item->spec = tag_item;
4278         addr += sizeof(struct mlx5_rte_flow_item_tag);
4279         tag_item = (void *)addr;
4280         tag_item->data = UINT32_MAX;
4281         tag_item->id = UINT16_MAX;
4282         item->mask = tag_item;
4283         item->last = NULL;
4284         item++;
4285         item->type = RTE_FLOW_ITEM_TYPE_END;
4286         return 0;
4287 }
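
/*
 * Worked example (editor's sketch): splitting the hairpin actions
 * RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE) / QUEUE / END with
 * flow_id 5 yields
 *
 *	Rx actions: QUEUE / TAG(MLX5_HAIRPIN_RX register := 5) / END
 *	Tx actions: RAW_ENCAP / END
 *	Tx pattern: TAG(data = 5, mask = UINT32_MAX) / END
 *
 * so the Tx subflow only handles packets tagged by its Rx counterpart.
 */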
4288
4289 /**
4290  * The last stage of splitting chain, just creates the subflow
4291  * without any modification.
4292  *
4293  * @param[in] dev
4294  *   Pointer to Ethernet device.
4295  * @param[in] flow
4296  *   Parent flow structure pointer.
4297  * @param[in, out] sub_flow
4298  *   Pointer to return the created subflow, may be NULL.
4299  * @param[in] attr
4300  *   Flow rule attributes.
4301  * @param[in] items
4302  *   Pattern specification (list terminated by the END pattern item).
4303  * @param[in] actions
4304  *   Associated actions (list terminated by the END action).
4305  * @param[in] flow_split_info
4306  *   Pointer to flow split info structure.
4307  * @param[out] error
4308  *   Perform verbose error reporting if not NULL.
4309  * @return
4310  *   0 on success, negative value otherwise
4311  */
4312 static int
4313 flow_create_split_inner(struct rte_eth_dev *dev,
4314                         struct rte_flow *flow,
4315                         struct mlx5_flow **sub_flow,
4316                         const struct rte_flow_attr *attr,
4317                         const struct rte_flow_item items[],
4318                         const struct rte_flow_action actions[],
4319                         struct mlx5_flow_split_info *flow_split_info,
4320                         struct rte_flow_error *error)
4321 {
4322         struct mlx5_flow *dev_flow;
4323
4324         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4325                                     flow_split_info->flow_idx, error);
4326         if (!dev_flow)
4327                 return -rte_errno;
4328         dev_flow->flow = flow;
4329         dev_flow->external = flow_split_info->external;
4330         dev_flow->skip_scale = flow_split_info->skip_scale;
4331         /* Subflow object was created, we must include one in the list. */
4332         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4333                       dev_flow->handle, next);
4334         /*
4335          * If dev_flow is one of the suffix flows, some actions in the
4336          * suffix flow may need user defined item layer flags; pass the
4337          * metadata Rx queue mark flag to the suffix flow as well.
4338          */
4339         if (flow_split_info->prefix_layers)
4340                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4341         if (flow_split_info->prefix_mark)
4342                 dev_flow->handle->mark = 1;
4343         if (sub_flow)
4344                 *sub_flow = dev_flow;
4345         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4346 }
4347
4348 /**
4349  * Split the meter flow.
4350  *
4351  * As the meter flow will be split into three sub flows, the actions
4352  * other than the meter action only make sense when the meter accepts
4353  * the packet. If the packet needs to be dropped, no additional
4354  * actions should be taken.
4355  *
4356  * One kind of special action which decapsulates the L3 tunnel
4357  * header will be put in the prefix sub flow, so as not to take the
4358  * L3 tunnel header into account.
4359  *
4360  * @param dev
4361  *   Pointer to Ethernet device.
4362  * @param[in] items
4363  *   Pattern specification (list terminated by the END pattern item).
4364  * @param[out] sfx_items
4365  *   Suffix flow match items (list terminated by the END pattern item).
4366  * @param[in] actions
4367  *   Associated actions (list terminated by the END action).
4368  * @param[out] actions_sfx
4369  *   Suffix flow actions.
4370  * @param[out] actions_pre
4371  *   Prefix flow actions.
4372  *
4373  * @return
4374  *   The allocated suffix flow tag ID on success, 0 on failure.
4379  */
4380 static int
4381 flow_meter_split_prep(struct rte_eth_dev *dev,
4382                  const struct rte_flow_item items[],
4383                  struct rte_flow_item sfx_items[],
4384                  const struct rte_flow_action actions[],
4385                  struct rte_flow_action actions_sfx[],
4386                  struct rte_flow_action actions_pre[])
4387 {
4388         struct mlx5_priv *priv = dev->data->dev_private;
4389         struct rte_flow_action *tag_action = NULL;
4390         struct rte_flow_item *tag_item;
4391         struct mlx5_rte_flow_action_set_tag *set_tag;
4392         struct rte_flow_error error;
4393         const struct rte_flow_action_raw_encap *raw_encap;
4394         const struct rte_flow_action_raw_decap *raw_decap;
4395         struct mlx5_rte_flow_item_tag *tag_spec;
4396         struct mlx5_rte_flow_item_tag *tag_mask;
4397         uint32_t tag_id = 0;
4398         bool copy_vlan = false;
4399
4400         /* Prepare the actions for prefix and suffix flow. */
4401         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4402                 struct rte_flow_action **action_cur = NULL;
4403
4404                 switch (actions->type) {
4405                 case RTE_FLOW_ACTION_TYPE_METER:
4406                         /* Add the extra tag action first. */
4407                         tag_action = actions_pre;
4408                         tag_action->type = (enum rte_flow_action_type)
4409                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4410                         actions_pre++;
4411                         action_cur = &actions_pre;
4412                         break;
4413                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4414                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4415                         action_cur = &actions_pre;
4416                         break;
4417                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4418                         raw_encap = actions->conf;
4419                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4420                                 action_cur = &actions_pre;
4421                         break;
4422                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4423                         raw_decap = actions->conf;
4424                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4425                                 action_cur = &actions_pre;
4426                         break;
4427                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4428                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4429                         copy_vlan = true;
4430                         break;
4431                 default:
4432                         break;
4433                 }
4434                 if (!action_cur)
4435                         action_cur = &actions_sfx;
4436                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
4437                 (*action_cur)++;
4438         }
4439         /* Add end action to the actions. */
4440         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4441         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4442         actions_pre++;
4443         /* Set the tag. */
4444         set_tag = (void *)actions_pre;
4445         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4446         mlx5_ipool_malloc(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
4447                           &tag_id);
4448         if (tag_id >= (1 << (sizeof(tag_id) * 8 - MLX5_MTR_COLOR_BITS))) {
4449                 DRV_LOG(ERR, "Port %u meter flow id exceed max limit.",
4450                         dev->data->port_id);
4451                 mlx5_ipool_free(priv->sh->ipool
4452                                 [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], tag_id);
4453                 return 0;
4454         } else if (!tag_id) {
4455                 return 0;
4456         }
4457         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
4458         MLX5_ASSERT(tag_action);
4459         tag_action->conf = set_tag;
4460         /* Prepare the suffix subflow items. */
4461         tag_item = sfx_items++;
4462         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4463                 int item_type = items->type;
4464
4465                 switch (item_type) {
4466                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4467                         memcpy(sfx_items, items, sizeof(*sfx_items));
4468                         sfx_items++;
4469                         break;
4470                 case RTE_FLOW_ITEM_TYPE_VLAN:
4471                         if (copy_vlan) {
4472                                 memcpy(sfx_items, items, sizeof(*sfx_items));
4473                                 /*
4474                                  * Convert to internal match item, it is used
4475                                  * for vlan push and set vid.
4476                                  */
4477                                 sfx_items->type = (enum rte_flow_item_type)
4478                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4479                                 sfx_items++;
4480                         }
4481                         break;
4482                 default:
4483                         break;
4484                 }
4485         }
4486         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4487         sfx_items++;
4488         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4489         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
4490         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4491         tag_mask = tag_spec + 1;
4492         tag_mask->data = 0xffffff00;
4493         tag_item->type = (enum rte_flow_item_type)
4494                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4495         tag_item->spec = tag_spec;
4496         tag_item->last = NULL;
4497         tag_item->mask = tag_mask;
4498         return tag_id;
4499 }
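
/*
 * Worked example (editor's sketch): for the metered actions
 * METER / QUEUE / END the function above produces
 *
 *	prefix: TAG(data = tag_id << MLX5_MTR_COLOR_BITS) / METER / END
 *	suffix: QUEUE / END, matching
 *	        TAG(data = tag_id << MLX5_MTR_COLOR_BITS, mask 0xffffff00)
 *
 * so only packets accepted by the meter reach the QUEUE action.
 */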
4500
4501 /**
4502  * Split action list having QUEUE/RSS for metadata register copy.
4503  *
4504  * Once Q/RSS action is detected in user's action list, the flow action
4505  * should be split in order to copy metadata registers, which will happen in
4506  * RX_CP_TBL like,
4507  *   - CQE->flow_tag := reg_c[1] (MARK)
4508  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4509  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4510  * This is because the last action of each flow must be a terminal action
4511  * (QUEUE, RSS or DROP).
4512  *
4513  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4514  * stored and kept in the mlx5_flow structure per each sub_flow.
4515  *
4516  * The Q/RSS action is replaced with,
4517  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4518  * And the following JUMP action is added at the end,
4519  *   - JUMP, to RX_CP_TBL.
4520  *
4521  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
4522  * flow_create_split_metadata() routine. The flow will look like,
4523  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4524  *
4525  * @param dev
4526  *   Pointer to Ethernet device.
4527  * @param[out] split_actions
4528  *   Pointer to store split actions to jump to CP_TBL.
4529  * @param[in] actions
4530  *   Pointer to the list of original flow actions.
4531  * @param[in] qrss
4532  *   Pointer to the Q/RSS action.
4533  * @param[in] actions_n
4534  *   Number of original actions.
4535  * @param[out] error
4536  *   Perform verbose error reporting if not NULL.
4537  *
4538  * @return
4539  *   non-zero unique flow_id on success, otherwise 0 and
4540  *   error/rte_error are set.
4541  */
4542 static uint32_t
4543 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4544                           struct rte_flow_action *split_actions,
4545                           const struct rte_flow_action *actions,
4546                           const struct rte_flow_action *qrss,
4547                           int actions_n, struct rte_flow_error *error)
4548 {
4549         struct mlx5_priv *priv = dev->data->dev_private;
4550         struct mlx5_rte_flow_action_set_tag *set_tag;
4551         struct rte_flow_action_jump *jump;
4552         const int qrss_idx = qrss - actions;
4553         uint32_t flow_id = 0;
4554         int ret = 0;
4555
4556         /*
4557          * The given actions will be split:
4558          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4559          * - Add jump to mreg CP_TBL.
4560          * As a result, there will be one more action.
4561          */
4562         ++actions_n;
4563         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4564         set_tag = (void *)(split_actions + actions_n);
4565         /*
4566          * If the tag action is not set to void (it means we are not the
4567          * meter suffix flow), add the tag action, since the meter suffix
4568          * flow already has the tag added.
4569          */
4570         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4571                 /*
4572                  * Allocate the new subflow ID. This one is unique within
4573                  * device and not shared with representors. Otherwise,
4574                  * we would have to resolve multi-thread access synch
4575                  * issue. Each flow on the shared device is appended
4576                  * with source vport identifier, so the resulting
4577                  * flows will be unique in the shared (by master and
4578                  * representors) domain even if they have coinciding
4579                  * IDs.
4580                  */
4581                 mlx5_ipool_malloc(priv->sh->ipool
4582                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
4583                 if (!flow_id)
4584                         return rte_flow_error_set(error, ENOMEM,
4585                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4586                                                   NULL, "can't allocate id "
4587                                                   "for split Q/RSS subflow");
4588                 /* Internal SET_TAG action to set flow ID. */
4589                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4590                         .data = flow_id,
4591                 };
4592                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4593                 if (ret < 0)
4594                         return ret;
4595                 set_tag->id = ret;
4596                 /* Construct new actions array. */
4597                 /* Replace QUEUE/RSS action. */
4598                 split_actions[qrss_idx] = (struct rte_flow_action){
4599                         .type = (enum rte_flow_action_type)
4600                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4601                         .conf = set_tag,
4602                 };
4603         }
4604         /* JUMP action to jump to mreg copy table (CP_TBL). */
4605         jump = (void *)(set_tag + 1);
4606         *jump = (struct rte_flow_action_jump){
4607                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4608         };
4609         split_actions[actions_n - 2] = (struct rte_flow_action){
4610                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4611                 .conf = jump,
4612         };
4613         split_actions[actions_n - 1] = (struct rte_flow_action){
4614                 .type = RTE_FLOW_ACTION_TYPE_END,
4615         };
4616         return flow_id;
4617 }
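
/*
 * Worked example (editor's sketch): with the original actions
 * MARK / RSS / END (actions_n = 3, END included) the prepared
 * split_actions become
 *
 *	MARK / TAG(flow_id -> reg_c[2]) /
 *	JUMP(MLX5_FLOW_MREG_CP_TABLE_GROUP) / END
 *
 * and the returned non-zero flow_id lets the RX_ACT_TBL flow recognize
 * the packets and apply the original RSS action there.
 */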
4618
4619 /**
4620  * Extend the given action list for Tx metadata copy.
4621  *
4622  * Copy the given action list to the ext_actions and add flow metadata register
4623  * copy action in order to copy reg_a set by WQE to reg_c[0].
4624  *
4625  * @param[out] ext_actions
4626  *   Pointer to the extended action list.
4627  * @param[in] actions
4628  *   Pointer to the list of actions.
4629  * @param[in] actions_n
4630  *   Number of actions in the list.
4631  * @param[out] error
4632  *   Perform verbose error reporting if not NULL.
4633  * @param[in] encap_idx
4634  *   The encap action index.
4635  *
4636  * @return
4637  *   0 on success, negative value otherwise
4638  */
4639 static int
4640 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
4641                        struct rte_flow_action *ext_actions,
4642                        const struct rte_flow_action *actions,
4643                        int actions_n, struct rte_flow_error *error,
4644                        int encap_idx)
4645 {
4646         struct mlx5_flow_action_copy_mreg *cp_mreg =
4647                 (struct mlx5_flow_action_copy_mreg *)
4648                         (ext_actions + actions_n + 1);
4649         int ret;
4650
4651         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4652         if (ret < 0)
4653                 return ret;
4654         cp_mreg->dst = ret;
4655         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
4656         if (ret < 0)
4657                 return ret;
4658         cp_mreg->src = ret;
4659         if (encap_idx != 0)
4660                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4661         if (encap_idx == actions_n - 1) {
4662                 ext_actions[actions_n - 1] = (struct rte_flow_action){
4663                         .type = (enum rte_flow_action_type)
4664                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4665                         .conf = cp_mreg,
4666                 };
4667                 ext_actions[actions_n] = (struct rte_flow_action){
4668                         .type = RTE_FLOW_ACTION_TYPE_END,
4669                 };
4670         } else {
4671                 ext_actions[encap_idx] = (struct rte_flow_action){
4672                         .type = (enum rte_flow_action_type)
4673                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4674                         .conf = cp_mreg,
4675                 };
4676                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4677                                 sizeof(*ext_actions) * (actions_n - encap_idx));
4678         }
4679         return 0;
4680 }
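
/*
 * Worked example (editor's sketch): for the Tx actions
 * VXLAN_ENCAP / END (actions_n = 2, encap_idx = 0) the extended list is
 *
 *	COPY_MREG(reg_c[0] := reg_a) / VXLAN_ENCAP / END
 *
 * i.e. the register copy action is placed before the encapsulation.
 */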
4681
4682 /**
4683  * Check for the match action in the action list.
4684  *
4685  * @param[in] actions
4686  *   Pointer to the list of actions.
4687  * @param[in] attr
4688  *   Flow rule attributes.
4689  * @param[in] action
4690  *   The action to check for in the list.
4691  * @param[out] match_action_pos
4692  *   Pointer to the position of the matched action if it exists, otherwise -1.
4693  * @param[out] qrss_action_pos
4694  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
4695  * @param[out] modify_after_mirror
4696  *   Pointer to the flag of modify action after FDB mirroring.
4697  *
4698  * @return
4699  *   > 0 the total number of actions.
4700  *   0 if the match action is not found in the action list.
4701  */
4702 static int
4703 flow_check_match_action(const struct rte_flow_action actions[],
4704                         const struct rte_flow_attr *attr,
4705                         enum rte_flow_action_type action,
4706                         int *match_action_pos, int *qrss_action_pos,
4707                         int *modify_after_mirror)
4708 {
4709         const struct rte_flow_action_sample *sample;
4710         int actions_n = 0;
4711         uint32_t ratio = 0;
4712         int sub_type = 0;
4713         int flag = 0;
4714         int fdb_mirror = 0;
4715
4716         *match_action_pos = -1;
4717         *qrss_action_pos = -1;
4718         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4719                 if (actions->type == action) {
4720                         flag = 1;
4721                         *match_action_pos = actions_n;
4722                 }
4723                 switch (actions->type) {
4724                 case RTE_FLOW_ACTION_TYPE_QUEUE:
4725                 case RTE_FLOW_ACTION_TYPE_RSS:
4726                         *qrss_action_pos = actions_n;
4727                         break;
4728                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
4729                         sample = actions->conf;
4730                         ratio = sample->ratio;
4731                         sub_type = ((const struct rte_flow_action *)
4732                                         (sample->actions))->type;
4733                         if (ratio == 1 && attr->transfer)
4734                                 fdb_mirror = 1;
4735                         break;
4736                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
4737                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
4738                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
4739                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
4740                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
4741                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
4742                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
4743                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
4744                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
4745                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
4746                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
4747                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
4748                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
4749                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
4750                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
4751                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
4752                 case RTE_FLOW_ACTION_TYPE_FLAG:
4753                 case RTE_FLOW_ACTION_TYPE_MARK:
4754                 case RTE_FLOW_ACTION_TYPE_SET_META:
4755                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
4756                         if (fdb_mirror)
4757                                 *modify_after_mirror = 1;
4758                         break;
4759                 default:
4760                         break;
4761                 }
4762                 actions_n++;
4763         }
4764         if (flag && fdb_mirror && !*modify_after_mirror) {
4765                 /* FDB mirroring is implemented with the destination array
4766                  * instead of the FLOW_SAMPLER object.
4767                  */
4768                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
4769                         flag = 0;
4770         }
4771         /* Count RTE_FLOW_ACTION_TYPE_END. */
4772         return flag ? actions_n + 1 : 0;
4773 }
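
/*
 * Example (editor's sketch): searching for RTE_FLOW_ACTION_TYPE_SAMPLE
 * in SAMPLE(ratio = 1, sub-actions = END) / QUEUE / END on a transfer
 * rule sets *match_action_pos = 0 and *qrss_action_pos = 1 and returns
 * 3. The empty sample sub-action list keeps the FDB mirror case valid,
 * while a modify action placed after the mirror would set
 * *modify_after_mirror instead.
 */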
4774
4775 #define SAMPLE_SUFFIX_ITEM 2
4776
4777 /**
4778  * Split the sample flow.
4779  *
4780  * As the sample flow will be split into two sub flows, the prefix flow
4781  * keeps the sample action while the other actions move to a new suffix flow.
4782  *
4783  * Also add a unique tag ID with a tag action in the sample flow;
4784  * the same tag ID will be used as a match in the suffix flow.
4785  *
4786  * @param dev
4787  *   Pointer to Ethernet device.
4788  * @param[in] add_tag
4789  *   Add extra tag action flag.
4790  * @param[out] sfx_items
4791  *   Suffix flow match items (list terminated by the END pattern item).
4792  * @param[in] actions
4793  *   Associated actions (list terminated by the END action).
4794  * @param[out] actions_sfx
4795  *   Suffix flow actions.
4796  * @param[out] actions_pre
4797  *   Prefix flow actions.
4798  * @param[in] actions_n
4799  *   The total number of actions.
4800  * @param[in] sample_action_pos
4801  *   The sample action position.
4802  * @param[in] qrss_action_pos
4803  *   The Queue/RSS action position.
4804  * @param[in] jump_table
4805  *   Target group for the extra jump action, 0 when no jump action is added.
4806  * @param[out] error
4807  *   Perform verbose error reporting if not NULL.
4808  *
4809  * @return
4810  *   0 or a unique flow_id on success, a negative errno value
4811  *   otherwise and rte_errno is set.
4812  */
4813 static int
4814 flow_sample_split_prep(struct rte_eth_dev *dev,
4815                        int add_tag,
4816                        struct rte_flow_item sfx_items[],
4817                        const struct rte_flow_action actions[],
4818                        struct rte_flow_action actions_sfx[],
4819                        struct rte_flow_action actions_pre[],
4820                        int actions_n,
4821                        int sample_action_pos,
4822                        int qrss_action_pos,
4823                        int jump_table,
4824                        struct rte_flow_error *error)
4825 {
4826         struct mlx5_priv *priv = dev->data->dev_private;
4827         struct mlx5_rte_flow_action_set_tag *set_tag;
4828         struct mlx5_rte_flow_item_tag *tag_spec;
4829         struct mlx5_rte_flow_item_tag *tag_mask;
4830         struct rte_flow_action_jump *jump_action;
4831         uint32_t tag_id = 0;
4832         int index;
4833         int append_index = 0;
4834         int ret;
4835
4836         if (sample_action_pos < 0)
4837                 return rte_flow_error_set(error, EINVAL,
4838                                           RTE_FLOW_ERROR_TYPE_ACTION,
4839                                           NULL, "invalid position of sample "
4840                                           "action in list");
4841         /* Prepare the actions for prefix and suffix flow. */
4842         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
4843                 index = qrss_action_pos;
4844                 /* Put the actions preceding the Queue/RSS action into the prefix flow. */
4845                 if (index != 0)
4846                         memcpy(actions_pre, actions,
4847                                sizeof(struct rte_flow_action) * index);
4848                 /* Put the other actions preceding the sample action into the prefix flow. */
4849                 if (sample_action_pos > index + 1)
4850                         memcpy(actions_pre + index, actions + index + 1,
4851                                sizeof(struct rte_flow_action) *
4852                                (sample_action_pos - index - 1));
4853                 index = sample_action_pos - 1;
4854                 /* Put Queue/RSS action into Suffix flow. */
4855                 memcpy(actions_sfx, actions + qrss_action_pos,
4856                        sizeof(struct rte_flow_action));
4857                 actions_sfx++;
4858         } else {
4859                 index = sample_action_pos;
4860                 if (index != 0)
4861                         memcpy(actions_pre, actions,
4862                                sizeof(struct rte_flow_action) * index);
4863         }
4864         /* For CX5, add an extra tag action for NIC-RX and E-Switch ingress.
4865          * For CX6DX and above, where metadata registers Cx preserve their
4866          * value, add an extra tag action for NIC-RX and the E-Switch domain.
4867          */
4868         if (add_tag) {
4869                 /* Prepare the prefix tag action. */
4870                 append_index++;
4871                 set_tag = (void *)(actions_pre + actions_n + append_index);
4872                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
4873                 if (ret < 0)
4874                         return ret;
4875                 set_tag->id = ret;
4876                 mlx5_ipool_malloc(priv->sh->ipool
4877                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
4878                 set_tag->data = tag_id;
4879                 /* Prepare the suffix subflow items. */
4880                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
4881                 tag_spec->data = tag_id;
4882                 tag_spec->id = set_tag->id;
4883                 tag_mask = tag_spec + 1;
4884                 tag_mask->data = UINT32_MAX;
4885                 sfx_items[0] = (struct rte_flow_item){
4886                         .type = (enum rte_flow_item_type)
4887                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4888                         .spec = tag_spec,
4889                         .last = NULL,
4890                         .mask = tag_mask,
4891                 };
4892                 sfx_items[1] = (struct rte_flow_item){
4893                         .type = (enum rte_flow_item_type)
4894                                 RTE_FLOW_ITEM_TYPE_END,
4895                 };
4896                 /* Prepare the tag action in prefix subflow. */
4897                 actions_pre[index++] =
4898                         (struct rte_flow_action){
4899                         .type = (enum rte_flow_action_type)
4900                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4901                         .conf = set_tag,
4902                 };
4903         }
4904         memcpy(actions_pre + index, actions + sample_action_pos,
4905                sizeof(struct rte_flow_action));
4906         index += 1;
4907         /* For the modify action after the sample action in E-Switch mirroring,
4908          * add the extra jump action in the prefix subflow and jump into the next
4909          * table, then do the modify action in the new table.
4910          */
4911         if (jump_table) {
4912                 /* Prepare the prefix jump action. */
4913                 append_index++;
4914                 jump_action = (void *)(actions_pre + actions_n + append_index);
4915                 jump_action->group = jump_table;
4916                 actions_pre[index++] =
4917                         (struct rte_flow_action){
4918                         .type = (enum rte_flow_action_type)
4919                                 RTE_FLOW_ACTION_TYPE_JUMP,
4920                         .conf = jump_action,
4921                 };
4922         }
4923         actions_pre[index] = (struct rte_flow_action){
4924                 .type = (enum rte_flow_action_type)
4925                         RTE_FLOW_ACTION_TYPE_END,
4926         };
4927         /* Put the actions after sample into Suffix flow. */
4928         memcpy(actions_sfx, actions + sample_action_pos + 1,
4929                sizeof(struct rte_flow_action) *
4930                (actions_n - sample_action_pos - 1));
4931         return tag_id;
4932 }
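
/*
 * Editor's note, illustrative: with add_tag set and the actions
 * MARK / SAMPLE / QUEUE / END, the function above produces
 *
 *	prefix: MARK / TAG(unique tag_id) / SAMPLE / END
 *	suffix: QUEUE / END, matching TAG(tag_id, mask = UINT32_MAX)
 *
 * and returns the allocated tag_id so the caller can release it when
 * the flow is destroyed.
 */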
4933
4934 /**
4935  * The splitting for the metadata feature.
4936  *
4937  * - Q/RSS action on NIC Rx should be split in order to pass by
4938  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
4939  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
4940  *
4941  * - All the actions on NIC Tx should have a mreg copy action to
4942  *   copy reg_a from WQE to reg_c[0].
4943  *
4944  * @param dev
4945  *   Pointer to Ethernet device.
4946  * @param[in] flow
4947  *   Parent flow structure pointer.
4948  * @param[in] attr
4949  *   Flow rule attributes.
4950  * @param[in] items
4951  *   Pattern specification (list terminated by the END pattern item).
4952  * @param[in] actions
4953  *   Associated actions (list terminated by the END action).
4954  * @param[in] flow_split_info
4955  *   Pointer to flow split info structure.
4956  * @param[out] error
4957  *   Perform verbose error reporting if not NULL.
4958  * @return
4959  *   0 on success, negative value otherwise
4960  */
4961 static int
4962 flow_create_split_metadata(struct rte_eth_dev *dev,
4963                            struct rte_flow *flow,
4964                            const struct rte_flow_attr *attr,
4965                            const struct rte_flow_item items[],
4966                            const struct rte_flow_action actions[],
4967                            struct mlx5_flow_split_info *flow_split_info,
4968                            struct rte_flow_error *error)
4969 {
4970         struct mlx5_priv *priv = dev->data->dev_private;
4971         struct mlx5_dev_config *config = &priv->config;
4972         const struct rte_flow_action *qrss = NULL;
4973         struct rte_flow_action *ext_actions = NULL;
4974         struct mlx5_flow *dev_flow = NULL;
4975         uint32_t qrss_id = 0;
4976         int mtr_sfx = 0;
4977         size_t act_size;
4978         int actions_n;
4979         int encap_idx;
4980         int ret;
4981
4982         /* Check whether extensive metadata feature is engaged. */
4983         if (!config->dv_flow_en ||
4984             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4985             !mlx5_flow_ext_mreg_supported(dev))
4986                 return flow_create_split_inner(dev, flow, NULL, attr, items,
4987                                                actions, flow_split_info, error);
4988         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4989                                                            &encap_idx);
4990         if (qrss) {
4991                 /* Exclude hairpin flows from splitting. */
4992                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4993                         const struct rte_flow_action_queue *queue;
4994
4995                         queue = qrss->conf;
4996                         if (mlx5_rxq_get_type(dev, queue->index) ==
4997                             MLX5_RXQ_TYPE_HAIRPIN)
4998                                 qrss = NULL;
4999                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5000                         const struct rte_flow_action_rss *rss;
5001
5002                         rss = qrss->conf;
5003                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5004                             MLX5_RXQ_TYPE_HAIRPIN)
5005                                 qrss = NULL;
5006                 }
5007         }
5008         if (qrss) {
5009                 /* Check if it is in meter suffix table. */
5010                 mtr_sfx = attr->group == (attr->transfer ?
5011                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
5012                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
5013                 /*
                 * A Q/RSS action on NIC Rx should be split in order to pass
                 * through the mreg copy table (RX_CP_TBL); it then jumps to
                 * the action table (RX_ACT_TBL), which has the split Q/RSS
                 * action.
5017                  */
5018                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5019                            sizeof(struct rte_flow_action_set_tag) +
5020                            sizeof(struct rte_flow_action_jump);
5021                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5022                                           SOCKET_ID_ANY);
5023                 if (!ext_actions)
5024                         return rte_flow_error_set(error, ENOMEM,
5025                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5026                                                   NULL, "no memory to split "
5027                                                   "metadata flow");
                /*
                 * If we are the meter suffix flow, the tag already exists.
                 * Set the tag action to void.
                 */
5032                 if (mtr_sfx)
5033                         ext_actions[qrss - actions].type =
5034                                                 RTE_FLOW_ACTION_TYPE_VOID;
5035                 else
5036                         ext_actions[qrss - actions].type =
5037                                                 (enum rte_flow_action_type)
5038                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5039                 /*
                 * Create the new action list with the Q/RSS action removed
                 * and a set tag plus a jump to the register copy table
                 * (RX_CP_TBL) appended. The unique tag ID must be
                 * preallocated here because it is needed by the set tag
                 * action.
5044                  */
5045                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5046                                                     qrss, actions_n, error);
5047                 if (!mtr_sfx && !qrss_id) {
5048                         ret = -rte_errno;
5049                         goto exit;
5050                 }
5051         } else if (attr->egress && !attr->transfer) {
5052                 /*
5053                  * All the actions on NIC Tx should have a metadata register
                 * copy action to copy reg_a from the WQE to reg_c[meta].
5055                  */
5056                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5057                            sizeof(struct mlx5_flow_action_copy_mreg);
5058                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5059                                           SOCKET_ID_ANY);
5060                 if (!ext_actions)
5061                         return rte_flow_error_set(error, ENOMEM,
5062                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5063                                                   NULL, "no memory to split "
5064                                                   "metadata flow");
5065                 /* Create the action list appended with copy register. */
5066                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5067                                              actions_n, error, encap_idx);
5068                 if (ret < 0)
5069                         goto exit;
5070         }
5071         /* Add the unmodified original or prefix subflow. */
5072         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5073                                       items, ext_actions ? ext_actions :
5074                                       actions, flow_split_info, error);
5075         if (ret < 0)
5076                 goto exit;
5077         MLX5_ASSERT(dev_flow);
5078         if (qrss) {
5079                 const struct rte_flow_attr q_attr = {
5080                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5081                         .ingress = 1,
5082                 };
                /* Internal PMD tag item to match the register value. */
5084                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5085                         .data = qrss_id,
5086                         .id = REG_NON,
5087                 };
5088                 struct rte_flow_item q_items[] = {
5089                         {
5090                                 .type = (enum rte_flow_item_type)
5091                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5092                                 .spec = &q_tag_spec,
5093                                 .last = NULL,
5094                                 .mask = NULL,
5095                         },
5096                         {
5097                                 .type = RTE_FLOW_ITEM_TYPE_END,
5098                         },
5099                 };
5100                 struct rte_flow_action q_actions[] = {
5101                         {
5102                                 .type = qrss->type,
5103                                 .conf = qrss->conf,
5104                         },
5105                         {
5106                                 .type = RTE_FLOW_ACTION_TYPE_END,
5107                         },
5108                 };
5109                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5110
5111                 /*
5112                  * Configure the tag item only if there is no meter subflow.
                 * Since the tag is already set in the meter suffix subflow,
                 * we can just use the meter suffix items as is.
5115                  */
5116                 if (qrss_id) {
5117                         /* Not meter subflow. */
5118                         MLX5_ASSERT(!mtr_sfx);
5119                         /*
                         * Put the unique id in the prefix flow: it is
                         * destroyed after the suffix flow, and the id is
                         * freed only once no actual flow uses it anymore,
                         * at which point identifier reallocation becomes
                         * possible (for example, for other flows in other
                         * threads).
5125                          */
5126                         dev_flow->handle->split_flow_id = qrss_id;
5127                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5128                                                    error);
5129                         if (ret < 0)
5130                                 goto exit;
5131                         q_tag_spec.id = ret;
5132                 }
5133                 dev_flow = NULL;
5134                 /* Add suffix subflow to execute Q/RSS. */
5135                 flow_split_info->prefix_layers = layers;
5136                 flow_split_info->prefix_mark = 0;
5137                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5138                                               &q_attr, mtr_sfx ? items :
5139                                               q_items, q_actions,
5140                                               flow_split_info, error);
5141                 if (ret < 0)
5142                         goto exit;
                /* Reset qrss_id: the exit path frees it only on failure. */
5144                 qrss_id = 0;
5145                 MLX5_ASSERT(dev_flow);
5146         }
5147
5148 exit:
5149         /*
         * We do not destroy the partially created subflows in case of error.
         * They are included in the parent flow list and will be destroyed
         * by flow_drv_destroy().
5153          */
5154         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5155                         qrss_id);
5156         mlx5_free(ext_actions);
5157         return ret;
5158 }
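
/*
 * Illustrative sketch (not part of the driver): an application rule that
 * exercises the metadata split above. With extensive metadata enabled
 * (dv_xmeta_en != MLX5_XMETA_MODE_LEGACY), the QUEUE fate below is moved to
 * the RX_ACT_TBL suffix subflow, while the prefix subflow tags the packet
 * and jumps through the register copy table (RX_CP_TBL). Port 0, the mark
 * id and the queue index are assumptions made for the example.
 *
 *        struct rte_flow_attr attr = { .ingress = 1 };
 *        struct rte_flow_item pattern[] = {
 *                { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *                { .type = RTE_FLOW_ITEM_TYPE_END },
 *        };
 *        struct rte_flow_action_mark mark = { .id = 0x1234 };
 *        struct rte_flow_action_queue queue = { .index = 0 };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 *        struct rte_flow_error err;
 *
 *        rte_flow_create(0, &attr, pattern, actions, &err);
 */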
5159
5160 /**
 * The splitting for the meter feature.
 *
 * - The meter flow is split into two flows: a prefix flow and a
 *   suffix flow. Packets reach the suffix flow only if they pass
 *   the prefix meter action.
 *
 * - Reg_C_5 is used to match packets between the prefix and
 *   suffix flows.
5169  *
5170  * @param dev
5171  *   Pointer to Ethernet device.
5172  * @param[in] flow
5173  *   Parent flow structure pointer.
5174  * @param[in] attr
5175  *   Flow rule attributes.
5176  * @param[in] items
5177  *   Pattern specification (list terminated by the END pattern item).
5178  * @param[in] actions
5179  *   Associated actions (list terminated by the END action).
5180  * @param[in] flow_split_info
5181  *   Pointer to flow split info structure.
5182  * @param[out] error
5183  *   Perform verbose error reporting if not NULL.
5184  * @return
5185  *   0 on success, negative value otherwise
5186  */
5187 static int
5188 flow_create_split_meter(struct rte_eth_dev *dev,
5189                         struct rte_flow *flow,
5190                         const struct rte_flow_attr *attr,
5191                         const struct rte_flow_item items[],
5192                         const struct rte_flow_action actions[],
5193                         struct mlx5_flow_split_info *flow_split_info,
5194                         struct rte_flow_error *error)
5195 {
5196         struct mlx5_priv *priv = dev->data->dev_private;
5197         struct rte_flow_action *sfx_actions = NULL;
5198         struct rte_flow_action *pre_actions = NULL;
5199         struct rte_flow_item *sfx_items = NULL;
5200         struct mlx5_flow *dev_flow = NULL;
5201         struct rte_flow_attr sfx_attr = *attr;
5202         uint32_t mtr = 0;
5203         uint32_t mtr_tag_id = 0;
5204         size_t act_size;
5205         size_t item_size;
5206         int actions_n = 0;
5207         int ret;
5208
5209         if (priv->mtr_en)
5210                 actions_n = flow_check_meter_action(actions, &mtr);
5211         if (mtr) {
5212                 /* The five prefix actions: meter, decap, encap, tag, end. */
5213                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
5214                            sizeof(struct mlx5_rte_flow_action_set_tag);
5215                 /* tag, vlan, port id, end. */
5216 #define METER_SUFFIX_ITEM 4
5217                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5218                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5219                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5220                                           0, SOCKET_ID_ANY);
5221                 if (!sfx_actions)
5222                         return rte_flow_error_set(error, ENOMEM,
5223                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5224                                                   NULL, "no memory to split "
5225                                                   "meter flow");
5226                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5227                              act_size);
5228                 pre_actions = sfx_actions + actions_n;
5229                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
5230                                                    actions, sfx_actions,
5231                                                    pre_actions);
5232                 if (!mtr_tag_id) {
5233                         ret = -rte_errno;
5234                         goto exit;
5235                 }
5236                 /* Add the prefix subflow. */
5237                 flow_split_info->prefix_mark = 0;
5238                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5239                                               attr, items, pre_actions,
5240                                               flow_split_info, error);
5241                 if (ret) {
5242                         ret = -rte_errno;
5243                         goto exit;
5244                 }
5245                 dev_flow->handle->split_flow_id = mtr_tag_id;
                /* Set the sfx group attr. */
5247                 sfx_attr.group = sfx_attr.transfer ?
5248                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
5249                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
5250                 flow_split_info->prefix_layers =
5251                                 flow_get_prefix_layer_flags(dev_flow);
5252                 flow_split_info->prefix_mark = dev_flow->handle->mark;
5253         }
        /* Add the suffix subflow (the whole flow if no meter was found). */
5255         ret = flow_create_split_metadata(dev, flow,
5256                                          &sfx_attr, sfx_items ?
5257                                          sfx_items : items,
5258                                          sfx_actions ? sfx_actions : actions,
5259                                          flow_split_info, error);
5260 exit:
5261         if (sfx_actions)
5262                 mlx5_free(sfx_actions);
5263         return ret;
5264 }
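
/*
 * Illustrative sketch (not part of the driver): a rule with a METER action
 * that triggers the prefix/suffix split above. The meter with id 1 is an
 * assumption and would be created beforehand through the rte_mtr API; the
 * PMD runs the meter in the prefix subflow and the remaining fate action
 * in the suffix table.
 *
 *        struct rte_flow_action_meter meter = { .mtr_id = 1 };
 *        struct rte_flow_action_queue queue = { .index = 0 };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 */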
5265
5266 /**
 * The splitting for the sample feature.
 *
 * Once a sample action is detected in the action list, the flow actions
 * are split into a prefix subflow and a suffix subflow.
 *
 * The original items remain in the prefix subflow. All actions preceding
 * the sample action, and the sample action itself, are copied to the
 * prefix subflow; the actions following the sample action are copied to
 * the suffix subflow. A queue action is always located in the suffix
 * subflow.
 *
 * In order to make packets from the prefix subflow match the suffix
 * subflow, an extra tag action is added to the prefix subflow, and the
 * suffix subflow uses a tag item with the unique flow id.
5280  *
5281  * @param dev
5282  *   Pointer to Ethernet device.
5283  * @param[in] flow
5284  *   Parent flow structure pointer.
5285  * @param[in] attr
5286  *   Flow rule attributes.
5287  * @param[in] items
5288  *   Pattern specification (list terminated by the END pattern item).
5289  * @param[in] actions
5290  *   Associated actions (list terminated by the END action).
5291  * @param[in] flow_split_info
5292  *   Pointer to flow split info structure.
5293  * @param[out] error
5294  *   Perform verbose error reporting if not NULL.
5295  * @return
5296  *   0 on success, negative value otherwise
5297  */
5298 static int
5299 flow_create_split_sample(struct rte_eth_dev *dev,
5300                          struct rte_flow *flow,
5301                          const struct rte_flow_attr *attr,
5302                          const struct rte_flow_item items[],
5303                          const struct rte_flow_action actions[],
5304                          struct mlx5_flow_split_info *flow_split_info,
5305                          struct rte_flow_error *error)
5306 {
5307         struct mlx5_priv *priv = dev->data->dev_private;
5308         struct rte_flow_action *sfx_actions = NULL;
5309         struct rte_flow_action *pre_actions = NULL;
5310         struct rte_flow_item *sfx_items = NULL;
5311         struct mlx5_flow *dev_flow = NULL;
5312         struct rte_flow_attr sfx_attr = *attr;
5313 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5314         struct mlx5_flow_dv_sample_resource *sample_res;
5315         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
5316         struct mlx5_flow_tbl_resource *sfx_tbl;
5317 #endif
5318         size_t act_size;
5319         size_t item_size;
5320         uint32_t fdb_tx = 0;
5321         int32_t tag_id = 0;
5322         int actions_n = 0;
5323         int sample_action_pos;
5324         int qrss_action_pos;
5325         int add_tag = 0;
5326         int modify_after_mirror = 0;
5327         uint16_t jump_table = 0;
5328         const uint32_t next_ft_step = 1;
5329         int ret = 0;
5330
5331         if (priv->sampler_en)
5332                 actions_n = flow_check_match_action(actions, attr,
5333                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
5334                                         &sample_action_pos, &qrss_action_pos,
5335                                         &modify_after_mirror);
5336         if (actions_n) {
                /* The prefix actions must include sample, tag and end. */
5338                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
5339                            + sizeof(struct mlx5_rte_flow_action_set_tag);
5340                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
5341                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5342                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
5343                                           item_size), 0, SOCKET_ID_ANY);
5344                 if (!sfx_actions)
5345                         return rte_flow_error_set(error, ENOMEM,
5346                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5347                                                   NULL, "no memory to split "
5348                                                   "sample flow");
5349                 /* The representor_id is -1 for uplink. */
5350                 fdb_tx = (attr->transfer && priv->representor_id != -1);
5351                 /*
5352                  * When reg_c_preserve is set, metadata registers Cx preserve
5353                  * their value even through packet duplication.
5354                  */
5355                 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
5356                 if (add_tag)
5357                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
5358                                         + act_size);
5359                 if (modify_after_mirror)
5360                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
5361                                      next_ft_step;
5362                 pre_actions = sfx_actions + actions_n;
5363                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
5364                                                 actions, sfx_actions,
5365                                                 pre_actions, actions_n,
5366                                                 sample_action_pos,
5367                                                 qrss_action_pos, jump_table,
5368                                                 error);
5369                 if (tag_id < 0 || (add_tag && !tag_id)) {
5370                         ret = -rte_errno;
5371                         goto exit;
5372                 }
5373                 if (modify_after_mirror)
5374                         flow_split_info->skip_scale =
5375                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5376                 /* Add the prefix subflow. */
5377                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5378                                               items, pre_actions,
5379                                               flow_split_info, error);
5380                 if (ret) {
5381                         ret = -rte_errno;
5382                         goto exit;
5383                 }
5384                 dev_flow->handle->split_flow_id = tag_id;
5385 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5386                 if (!modify_after_mirror) {
5387                         /* Set the sfx group attr. */
5388                         sample_res = (struct mlx5_flow_dv_sample_resource *)
5389                                                 dev_flow->dv.sample_res;
5390                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
5391                                                 sample_res->normal_path_tbl;
5392                         sfx_tbl_data = container_of(sfx_tbl,
5393                                                 struct mlx5_flow_tbl_data_entry,
5394                                                 tbl);
5395                         sfx_attr.group = sfx_attr.transfer ?
5396                                                 (sfx_tbl_data->table_id - 1) :
5397                                                 sfx_tbl_data->table_id;
5398                 } else {
5399                         MLX5_ASSERT(attr->transfer);
5400                         sfx_attr.group = jump_table;
5401                 }
5402                 flow_split_info->prefix_layers =
5403                                 flow_get_prefix_layer_flags(dev_flow);
5404                 flow_split_info->prefix_mark = dev_flow->handle->mark;
                /* The suffix group level has already been scaled with the
                 * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
                 * to avoid scaling again in translation.
                 */
5409                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5410 #endif
5411         }
5412         /* Add the suffix subflow. */
5413         ret = flow_create_split_meter(dev, flow, &sfx_attr,
5414                                       sfx_items ? sfx_items : items,
5415                                       sfx_actions ? sfx_actions : actions,
5416                                       flow_split_info, error);
5417 exit:
5418         if (sfx_actions)
5419                 mlx5_free(sfx_actions);
5420         return ret;
5421 }
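
/*
 * Illustrative sketch (not part of the driver): a rule with a SAMPLE action
 * that triggers the split above. One packet out of two is copied to queue 1
 * while every packet keeps its original fate on queue 0; per the splitting
 * rules, the QUEUE action that follows SAMPLE ends up in the suffix
 * subflow. The queue indexes and the ratio are assumptions made for the
 * example.
 *
 *        struct rte_flow_action_queue sampled_queue = { .index = 1 };
 *        struct rte_flow_action sample_sub[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *                  .conf = &sampled_queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 *        struct rte_flow_action_sample sample = {
 *                .ratio = 2,        /* sample 1/2 of the packets */
 *                .actions = sample_sub,
 *        };
 *        struct rte_flow_action_queue queue = { .index = 0 };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_SAMPLE, .conf = &sample },
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 */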
5422
5423 /**
5424  * Split the flow to subflow set. The splitters might be linked
5425  * in the chain, like this:
5426  * flow_create_split_outer() calls:
5427  *   flow_create_split_meter() calls:
5428  *     flow_create_split_metadata(meter_subflow_0) calls:
5429  *       flow_create_split_inner(metadata_subflow_0)
5430  *       flow_create_split_inner(metadata_subflow_1)
5431  *       flow_create_split_inner(metadata_subflow_2)
5432  *     flow_create_split_metadata(meter_subflow_1) calls:
5433  *       flow_create_split_inner(metadata_subflow_0)
5434  *       flow_create_split_inner(metadata_subflow_1)
5435  *       flow_create_split_inner(metadata_subflow_2)
5436  *
 * This provides a flexible way to add new levels of flow splitting.
 * All successfully created subflows are included in the parent flow
 * dev_flow list.
5440  *
5441  * @param dev
5442  *   Pointer to Ethernet device.
5443  * @param[in] flow
5444  *   Parent flow structure pointer.
5445  * @param[in] attr
5446  *   Flow rule attributes.
5447  * @param[in] items
5448  *   Pattern specification (list terminated by the END pattern item).
5449  * @param[in] actions
5450  *   Associated actions (list terminated by the END action).
5451  * @param[in] flow_split_info
5452  *   Pointer to flow split info structure.
5453  * @param[out] error
5454  *   Perform verbose error reporting if not NULL.
5455  * @return
5456  *   0 on success, negative value otherwise
5457  */
5458 static int
5459 flow_create_split_outer(struct rte_eth_dev *dev,
5460                         struct rte_flow *flow,
5461                         const struct rte_flow_attr *attr,
5462                         const struct rte_flow_item items[],
5463                         const struct rte_flow_action actions[],
5464                         struct mlx5_flow_split_info *flow_split_info,
5465                         struct rte_flow_error *error)
5466 {
5467         int ret;
5468
5469         ret = flow_create_split_sample(dev, flow, attr, items,
5470                                        actions, flow_split_info, error);
5471         MLX5_ASSERT(ret <= 0);
5472         return ret;
5473 }
5474
5475 static struct mlx5_flow_tunnel *
5476 flow_tunnel_from_rule(struct rte_eth_dev *dev,
5477                       const struct rte_flow_attr *attr,
5478                       const struct rte_flow_item items[],
5479                       const struct rte_flow_action actions[])
5480 {
5481         struct mlx5_flow_tunnel *tunnel;
5482
5483 #pragma GCC diagnostic push
5484 #pragma GCC diagnostic ignored "-Wcast-qual"
5485         if (is_flow_tunnel_match_rule(dev, attr, items, actions))
5486                 tunnel = (struct mlx5_flow_tunnel *)items[0].spec;
5487         else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
5488                 tunnel = (struct mlx5_flow_tunnel *)actions[0].conf;
5489         else
5490                 tunnel = NULL;
5491 #pragma GCC diagnostic pop
5492
5493         return tunnel;
5494 }
5495
5496 /**
5497  * Adjust flow RSS workspace if needed.
5498  *
5499  * @param wks
5500  *   Pointer to thread flow work space.
5501  * @param rss_desc
5502  *   Pointer to RSS descriptor.
5503  * @param[in] nrssq_num
5504  *   New RSS queue number.
5505  *
5506  * @return
5507  *   0 on success, -1 otherwise and rte_errno is set.
5508  */
5509 static int
5510 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
5511                           struct mlx5_flow_rss_desc *rss_desc,
5512                           uint32_t nrssq_num)
5513 {
        uint16_t *queue;

        if (likely(nrssq_num <= wks->rssq_num))
                return 0;
        /* Reallocate via a temporary so the old array is not leaked on failure. */
        queue = realloc(rss_desc->queue,
                        sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
        if (!queue) {
                rte_errno = ENOMEM;
                return -1;
        }
        rss_desc->queue = queue;
        wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
5523         return 0;
5524 }
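
/*
 * Worked example of the growth policy above: with wks->rssq_num == 4, a
 * request for 5 queues reallocates the array to RTE_ALIGN(5, 2) == 6
 * entries, so a following request for 6 queues needs no reallocation.
 */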
5525
5526 /**
5527  * Create a flow and add it to @p list.
5528  *
5529  * @param dev
5530  *   Pointer to Ethernet device.
5531  * @param list
 *   Pointer to a TAILQ flow list. If this parameter is NULL,
 *   no list insertion occurs: the flow is just created and it
 *   is the caller's responsibility to track the created flow.
5536  * @param[in] attr
5537  *   Flow rule attributes.
5538  * @param[in] items
5539  *   Pattern specification (list terminated by the END pattern item).
5540  * @param[in] actions
5541  *   Associated actions (list terminated by the END action).
5542  * @param[in] external
 *   This flow rule is created by a request external to the PMD.
5544  * @param[out] error
5545  *   Perform verbose error reporting if not NULL.
5546  *
5547  * @return
5548  *   A flow index on success, 0 otherwise and rte_errno is set.
5549  */
5550 static uint32_t
5551 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
5552                  const struct rte_flow_attr *attr,
5553                  const struct rte_flow_item items[],
5554                  const struct rte_flow_action original_actions[],
5555                  bool external, struct rte_flow_error *error)
5556 {
5557         struct mlx5_priv *priv = dev->data->dev_private;
5558         struct rte_flow *flow = NULL;
5559         struct mlx5_flow *dev_flow;
5560         const struct rte_flow_action_rss *rss = NULL;
5561         struct mlx5_translated_shared_action
5562                 shared_actions[MLX5_MAX_SHARED_ACTIONS];
5563         int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5564         union {
5565                 struct mlx5_flow_expand_rss buf;
5566                 uint8_t buffer[2048];
5567         } expand_buffer;
5568         union {
5569                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5570                 uint8_t buffer[2048];
5571         } actions_rx;
5572         union {
5573                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5574                 uint8_t buffer[2048];
5575         } actions_hairpin_tx;
5576         union {
5577                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
5578                 uint8_t buffer[2048];
5579         } items_tx;
5580         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
5581         struct mlx5_flow_rss_desc *rss_desc;
5582         const struct rte_flow_action *p_actions_rx;
5583         uint32_t i;
5584         uint32_t idx = 0;
5585         int hairpin_flow;
5586         struct rte_flow_attr attr_tx = { .priority = 0 };
5587         const struct rte_flow_action *actions;
5588         struct rte_flow_action *translated_actions = NULL;
5589         struct mlx5_flow_tunnel *tunnel;
5590         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
5591         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
5592         struct mlx5_flow_split_info flow_split_info = {
5593                 .external = !!external,
5594                 .skip_scale = 0,
5595                 .flow_idx = 0,
5596                 .prefix_mark = 0,
5597                 .prefix_layers = 0
5598         };
5599         int ret;
5600
5601         MLX5_ASSERT(wks);
5602         rss_desc = &wks->rss_desc;
5603         ret = flow_shared_actions_translate(dev, original_actions,
5604                                             shared_actions,
5605                                             &shared_actions_n,
5606                                             &translated_actions, error);
5607         if (ret < 0) {
5608                 MLX5_ASSERT(translated_actions == NULL);
5609                 return 0;
5610         }
5611         actions = translated_actions ? translated_actions : original_actions;
5612         p_actions_rx = actions;
5613         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5614         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
5615                                 external, hairpin_flow, error);
5616         if (ret < 0)
5617                 goto error_before_hairpin_split;
5618         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
5619         if (!flow) {
5620                 rte_errno = ENOMEM;
5621                 goto error_before_hairpin_split;
5622         }
5623         if (hairpin_flow > 0) {
5624                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
5625                         rte_errno = EINVAL;
5626                         goto error_before_hairpin_split;
5627                 }
5628                 flow_hairpin_split(dev, actions, actions_rx.actions,
5629                                    actions_hairpin_tx.actions, items_tx.items,
5630                                    idx);
5631                 p_actions_rx = actions_rx.actions;
5632         }
5633         flow_split_info.flow_idx = idx;
5634         flow->drv_type = flow_get_drv_type(dev, attr);
5635         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
5636                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
5637         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
        /* The RSS action only works on the NIC Rx domain. */
5639         if (attr->ingress && !attr->transfer)
5640                 rss = flow_get_rss_action(p_actions_rx);
5641         if (rss) {
5642                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
5643                         return 0;
5644                 /*
5645                  * The following information is required by
5646                  * mlx5_flow_hashfields_adjust() in advance.
5647                  */
5648                 rss_desc->level = rss->level;
5649                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
5650                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
5651         }
5652         flow->dev_handles = 0;
5653         if (rss && rss->types) {
5654                 unsigned int graph_root;
5655
5656                 graph_root = find_graph_root(items, rss->level);
5657                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
5658                                            items, rss->types,
5659                                            mlx5_support_expansion, graph_root);
5660                 MLX5_ASSERT(ret > 0 &&
5661                        (unsigned int)ret < sizeof(expand_buffer.buffer));
5662         } else {
5663                 buf->entries = 1;
5664                 buf->entry[0].pattern = (void *)(uintptr_t)items;
5665         }
5666         rss_desc->shared_rss = flow_get_shared_rss_action(dev, shared_actions,
5667                                                       shared_actions_n);
5668         for (i = 0; i < buf->entries; ++i) {
5669                 /* Initialize flow split data. */
5670                 flow_split_info.prefix_layers = 0;
5671                 flow_split_info.prefix_mark = 0;
5672                 flow_split_info.skip_scale = 0;
5673                 /*
5674                  * The splitter may create multiple dev_flows,
5675                  * depending on configuration. In the simplest
                 * case it just creates the unmodified original flow.
5677                  */
5678                 ret = flow_create_split_outer(dev, flow, attr,
5679                                               buf->entry[i].pattern,
5680                                               p_actions_rx, &flow_split_info,
5681                                               error);
5682                 if (ret < 0)
5683                         goto error;
5684                 if (is_flow_tunnel_steer_rule(dev, attr,
5685                                               buf->entry[i].pattern,
5686                                               p_actions_rx)) {
5687                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
5688                                                            p_actions_rx,
5689                                                            idx,
5690                                                            &default_miss_ctx,
5691                                                            error);
5692                         if (ret < 0) {
5693                                 mlx5_free(default_miss_ctx.queue);
5694                                 goto error;
5695                         }
5696                 }
5697         }
5698         /* Create the tx flow. */
5699         if (hairpin_flow) {
5700                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
5701                 attr_tx.ingress = 0;
5702                 attr_tx.egress = 1;
5703                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
5704                                          actions_hairpin_tx.actions,
5705                                          idx, error);
5706                 if (!dev_flow)
5707                         goto error;
5708                 dev_flow->flow = flow;
5709                 dev_flow->external = 0;
5710                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5711                               dev_flow->handle, next);
5712                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
5713                                          items_tx.items,
5714                                          actions_hairpin_tx.actions, error);
5715                 if (ret < 0)
5716                         goto error;
5717         }
5718         /*
         * Update the metadata register copy table. If the extensive
         * metadata feature is enabled and the registers are supported,
         * we might create an extra rte_flow for each unique
         * MARK/FLAG action ID.
         *
         * The table is updated for ingress flows only, because
         * egress flows belong to a different device and the
         * copy table should be updated in the peer NIC Rx domain.
5727          */
5728         if (attr->ingress &&
5729             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
5730                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
5731                 if (ret)
5732                         goto error;
5733         }
5734         /*
         * If the flow is external (from the application), OR the device is
         * started, OR this is the mreg discovery flow, then apply it
         * immediately.
5737          */
5738         if (external || dev->data->dev_started ||
5739             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
5740              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
5741                 ret = flow_drv_apply(dev, flow, error);
5742                 if (ret < 0)
5743                         goto error;
5744         }
5745         if (list) {
5746                 rte_spinlock_lock(&priv->flow_list_lock);
5747                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
5748                              flow, next);
5749                 rte_spinlock_unlock(&priv->flow_list_lock);
5750         }
5751         flow_rxq_flags_set(dev, flow);
5752         rte_free(translated_actions);
5753         tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
5754         if (tunnel) {
5755                 flow->tunnel = 1;
5756                 flow->tunnel_id = tunnel->tunnel_id;
5757                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
5758                 mlx5_free(default_miss_ctx.queue);
5759         }
5760         mlx5_flow_pop_thread_workspace();
5761         return idx;
5762 error:
5763         MLX5_ASSERT(flow);
5764         ret = rte_errno; /* Save rte_errno before cleanup. */
5765         flow_mreg_del_copy_action(dev, flow);
5766         flow_drv_destroy(dev, flow);
5767         if (rss_desc->shared_rss)
5768                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
5769                         mlx5_ipool_get
5770                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
5771                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
5772         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
        rte_errno = ret; /* Restore rte_errno. */
5776         mlx5_flow_pop_thread_workspace();
5777 error_before_hairpin_split:
5778         rte_free(translated_actions);
5779         return 0;
5780 }
5781
5782 /**
5783  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
5784  * incoming packets to table 1.
5785  *
5786  * Other flow rules, requested for group n, will be created in
5787  * e-switch table n+1.
 * A jump action to e-switch group n will be translated to a jump to
 * table n+1.
5789  *
5790  * Used when working in switchdev mode, to utilise advantages of table 1
5791  * and above.
5792  *
5793  * @param dev
5794  *   Pointer to Ethernet device.
5795  *
5796  * @return
5797  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
5798  */
5799 struct rte_flow *
5800 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
5801 {
5802         const struct rte_flow_attr attr = {
5803                 .group = 0,
5804                 .priority = 0,
5805                 .ingress = 1,
5806                 .egress = 0,
5807                 .transfer = 1,
5808         };
5809         const struct rte_flow_item pattern = {
5810                 .type = RTE_FLOW_ITEM_TYPE_END,
5811         };
5812         struct rte_flow_action_jump jump = {
5813                 .group = 1,
5814         };
5815         const struct rte_flow_action actions[] = {
5816                 {
5817                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
5818                         .conf = &jump,
5819                 },
5820                 {
5821                         .type = RTE_FLOW_ACTION_TYPE_END,
5822                 },
5823         };
5824         struct mlx5_priv *priv = dev->data->dev_private;
5825         struct rte_flow_error error;
5826
5827         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
5828                                                    &attr, &pattern,
5829                                                    actions, false, &error);
5830 }
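
/*
 * Illustrative sketch (not part of the driver): with the table-zero rule
 * above installed, an application transfer rule for group 3 lands in
 * e-switch table 4, and its jump to group 4 targets table 5. The group
 * numbers are assumptions made for the example.
 *
 *        struct rte_flow_attr attr = { .transfer = 1, .group = 3 };
 *        struct rte_flow_action_jump jump = { .group = 4 };
 *        struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_JUMP, .conf = &jump },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 */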
5831
5832 /**
5833  * Validate a flow supported by the NIC.
5834  *
5835  * @see rte_flow_validate()
5836  * @see rte_flow_ops
5837  */
5838 int
5839 mlx5_flow_validate(struct rte_eth_dev *dev,
5840                    const struct rte_flow_attr *attr,
5841                    const struct rte_flow_item items[],
5842                    const struct rte_flow_action original_actions[],
5843                    struct rte_flow_error *error)
5844 {
5845         int hairpin_flow;
5846         struct mlx5_translated_shared_action
5847                 shared_actions[MLX5_MAX_SHARED_ACTIONS];
5848         int shared_actions_n = MLX5_MAX_SHARED_ACTIONS;
5849         const struct rte_flow_action *actions;
5850         struct rte_flow_action *translated_actions = NULL;
5851         int ret = flow_shared_actions_translate(dev, original_actions,
5852                                                 shared_actions,
5853                                                 &shared_actions_n,
5854                                                 &translated_actions, error);
5855
5856         if (ret)
5857                 return ret;
5858         actions = translated_actions ? translated_actions : original_actions;
5859         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5860         ret = flow_drv_validate(dev, attr, items, actions,
5861                                 true, hairpin_flow, error);
5862         rte_free(translated_actions);
5863         return ret;
5864 }
5865
5866 /**
5867  * Create a flow.
5868  *
5869  * @see rte_flow_create()
5870  * @see rte_flow_ops
5871  */
5872 struct rte_flow *
5873 mlx5_flow_create(struct rte_eth_dev *dev,
5874                  const struct rte_flow_attr *attr,
5875                  const struct rte_flow_item items[],
5876                  const struct rte_flow_action actions[],
5877                  struct rte_flow_error *error)
5878 {
5879         struct mlx5_priv *priv = dev->data->dev_private;
5880
5881         /*
         * If the device is not started yet, the application is not allowed
         * to create a flow. PMD default flows and traffic control flows
         * are not affected.
5885          */
5886         if (unlikely(!dev->data->dev_started)) {
5887                 DRV_LOG(DEBUG, "port %u is not started when "
5888                         "inserting a flow", dev->data->port_id);
5889                 rte_flow_error_set(error, ENODEV,
5890                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5891                                    NULL,
5892                                    "port not started");
5893                 return NULL;
5894         }
5895
5896         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
5897                                   attr, items, actions, true, error);
5898 }
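
/*
 * Illustrative sketch (not part of the driver): application flows must be
 * created only after the port is started. Port 0 is an assumption, and
 * attr/pattern/actions stand for any valid rule description.
 *
 *        if (rte_eth_dev_start(0) != 0)
 *                return;
 *        if (rte_flow_create(0, &attr, pattern, actions, &error) == NULL)
 *                printf("flow rejected: %s\n",
 *                       error.message ? error.message : "(no reason)");
 */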
5899
5900 /**
5901  * Destroy a flow in a list.
5902  *
5903  * @param dev
5904  *   Pointer to Ethernet device.
5905  * @param list
 *   Pointer to the indexed flow list. If this parameter is NULL,
 *   there is no flow removal from the list. Note that since flows
 *   are added to an indexed list, the memory the list points to
 *   may change as flows are destroyed.
5910  * @param[in] flow_idx
5911  *   Index of flow to destroy.
5912  */
5913 static void
5914 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
5915                   uint32_t flow_idx)
5916 {
5917         struct mlx5_priv *priv = dev->data->dev_private;
5918         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5919                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
5920
5921         if (!flow)
5922                 return;
5923         /*
5924          * Update RX queue flags only if port is started, otherwise it is
5925          * already clean.
5926          */
5927         if (dev->data->dev_started)
5928                 flow_rxq_flags_trim(dev, flow);
5929         flow_drv_destroy(dev, flow);
5930         if (list) {
5931                 rte_spinlock_lock(&priv->flow_list_lock);
5932                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
5933                              flow_idx, flow, next);
5934                 rte_spinlock_unlock(&priv->flow_list_lock);
5935         }
5936         if (flow->tunnel) {
5937                 struct mlx5_flow_tunnel *tunnel;
5938
5939                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
5940                 RTE_VERIFY(tunnel);
5941                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
5942                         mlx5_flow_tunnel_free(dev, tunnel);
5943         }
5944         flow_mreg_del_copy_action(dev, flow);
5945         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5946 }
5947
5948 /**
5949  * Destroy all flows.
5950  *
5951  * @param dev
5952  *   Pointer to Ethernet device.
5953  * @param list
5954  *   Pointer to the Indexed flow list.
5955  * @param active
 *   Whether the flush is called actively.
5957  */
5958 void
5959 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
5960 {
5961         uint32_t num_flushed = 0;
5962
5963         while (*list) {
5964                 flow_list_destroy(dev, list, *list);
5965                 num_flushed++;
5966         }
5967         if (active) {
5968                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
5969                         dev->data->port_id, num_flushed);
5970         }
5971 }
5972
5973 /**
5974  * Stop all default actions for flows.
5975  *
5976  * @param dev
5977  *   Pointer to Ethernet device.
5978  */
5979 void
5980 mlx5_flow_stop_default(struct rte_eth_dev *dev)
5981 {
5982         flow_mreg_del_default_copy_action(dev);
5983         flow_rxq_flags_clear(dev);
5984 }
5985
5986 /**
5987  * Start all default actions for flows.
5988  *
5989  * @param dev
5990  *   Pointer to Ethernet device.
5991  * @return
5992  *   0 on success, a negative errno value otherwise and rte_errno is set.
5993  */
5994 int
5995 mlx5_flow_start_default(struct rte_eth_dev *dev)
5996 {
5997         struct rte_flow_error error;
5998
5999         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6000         return flow_mreg_add_default_copy_action(dev, &error);
6001 }
6002
6003 /**
6004  * Release key of thread specific flow workspace data.
6005  */
6006 void
6007 flow_release_workspace(void *data)
6008 {
6009         struct mlx5_flow_workspace *wks = data;
6010         struct mlx5_flow_workspace *next;
6011
6012         while (wks) {
6013                 next = wks->next;
6014                 free(wks->rss_desc.queue);
6015                 free(wks);
6016                 wks = next;
6017         }
6018 }
6019
6020 /**
6021  * Get thread specific current flow workspace.
6022  *
6023  * @return pointer to thread specific flow workspace data, NULL on error.
6024  */
6025 struct mlx5_flow_workspace*
6026 mlx5_flow_get_thread_workspace(void)
6027 {
6028         struct mlx5_flow_workspace *data;
6029
6030         data = mlx5_flow_os_get_specific_workspace();
6031         MLX5_ASSERT(data && data->inuse);
6032         if (!data || !data->inuse)
6033                 DRV_LOG(ERR, "flow workspace not initialized.");
6034         return data;
6035 }
6036
6037 /**
6038  * Allocate and init new flow workspace.
6039  *
6040  * @return pointer to flow workspace data, NULL on error.
6041  */
6042 static struct mlx5_flow_workspace*
6043 flow_alloc_thread_workspace(void)
6044 {
6045         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6046
6047         if (!data) {
6048                 DRV_LOG(ERR, "Failed to allocate flow workspace "
6049                         "memory.");
6050                 return NULL;
6051         }
6052         data->rss_desc.queue = calloc(1,
6053                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6054         if (!data->rss_desc.queue)
6055                 goto err;
6056         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6057         return data;
6058 err:
        free(data->rss_desc.queue);
6061         free(data);
6062         return NULL;
6063 }
6064
6065 /**
6066  * Get new thread specific flow workspace.
6067  *
 * If the current workspace is in use, create a new one and set it as
 * current.
6069  *
6070  * @return pointer to thread specific flow workspace data, NULL on error.
6071  */
6072 static struct mlx5_flow_workspace*
6073 mlx5_flow_push_thread_workspace(void)
6074 {
6075         struct mlx5_flow_workspace *curr;
6076         struct mlx5_flow_workspace *data;
6077
6078         curr = mlx5_flow_os_get_specific_workspace();
6079         if (!curr) {
6080                 data = flow_alloc_thread_workspace();
6081                 if (!data)
6082                         return NULL;
6083         } else if (!curr->inuse) {
6084                 data = curr;
6085         } else if (curr->next) {
6086                 data = curr->next;
6087         } else {
6088                 data = flow_alloc_thread_workspace();
6089                 if (!data)
6090                         return NULL;
6091                 curr->next = data;
6092                 data->prev = curr;
6093         }
6094         data->inuse = 1;
6095         data->flow_idx = 0;
6096         /* Set as current workspace */
6097         if (mlx5_flow_os_set_specific_workspace(data))
6098                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6099         return data;
6100 }
6101
6102 /**
 * Close the current thread-specific flow workspace.
 *
 * If a previous workspace is available, set it as current.
6108  */
6109 static void
6110 mlx5_flow_pop_thread_workspace(void)
6111 {
6112         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6113
6114         if (!data)
6115                 return;
6116         if (!data->inuse) {
6117                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
6118                 return;
6119         }
6120         data->inuse = 0;
6121         if (!data->prev)
6122                 return;
6123         if (mlx5_flow_os_set_specific_workspace(data->prev))
6124                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6125 }
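
/*
 * Illustrative sketch (not part of the driver): the push/pop calls are
 * always paired around a flow creation scope, so nested creation on the
 * same thread (e.g. the tunnel default-miss rule built from within
 * flow_list_create()) gets its own workspace:
 *
 *        struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
 *
 *        if (!wks)
 *                return 0;
 *        ... build the subflows, possibly re-entering flow_list_create() ...
 *        mlx5_flow_pop_thread_workspace();
 */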
6126
6127 /**
 * Verify the flow list is empty.
6129  *
6130  * @param dev
6131  *  Pointer to Ethernet device.
6132  *
6133  * @return the number of flows not released.
6134  */
6135 int
6136 mlx5_flow_verify(struct rte_eth_dev *dev)
6137 {
6138         struct mlx5_priv *priv = dev->data->dev_private;
6139         struct rte_flow *flow;
6140         uint32_t idx;
6141         int ret = 0;
6142
6143         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
6144                       flow, next) {
6145                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
6146                         dev->data->port_id, (void *)flow);
6147                 ++ret;
6148         }
6149         return ret;
6150 }
6151
6152 /**
6153  * Enable default hairpin egress flow.
6154  *
6155  * @param dev
6156  *   Pointer to Ethernet device.
6157  * @param queue
6158  *   The queue index.
6159  *
6160  * @return
6161  *   0 on success, a negative errno value otherwise and rte_errno is set.
6162  */
6163 int
6164 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
6165                             uint32_t queue)
6166 {
6167         struct mlx5_priv *priv = dev->data->dev_private;
6168         const struct rte_flow_attr attr = {
6169                 .egress = 1,
6170                 .priority = 0,
6171         };
6172         struct mlx5_rte_flow_item_tx_queue queue_spec = {
6173                 .queue = queue,
6174         };
6175         struct mlx5_rte_flow_item_tx_queue queue_mask = {
6176                 .queue = UINT32_MAX,
6177         };
6178         struct rte_flow_item items[] = {
6179                 {
6180                         .type = (enum rte_flow_item_type)
6181                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6182                         .spec = &queue_spec,
6183                         .last = NULL,
6184                         .mask = &queue_mask,
6185                 },
6186                 {
6187                         .type = RTE_FLOW_ITEM_TYPE_END,
6188                 },
6189         };
6190         struct rte_flow_action_jump jump = {
6191                 .group = MLX5_HAIRPIN_TX_TABLE,
6192         };
6193         struct rte_flow_action actions[2];
6194         uint32_t flow_idx;
6195         struct rte_flow_error error;
6196
6197         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
6198         actions[0].conf = &jump;
6199         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
6200         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6201                                 &attr, items, actions, false, &error);
6202         if (!flow_idx) {
6203                 DRV_LOG(DEBUG,
6204                         "Failed to create ctrl flow: rte_errno(%d),"
6205                         " type(%d), message(%s)",
6206                         rte_errno, error.type,
6207                         error.message ? error.message : " (no stated reason)");
6208                 return -rte_errno;
6209         }
6210         return 0;
6211 }
6212
6213 /**
6214  * Enable a control flow configured from the control plane.
6215  *
6216  * @param dev
6217  *   Pointer to Ethernet device.
6218  * @param eth_spec
6219  *   An Ethernet flow spec to apply.
6220  * @param eth_mask
6221  *   An Ethernet flow mask to apply.
6222  * @param vlan_spec
6223  *   A VLAN flow spec to apply.
6224  * @param vlan_mask
6225  *   A VLAN flow mask to apply.
6226  *
6227  * @return
6228  *   0 on success, a negative errno value otherwise and rte_errno is set.
6229  */
6230 int
6231 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
6232                     struct rte_flow_item_eth *eth_spec,
6233                     struct rte_flow_item_eth *eth_mask,
6234                     struct rte_flow_item_vlan *vlan_spec,
6235                     struct rte_flow_item_vlan *vlan_mask)
6236 {
6237         struct mlx5_priv *priv = dev->data->dev_private;
6238         const struct rte_flow_attr attr = {
6239                 .ingress = 1,
6240                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6241         };
6242         struct rte_flow_item items[] = {
6243                 {
6244                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6245                         .spec = eth_spec,
6246                         .last = NULL,
6247                         .mask = eth_mask,
6248                 },
6249                 {
6250                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
6251                                               RTE_FLOW_ITEM_TYPE_END,
6252                         .spec = vlan_spec,
6253                         .last = NULL,
6254                         .mask = vlan_mask,
6255                 },
6256                 {
6257                         .type = RTE_FLOW_ITEM_TYPE_END,
6258                 },
6259         };
6260         uint16_t queue[priv->reta_idx_n];
6261         struct rte_flow_action_rss action_rss = {
6262                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
6263                 .level = 0,
6264                 .types = priv->rss_conf.rss_hf,
6265                 .key_len = priv->rss_conf.rss_key_len,
6266                 .queue_num = priv->reta_idx_n,
6267                 .key = priv->rss_conf.rss_key,
6268                 .queue = queue,
6269         };
6270         struct rte_flow_action actions[] = {
6271                 {
6272                         .type = RTE_FLOW_ACTION_TYPE_RSS,
6273                         .conf = &action_rss,
6274                 },
6275                 {
6276                         .type = RTE_FLOW_ACTION_TYPE_END,
6277                 },
6278         };
6279         uint32_t flow_idx;
6280         struct rte_flow_error error;
6281         unsigned int i;
6282
6283         if (!priv->reta_idx_n || !priv->rxqs_n)
6284                 return 0;
6286         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
6287                 action_rss.types = 0;
6288         for (i = 0; i != priv->reta_idx_n; ++i)
6289                 queue[i] = (*priv->reta_idx)[i];
6290         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6291                                 &attr, items, actions, false, &error);
6292         if (!flow_idx)
6293                 return -rte_errno;
6294         return 0;
6295 }
6296
6297 /**
6298  * Enable a control flow configured from the control plane.
6299  *
6300  * @param dev
6301  *   Pointer to Ethernet device.
6302  * @param eth_spec
6303  *   An Ethernet flow spec to apply.
6304  * @param eth_mask
6305  *   An Ethernet flow mask to apply.
6306  *
6307  * @return
6308  *   0 on success, a negative errno value otherwise and rte_errno is set.
6309  */
6310 int
6311 mlx5_ctrl_flow(struct rte_eth_dev *dev,
6312                struct rte_flow_item_eth *eth_spec,
6313                struct rte_flow_item_eth *eth_mask)
6314 {
6315         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
6316 }
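
/*
 * A minimal usage sketch (kept out of the build with #if 0): this is the
 * pattern the PMD start-up path follows when enabling default control
 * traffic. The broadcast and IPv6 multicast specs below are assumptions
 * of the example, not part of this file.
 */
#if 0
static int
example_enable_ctrl_flows(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};
	struct rte_flow_item_eth ipv6_multi_spec = {
		.dst.addr_bytes = "\x33\x33\x00\x00\x00\x00",
	};
	struct rte_flow_item_eth ipv6_multi_mask = {
		.dst.addr_bytes = "\xff\xff\x00\x00\x00\x00",
	};

	/* Spec equals mask: match the exact broadcast DMAC. */
	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
		return -rte_errno;
	/* Partial mask: match any 33:33:xx:xx:xx:xx IPv6 multicast DMAC. */
	if (mlx5_ctrl_flow(dev, &ipv6_multi_spec, &ipv6_multi_mask))
		return -rte_errno;
	return 0;
}
#endif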
6317
6318 /**
6319  * Create a default miss flow rule matching LACP traffic.
6320  *
6321  * @param dev
6322  *   Pointer to Ethernet device.
6323  *
6326  * @return
6327  *   0 on success, a negative errno value otherwise and rte_errno is set.
6328  */
6329 int
6330 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
6331 {
6332         struct mlx5_priv *priv = dev->data->dev_private;
6333         /*
6334          * LACP matching is done using only the ether type, since matching
6335          * on the multicast dst MAC causes the kernel to deprioritize this flow.
6336          */
6337         static const struct rte_flow_item_eth lacp_spec = {
6338                 .type = RTE_BE16(0x8809),
6339         };
6340         static const struct rte_flow_item_eth lacp_mask = {
6341                 .type = 0xffff,
6342         };
6343         const struct rte_flow_attr attr = {
6344                 .ingress = 1,
6345         };
6346         struct rte_flow_item items[] = {
6347                 {
6348                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6349                         .spec = &lacp_spec,
6350                         .mask = &lacp_mask,
6351                 },
6352                 {
6353                         .type = RTE_FLOW_ITEM_TYPE_END,
6354                 },
6355         };
6356         struct rte_flow_action actions[] = {
6357                 {
6358                         .type = (enum rte_flow_action_type)
6359                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
6360                 },
6361                 {
6362                         .type = RTE_FLOW_ACTION_TYPE_END,
6363                 },
6364         };
6365         struct rte_flow_error error;
6366         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6367                                 &attr, items, actions, false, &error);
6368
6369         if (!flow_idx)
6370                 return -rte_errno;
6371         return 0;
6372 }
6373
6374 /**
6375  * Destroy a flow.
6376  *
6377  * @see rte_flow_destroy()
6378  * @see rte_flow_ops
6379  */
6380 int
6381 mlx5_flow_destroy(struct rte_eth_dev *dev,
6382                   struct rte_flow *flow,
6383                   struct rte_flow_error *error __rte_unused)
6384 {
6385         struct mlx5_priv *priv = dev->data->dev_private;
6386
6387         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
6388         return 0;
6389 }
6390
6391 /**
6392  * Destroy all flows.
6393  *
6394  * @see rte_flow_flush()
6395  * @see rte_flow_ops
6396  */
6397 int
6398 mlx5_flow_flush(struct rte_eth_dev *dev,
6399                 struct rte_flow_error *error __rte_unused)
6400 {
6401         struct mlx5_priv *priv = dev->data->dev_private;
6402
6403         mlx5_flow_list_flush(dev, &priv->flows, false);
6404         return 0;
6405 }
6406
6407 /**
6408  * Isolated mode.
6409  *
6410  * @see rte_flow_isolate()
6411  * @see rte_flow_ops
6412  */
6413 int
6414 mlx5_flow_isolate(struct rte_eth_dev *dev,
6415                   int enable,
6416                   struct rte_flow_error *error)
6417 {
6418         struct mlx5_priv *priv = dev->data->dev_private;
6419
6420         if (dev->data->dev_started) {
6421                 rte_flow_error_set(error, EBUSY,
6422                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6423                                    NULL,
6424                                    "port must be stopped first");
6425                 return -rte_errno;
6426         }
6427         priv->isolated = !!enable;
6428         if (enable)
6429                 dev->dev_ops = &mlx5_dev_ops_isolate;
6430         else
6431                 dev->dev_ops = &mlx5_dev_ops;
6432
6433         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
6434         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
6435
6436         return 0;
6437 }
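
/*
 * A minimal usage sketch (not compiled): isolated mode has to be
 * requested while the port is stopped, typically between configuration
 * and start. The port_id variable is an assumption of the example.
 */
#if 0
static int
example_enter_isolated_mode(uint16_t port_id)
{
	struct rte_flow_error error;

	/* Fails with EBUSY if the port is already started (see above). */
	if (rte_flow_isolate(port_id, 1, &error))
		return -rte_errno;
	return rte_eth_dev_start(port_id);
}
#endif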
6438
6439 /**
6440  * Query a flow.
6441  *
6442  * @see rte_flow_query()
6443  * @see rte_flow_ops
6444  */
6445 static int
6446 flow_drv_query(struct rte_eth_dev *dev,
6447                uint32_t flow_idx,
6448                const struct rte_flow_action *actions,
6449                void *data,
6450                struct rte_flow_error *error)
6451 {
6452         struct mlx5_priv *priv = dev->data->dev_private;
6453         const struct mlx5_flow_driver_ops *fops;
6454         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
6455                                                [MLX5_IPOOL_RTE_FLOW],
6456                                                flow_idx);
6457         enum mlx5_flow_drv_type ftype;
6458
6459         if (!flow) {
6460                 return rte_flow_error_set(error, ENOENT,
6461                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6462                           NULL,
6463                           "invalid flow handle");
6464         }
6465         ftype = flow->drv_type;
6466         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
6467         fops = flow_get_drv_ops(ftype);
6468
6469         return fops->query(dev, flow, actions, data, error);
6470 }
6471
6472 /**
6473  * Query a flow.
6474  *
6475  * @see rte_flow_query()
6476  * @see rte_flow_ops
6477  */
6478 int
6479 mlx5_flow_query(struct rte_eth_dev *dev,
6480                 struct rte_flow *flow,
6481                 const struct rte_flow_action *actions,
6482                 void *data,
6483                 struct rte_flow_error *error)
6484 {
6485         int ret;
6486
6487         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
6488                              error);
6489         if (ret < 0)
6490                 return ret;
6491         return 0;
6492 }
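
/*
 * A minimal usage sketch (not compiled): querying a COUNT action through
 * the generic API, which resolves to mlx5_flow_query() above. The flow
 * handle is assumed to have been created with a COUNT action;
 * <inttypes.h> and <stdio.h> are assumed for PRIu64 and printf().
 */
#if 0
static int
example_query_count(uint16_t port_id, struct rte_flow *flow)
{
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_query_count count = { .reset = 1 };
	struct rte_flow_error error;

	if (rte_flow_query(port_id, flow, actions, &count, &error))
		return -rte_errno;
	printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n", count.hits, count.bytes);
	return 0;
}
#endif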
6493
6494 /**
6495  * Manage filter operations.
6496  *
6497  * @param dev
6498  *   Pointer to Ethernet device structure.
6499  * @param filter_type
6500  *   Filter type.
6501  * @param filter_op
6502  *   Operation to perform.
6503  * @param arg
6504  *   Pointer to operation-specific structure.
6505  *
6506  * @return
6507  *   0 on success, a negative errno value otherwise and rte_errno is set.
6508  */
6509 int
6510 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
6511                      enum rte_filter_type filter_type,
6512                      enum rte_filter_op filter_op,
6513                      void *arg)
6514 {
6515         switch (filter_type) {
6516         case RTE_ETH_FILTER_GENERIC:
6517                 if (filter_op != RTE_ETH_FILTER_GET) {
6518                         rte_errno = EINVAL;
6519                         return -rte_errno;
6520                 }
6521                 *(const void **)arg = &mlx5_flow_ops;
6522                 return 0;
6523         default:
6524                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
6525                         dev->data->port_id, filter_type);
6526                 rte_errno = ENOTSUP;
6527                 return -rte_errno;
6528         }
6529         return 0;
6530 }
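
/*
 * A minimal sketch (not compiled) of the only supported call: the ethdev
 * layer uses RTE_ETH_FILTER_GENERIC + RTE_ETH_FILTER_GET through this
 * hook to fetch the PMD's rte_flow ops table.
 */
#if 0
static void
example_get_flow_ops(struct rte_eth_dev *dev)
{
	const struct rte_flow_ops *ops = NULL;

	if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
				 RTE_ETH_FILTER_GET, &ops) == 0)
		MLX5_ASSERT(ops == &mlx5_flow_ops);
}
#endif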
6531
6532 /**
6533  * Create the needed meter and suffix tables.
6534  *
6535  * @param[in] dev
6536  *   Pointer to Ethernet device.
6537  * @param[in] fm
6538  *   Pointer to the flow meter.
6539  *
6540  * @return
6541  *   Pointer to table set on success, NULL otherwise.
6542  */
6543 struct mlx5_meter_domains_infos *
6544 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
6545                           const struct mlx5_flow_meter *fm)
6546 {
6547         const struct mlx5_flow_driver_ops *fops;
6548
6549         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6550         return fops->create_mtr_tbls(dev, fm);
6551 }
6552
6553 /**
6554  * Destroy the meter table set.
6555  *
6556  * @param[in] dev
6557  *   Pointer to Ethernet device.
6558  * @param[in] tbl
6559  *   Pointer to the meter table set.
6560  *
6561  * @return
6562  *   0 on success.
6563  */
6564 int
6565 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
6566                            struct mlx5_meter_domains_infos *tbls)
6567 {
6568         const struct mlx5_flow_driver_ops *fops;
6569
6570         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6571         return fops->destroy_mtr_tbls(dev, tbls);
6572 }
6573
6574 /**
6575  * Create policer rules.
6576  *
6577  * @param[in] dev
6578  *   Pointer to Ethernet device.
6579  * @param[in] fm
6580  *   Pointer to flow meter structure.
6581  * @param[in] attr
6582  *   Pointer to flow attributes.
6583  *
6584  * @return
6585  *   0 on success, -1 otherwise.
6586  */
6587 int
6588 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
6589                                struct mlx5_flow_meter *fm,
6590                                const struct rte_flow_attr *attr)
6591 {
6592         const struct mlx5_flow_driver_ops *fops;
6593
6594         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6595         return fops->create_policer_rules(dev, fm, attr);
6596 }
6597
6598 /**
6599  * Destroy policer rules.
6600  *
6601  * @param[in] fm
6602  *   Pointer to flow meter structure.
6603  * @param[in] attr
6604  *   Pointer to flow attributes.
6605  *
6606  * @return
6607  *   0 on success, -1 otherwise.
6608  */
6609 int
6610 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
6611                                 struct mlx5_flow_meter *fm,
6612                                 const struct rte_flow_attr *attr)
6613 {
6614         const struct mlx5_flow_driver_ops *fops;
6615
6616         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6617         return fops->destroy_policer_rules(dev, fm, attr);
6618 }
6619
6620 /**
6621  * Allocate a counter.
6622  *
6623  * @param[in] dev
6624  *   Pointer to Ethernet device structure.
6625  *
6626  * @return
6627  *   Index of the allocated counter on success, 0 otherwise.
6628  */
6629 uint32_t
6630 mlx5_counter_alloc(struct rte_eth_dev *dev)
6631 {
6632         const struct mlx5_flow_driver_ops *fops;
6633         struct rte_flow_attr attr = { .transfer = 0 };
6634
6635         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6636                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6637                 return fops->counter_alloc(dev);
6638         }
6639         DRV_LOG(ERR,
6640                 "port %u counter allocate is not supported.",
6641                  dev->data->port_id);
6642         return 0;
6643 }
6644
6645 /**
6646  * Free a counter.
6647  *
6648  * @param[in] dev
6649  *   Pointer to Ethernet device structure.
6650  * @param[in] cnt
6651  *   Index of the counter to be freed.
6652  */
6653 void
6654 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
6655 {
6656         const struct mlx5_flow_driver_ops *fops;
6657         struct rte_flow_attr attr = { .transfer = 0 };
6658
6659         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6660                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6661                 fops->counter_free(dev, cnt);
6662                 return;
6663         }
6664         DRV_LOG(ERR,
6665                 "port %u counter free is not supported.",
6666                  dev->data->port_id);
6667 }
6668
6669 /**
6670  * Query counter statistics.
6671  *
6672  * @param[in] dev
6673  *   Pointer to Ethernet device structure.
6674  * @param[in] cnt
6675  *   Index of the counter to query.
6676  * @param[in] clear
6677  *   Set to clear the counter statistics.
6678  * @param[out] pkts
6679  *   Where to store the number of packet hits.
6680  * @param[out] bytes
6681  *   Where to store the number of byte hits.
6682  *
6683  * @return
6684  *   0 on success, a negative errno value otherwise.
6685  */
6686 int
6687 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
6688                    bool clear, uint64_t *pkts, uint64_t *bytes)
6689 {
6690         const struct mlx5_flow_driver_ops *fops;
6691         struct rte_flow_attr attr = { .transfer = 0 };
6692
6693         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6694                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6695                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
6696         }
6697         DRV_LOG(ERR,
6698                 "port %u counter query is not supported.",
6699                  dev->data->port_id);
6700         return -ENOTSUP;
6701 }
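
/*
 * A minimal internal usage sketch (not compiled): the counter trio above
 * only works with the DV driver; on other drivers each call logs an
 * error and fails. <inttypes.h> is assumed for PRIu64.
 */
#if 0
static void
example_counter_roundtrip(struct rte_eth_dev *dev)
{
	uint64_t pkts = 0, bytes = 0;
	uint32_t cnt = mlx5_counter_alloc(dev);

	if (!cnt)
		return; /* Unsupported driver or pool exhausted. */
	/* Read the counter and clear it in the same call. */
	if (!mlx5_counter_query(dev, cnt, true, &pkts, &bytes))
		DRV_LOG(DEBUG, "pkts=%" PRIu64 " bytes=%" PRIu64, pkts, bytes);
	mlx5_counter_free(dev, cnt);
}
#endif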
6702
6703 /**
6704  * Allocate new memory for the counter values, wrapped with all the needed
6705  * management structures.
6706  *
6707  * @param[in] sh
6708  *   Pointer to mlx5_dev_ctx_shared object.
6709  *
6710  * @return
6711  *   0 on success, a negative errno value otherwise.
6712  */
6713 static int
6714 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
6715 {
6716         struct mlx5_devx_mkey_attr mkey_attr;
6717         struct mlx5_counter_stats_mem_mng *mem_mng;
6718         volatile struct flow_counter_stats *raw_data;
6719         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
6720         int size = (sizeof(struct flow_counter_stats) *
6721                         MLX5_COUNTERS_PER_POOL +
6722                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
6723                         sizeof(struct mlx5_counter_stats_mem_mng);
6724         size_t pgsize = rte_mem_page_size();
6725         uint8_t *mem;
6726         int i;
6727
6728         if (pgsize == (size_t)-1) {
6729                 DRV_LOG(ERR, "Failed to get mem page size");
6730                 rte_errno = ENOMEM;
6731                 return -ENOMEM;
6732         }
6733         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
6734         if (!mem) {
6735                 rte_errno = ENOMEM;
6736                 return -ENOMEM;
6737         }
6738         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
6739         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
6740         mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
6741                                                  IBV_ACCESS_LOCAL_WRITE);
6742         if (!mem_mng->umem) {
6743                 rte_errno = errno;
6744                 mlx5_free(mem);
6745                 return -rte_errno;
6746         }
6747         mkey_attr.addr = (uintptr_t)mem;
6748         mkey_attr.size = size;
6749         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
6750         mkey_attr.pd = sh->pdn;
6751         mkey_attr.log_entity_size = 0;
6752         mkey_attr.pg_access = 0;
6753         mkey_attr.klm_array = NULL;
6754         mkey_attr.klm_num = 0;
6755         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
6756         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
6757         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
6758         if (!mem_mng->dm) {
6759                 mlx5_os_umem_dereg(mem_mng->umem);
6760                 rte_errno = errno;
6761                 mlx5_free(mem);
6762                 return -rte_errno;
6763         }
6764         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
6765         raw_data = (volatile struct flow_counter_stats *)mem;
6766         for (i = 0; i < raws_n; ++i) {
6767                 mem_mng->raws[i].mem_mng = mem_mng;
6768                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
6769         }
6770         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
6771                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
6772                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
6773                                  next);
6774         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
6775         sh->cmng.mem_mng = mem_mng;
6776         return 0;
6777 }
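
/*
 * Resulting layout of the single allocation made above:
 *
 *   mem -> +------------------------------------------+
 *          | raw counter values, registered as umem:  |
 *          | raws_n * MLX5_COUNTERS_PER_POOL entries  |
 *          +------------------------------------------+ <- mem + size
 *          | raws_n struct mlx5_counter_stats_raw     |
 *          | descriptors; raws[i].data points back    |
 *          | into the value area                      |
 *          +------------------------------------------+
 *          | struct mlx5_counter_stats_mem_mng        |
 *          +------------------------------------------+
 *
 * Only the value area is registered with the HW; the descriptors and the
 * manager itself live in the tail of the same allocation.
 */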
6778
6779 /**
6780  * Set the statistic memory to the new counter pool.
6781  *
6782  * @param[in] sh
6783  *   Pointer to mlx5_dev_ctx_shared object.
6784  * @param[in] pool
6785  *   Pointer to the pool to set the statistic memory.
6786  *
6787  * @return
6788  *   0 on success, a negative errno value otherwise.
6789  */
6790 static int
6791 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
6792                                struct mlx5_flow_counter_pool *pool)
6793 {
6794         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6795         /* Resize the statistic memory once it is used up. */
6796         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
6797             mlx5_flow_create_counter_stat_mem_mng(sh)) {
6798                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
6799                 return -1;
6800         }
6801         rte_spinlock_lock(&pool->sl);
6802         pool->raw = cmng->mem_mng->raws + pool->index %
6803                     MLX5_CNT_CONTAINER_RESIZE;
6804         rte_spinlock_unlock(&pool->sl);
6805         pool->raw_hw = NULL;
6806         return 0;
6807 }
6808
6809 #define MLX5_POOL_QUERY_FREQ_US 1000000
6810
6811 /**
6812  * Set the periodic procedure for triggering asynchronous batch queries for all
6813  * the counter pools.
6814  *
6815  * @param[in] sh
6816  *   Pointer to mlx5_dev_ctx_shared object.
6817  */
6818 void
6819 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6820 {
6821         uint32_t pools_n, us;
6822
6823         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
6824         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
6825         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6826         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6827                 sh->cmng.query_thread_on = 0;
6828                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
6829         } else {
6830                 sh->cmng.query_thread_on = 1;
6831         }
6832 }
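
/*
 * Worked example: with MLX5_POOL_QUERY_FREQ_US = 1000000 and 8 valid
 * pools, the alarm fires every 125000 us and mlx5_flow_query_alarm()
 * advances pool_index by one each time, so every pool is queried about
 * once per second regardless of the pool count.
 */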
6833
6834 /**
6835  * The periodic procedure for triggering asynchronous batch queries for all
6836  * the counter pools. This function is called from the host thread.
6837  *
6838  * @param[in] arg
6839  *   The parameter for the alarm process.
6840  */
6841 void
6842 mlx5_flow_query_alarm(void *arg)
6843 {
6844         struct mlx5_dev_ctx_shared *sh = arg;
6845         int ret;
6846         uint16_t pool_index = sh->cmng.pool_index;
6847         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6848         struct mlx5_flow_counter_pool *pool;
6849         uint16_t n_valid;
6850
6851         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6852                 goto set_alarm;
6853         rte_spinlock_lock(&cmng->pool_update_sl);
6854         pool = cmng->pools[pool_index];
6855         n_valid = cmng->n_valid;
6856         rte_spinlock_unlock(&cmng->pool_update_sl);
6857         /* Set the statistic memory to the newly created pool. */
6858         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
6859                 goto set_alarm;
6860         if (pool->raw_hw)
6861                 /* There is a pool query in progress. */
6862                 goto set_alarm;
6863         pool->raw_hw =
6864                 LIST_FIRST(&sh->cmng.free_stat_raws);
6865         if (!pool->raw_hw)
6866                 /* No free counter statistics raw memory. */
6867                 goto set_alarm;
6868         /*
6869          * Identify the counters released between query trigger and query
6870          * handle more efficiently. A counter released in this gap period
6871          * should wait for a new round of query, as the newly arrived
6872          * packets will not be taken into account.
6873          */
6874         pool->query_gen++;
6875         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
6876                                                MLX5_COUNTERS_PER_POOL,
6877                                                NULL, NULL,
6878                                                pool->raw_hw->mem_mng->dm->id,
6879                                                (void *)(uintptr_t)
6880                                                pool->raw_hw->data,
6881                                                sh->devx_comp,
6882                                                (uint64_t)(uintptr_t)pool);
6883         if (ret) {
6884                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6885                         " %d", pool->min_dcs->id);
6886                 pool->raw_hw = NULL;
6887                 goto set_alarm;
6888         }
6889         LIST_REMOVE(pool->raw_hw, next);
6890         sh->cmng.pending_queries++;
6891         pool_index++;
6892         if (pool_index >= n_valid)
6893                 pool_index = 0;
6894 set_alarm:
6895         sh->cmng.pool_index = pool_index;
6896         mlx5_set_query_alarm(sh);
6897 }
6898
6899 /**
6900  * Check for new aged flows in the counter pool and raise the aging event.
6901  *
6902  * @param[in] sh
6903  *   Pointer to mlx5_dev_ctx_shared object.
6904  * @param[in] pool
6905  *   Pointer to the current counter pool.
6906  */
6907 static void
6908 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6909                    struct mlx5_flow_counter_pool *pool)
6910 {
6911         struct mlx5_priv *priv;
6912         struct mlx5_flow_counter *cnt;
6913         struct mlx5_age_info *age_info;
6914         struct mlx5_age_param *age_param;
6915         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6916         struct mlx5_counter_stats_raw *prev = pool->raw;
6917         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
6918         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
6919         uint16_t expected = AGE_CANDIDATE;
6920         uint32_t i;
6921
6922         pool->time_of_last_age_check = curr_time;
6923         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6924                 cnt = MLX5_POOL_GET_CNT(pool, i);
6925                 age_param = MLX5_CNT_TO_AGE(cnt);
6926                 if (__atomic_load_n(&age_param->state,
6927                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
6928                         continue;
6929                 if (cur->data[i].hits != prev->data[i].hits) {
6930                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
6931                                          __ATOMIC_RELAXED);
6932                         continue;
6933                 }
6934                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
6935                                        time_delta,
6936                                        __ATOMIC_RELAXED) <= age_param->timeout)
6937                         continue;
6938                 /*
6939                  * Hold the lock first; otherwise, if the counter is
6940                  * released between setting the AGE_TMOUT state and the
6941                  * tailq operation, the release procedure may delete a
6942                  * non-existent tailq node.
6943                  */
6944                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6945                 age_info = GET_PORT_AGE_INFO(priv);
6946                 rte_spinlock_lock(&age_info->aged_sl);
6947                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
6948                                                 AGE_TMOUT, false,
6949                                                 __ATOMIC_RELAXED,
6950                                                 __ATOMIC_RELAXED)) {
6951                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6952                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6953                 }
6954                 rte_spinlock_unlock(&age_info->aged_sl);
6955         }
6956         mlx5_age_event_prepare(sh);
6957 }
6958
6959 /**
6960  * Handler for the HW response carrying ready values from an asynchronous
6961  * batch query. This function is called from the host thread.
6962  *
6963  * @param[in] sh
6964  *   The pointer to the shared device context.
6965  * @param[in] async_id
6966  *   The Devx async ID.
6967  * @param[in] status
6968  *   The status of the completion.
6969  */
6970 void
6971 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6972                                   uint64_t async_id, int status)
6973 {
6974         struct mlx5_flow_counter_pool *pool =
6975                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6976         struct mlx5_counter_stats_raw *raw_to_free;
6977         uint8_t query_gen = pool->query_gen ^ 1;
6978         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
6979         enum mlx5_counter_type cnt_type =
6980                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
6981                                 MLX5_COUNTER_TYPE_ORIGIN;
6982
6983         if (unlikely(status)) {
6984                 raw_to_free = pool->raw_hw;
6985         } else {
6986                 raw_to_free = pool->raw;
6987                 if (pool->is_aged)
6988                         mlx5_flow_aging_check(sh, pool);
6989                 rte_spinlock_lock(&pool->sl);
6990                 pool->raw = pool->raw_hw;
6991                 rte_spinlock_unlock(&pool->sl);
6992                 /* Be sure the new raw counters data is updated in memory. */
6993                 rte_io_wmb();
6994                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6995                         rte_spinlock_lock(&cmng->csl[cnt_type]);
6996                         TAILQ_CONCAT(&cmng->counters[cnt_type],
6997                                      &pool->counters[query_gen], next);
6998                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
6999                 }
7000         }
7001         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
7002         pool->raw_hw = NULL;
7003         sh->cmng.pending_queries--;
7004 }
7005
7006 static int
7007 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
7008                     const struct flow_grp_info *grp_info,
7009                     struct rte_flow_error *error)
7010 {
7011         if (grp_info->transfer && grp_info->external &&
7012             grp_info->fdb_def_rule) {
7013                 if (group == UINT32_MAX)
7014                         return rte_flow_error_set
7015                                                 (error, EINVAL,
7016                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7017                                                  NULL,
7018                                                  "group index not supported");
7019                 *table = group + 1;
7020         } else {
7021                 *table = group;
7022         }
7023         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
7024         return 0;
7025 }
7026
7027 /**
7028  * Translate the rte_flow group index to HW table value.
7029  *
7030  * If tunnel offload is disabled, all group ids are converted to flow
7031  * table ids using the standard method.
7032  * If tunnel offload is enabled, a group id can be converted using the
7033  * standard or the tunnel conversion method. The conversion method is
7034  * selected based on flags in the `grp_info` parameter:
7035  * - Internal (grp_info.external == 0) group conversion uses the
7036  *   standard method.
7037  * - Group ids in a JUMP action are converted with the tunnel method.
7038  * - Group id conversion in a rule attribute depends on the rule type
7039  *   and the group id value:
7040  *   ** a non-zero group attribute is converted with the tunnel method
7041  *   ** a zero group attribute in a non-tunnel rule is converted using
7042  *      the standard method - there's only one root table
7043  *   ** a zero group attribute in a steer tunnel rule is converted with
7044  *      the standard method - single root table
7045  *   ** a zero group attribute in a match tunnel rule is a special OvS
7046  *      case: that value is used for portability reasons. Such a group
7047  *      id is converted with the tunnel conversion method.
7048  *
7048  *
7049  * @param[in] dev
7050  *   Port device
7051  * @param[in] tunnel
7052  *   PMD tunnel offload object
7053  * @param[in] group
7054  *   rte_flow group index value.
7055  * @param[out] table
7056  *   HW table value.
7057  * @param[in] grp_info
7058  *   flags used for conversion
7059  * @param[out] error
7060  *   Pointer to error structure.
7061  *
7062  * @return
7063  *   0 on success, a negative errno value otherwise and rte_errno is set.
7064  */
7065 int
7066 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
7067                          const struct mlx5_flow_tunnel *tunnel,
7068                          uint32_t group, uint32_t *table,
7069                          const struct flow_grp_info *grp_info,
7070                          struct rte_flow_error *error)
7071 {
7072         int ret;
7073         bool standard_translation;
7074
7075         if (!grp_info->skip_scale && grp_info->external &&
7076             group < MLX5_MAX_TABLES_EXTERNAL)
7077                 group *= MLX5_FLOW_TABLE_FACTOR;
7078         if (is_tunnel_offload_active(dev)) {
7079                 standard_translation = !grp_info->external ||
7080                                         grp_info->std_tbl_fix;
7081         } else {
7082                 standard_translation = true;
7083         }
7084         DRV_LOG(DEBUG,
7085                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
7086                 dev->data->port_id, group, grp_info->transfer,
7087                 grp_info->external, grp_info->fdb_def_rule,
7088                 standard_translation ? "STANDARD" : "TUNNEL");
7089         if (standard_translation)
7090                 ret = flow_group_to_table(dev->data->port_id, group, table,
7091                                           grp_info, error);
7092         else
7093                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
7094                                                       table, error);
7095
7096         return ret;
7097 }
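
/*
 * Worked example of the standard path: an external group 2 with
 * skip_scale clear is first scaled to 2 * MLX5_FLOW_TABLE_FACTOR. For an
 * external transfer rule under the FDB default rule the result is then
 * incremented by one, keeping HW table 0 reserved for the default rule;
 * UINT32_MAX is rejected there because it cannot be incremented.
 */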
7098
7099 /**
7100  * Discover availability of metadata reg_c's.
7101  *
7102  * Iteratively use test flows to check availability.
7103  *
7104  * @param[in] dev
7105  *   Pointer to the Ethernet device structure.
7106  *
7107  * @return
7108  *   0 on success, a negative errno value otherwise and rte_errno is set.
7109  */
7110 int
7111 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
7112 {
7113         struct mlx5_priv *priv = dev->data->dev_private;
7114         struct mlx5_dev_config *config = &priv->config;
7115         enum modify_reg idx;
7116         int n = 0;
7117
7118         /* reg_c[0] and reg_c[1] are reserved. */
7119         config->flow_mreg_c[n++] = REG_C_0;
7120         config->flow_mreg_c[n++] = REG_C_1;
7121         /* Discover availability of other reg_c's. */
7122         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
7123                 struct rte_flow_attr attr = {
7124                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
7125                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7126                         .ingress = 1,
7127                 };
7128                 struct rte_flow_item items[] = {
7129                         [0] = {
7130                                 .type = RTE_FLOW_ITEM_TYPE_END,
7131                         },
7132                 };
7133                 struct rte_flow_action actions[] = {
7134                         [0] = {
7135                                 .type = (enum rte_flow_action_type)
7136                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
7137                                 .conf = &(struct mlx5_flow_action_copy_mreg){
7138                                         .src = REG_C_1,
7139                                         .dst = idx,
7140                                 },
7141                         },
7142                         [1] = {
7143                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
7144                                 .conf = &(struct rte_flow_action_jump){
7145                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
7146                                 },
7147                         },
7148                         [2] = {
7149                                 .type = RTE_FLOW_ACTION_TYPE_END,
7150                         },
7151                 };
7152                 uint32_t flow_idx;
7153                 struct rte_flow *flow;
7154                 struct rte_flow_error error;
7155
7156                 if (!config->dv_flow_en)
7157                         break;
7158                 /* Create internal flow, validation skips copy action. */
7159                 flow_idx = flow_list_create(dev, NULL, &attr, items,
7160                                             actions, false, &error);
7161                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
7162                                       flow_idx);
7163                 if (!flow)
7164                         continue;
7165                 config->flow_mreg_c[n++] = idx;
7166                 flow_list_destroy(dev, NULL, flow_idx);
7167         }
7168         for (; n < MLX5_MREG_C_NUM; ++n)
7169                 config->flow_mreg_c[n] = REG_NON;
7170         return 0;
7171 }
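
/*
 * Example outcome: on a device where all of reg_c[2..7] are usable,
 * flow_mreg_c[] ends up as {REG_C_0, REG_C_1, REG_C_2, ..., REG_C_7};
 * if, say, only reg_c[2] passes the test flow, the remaining slots are
 * filled with REG_NON.
 */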
7172
7173 /**
7174  * Dump flow raw HW data to a file.
7175  *
7176  * @param[in] dev
7177  *    The pointer to Ethernet device.
7178  * @param[in] file
7179  *   A pointer to a file for output.
7180  * @param[out] error
7181  *   Perform verbose error reporting if not NULL. PMDs initialize this
7182  *   structure in case of error only.
7183  * @return
7184  *   0 on success, a negative value otherwise.
7185  */
7186 int
7187 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
7188                    FILE *file,
7189                    struct rte_flow_error *error __rte_unused)
7190 {
7191         struct mlx5_priv *priv = dev->data->dev_private;
7192         struct mlx5_dev_ctx_shared *sh = priv->sh;
7193
7194         if (!priv->config.dv_flow_en) {
7195                 if (fputs("device dv flow disabled\n", file) <= 0)
7196                         return -errno;
7197                 return -ENOTSUP;
7198         }
7199         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
7200                                        sh->tx_domain, file);
7201 }
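
/*
 * A minimal usage sketch (not compiled): triggering the dump through the
 * generic rte_flow API of this DPDK release, which resolves to
 * mlx5_flow_dev_dump(). <stdio.h> is assumed for stdout.
 */
#if 0
static void
example_dump_flows(uint16_t port_id)
{
	struct rte_flow_error error;

	if (rte_flow_dev_dump(port_id, stdout, &error))
		DRV_LOG(ERR, "port %u flow dump failed", port_id);
}
#endif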
7202
7203 /**
7204  * Get aged-out flows.
7205  *
7206  * @param[in] dev
7207  *   Pointer to the Ethernet device structure.
7208  * @param[in] contexts
7209  *   The address of an array of pointers to the aged-out flow contexts.
7210  * @param[in] nb_contexts
7211  *   The length of the context array.
7212  * @param[out] error
7213  *   Perform verbose error reporting if not NULL. Initialized in case of
7214  *   error only.
7215  *
7216  * @return
7217  *   The number of contexts returned on success, a negative errno value
7218  *   otherwise. If nb_contexts is 0, return the total number of aged
7219  *   contexts; otherwise, return the number of aged flows reported in
7220  *   the context array.
7221  */
7222 int
7223 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
7224                         uint32_t nb_contexts, struct rte_flow_error *error)
7225 {
7226         const struct mlx5_flow_driver_ops *fops;
7227         struct rte_flow_attr attr = { .transfer = 0 };
7228
7229         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7230                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7231                 return fops->get_aged_flows(dev, contexts, nb_contexts,
7232                                                     error);
7233         }
7234         DRV_LOG(ERR,
7235                 "port %u get aged flows is not supported.",
7236                  dev->data->port_id);
7237         return -ENOTSUP;
7238 }
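
/*
 * A minimal usage sketch (not compiled): retrieval is meant to be event
 * driven. The application registers for RTE_ETH_EVENT_FLOW_AGED, probes
 * the aged count with nb_contexts == 0, then fetches the contexts. The
 * callback and handle_aged_context() are assumptions of the example.
 */
#if 0
static int
example_flow_aged_cb(uint16_t port_id, enum rte_eth_event_type event,
		     void *cb_arg, void *ret_param)
{
	struct rte_flow_error error;
	void *contexts[64];
	int i, total;

	RTE_SET_USED(event);
	RTE_SET_USED(cb_arg);
	RTE_SET_USED(ret_param);
	/* nb_contexts == 0: ask only for the number of aged contexts. */
	total = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
	if (total <= 0)
		return 0;
	if (total > 64)
		total = 64;
	total = rte_flow_get_aged_flows(port_id, contexts, total, &error);
	/* Each context is the rte_flow_action_age.context of a rule. */
	for (i = 0; i < total; i++)
		handle_aged_context(contexts[i]);
	return 0;
}
/* Registered with:
 * rte_eth_dev_callback_register(port_id, RTE_ETH_EVENT_FLOW_AGED,
 *                               example_flow_aged_cb, NULL);
 */
#endif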
7239
7240 /* Wrapper for driver action_validate op callback */
7241 static int
7242 flow_drv_action_validate(struct rte_eth_dev *dev,
7243                          const struct rte_flow_shared_action_conf *conf,
7244                          const struct rte_flow_action *action,
7245                          const struct mlx5_flow_driver_ops *fops,
7246                          struct rte_flow_error *error)
7247 {
7248         static const char err_msg[] = "shared action validation unsupported";
7249
7250         if (!fops->action_validate) {
7251                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7252                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7253                                    NULL, err_msg);
7254                 return -rte_errno;
7255         }
7256         return fops->action_validate(dev, conf, action, error);
7257 }
7258
7259 /**
7260  * Destroys the shared action by handle.
7261  *
7262  * @param dev
7263  *   Pointer to Ethernet device structure.
7264  * @param[in] action
7265  *   Handle for the shared action to be destroyed.
7266  * @param[out] error
7267  *   Perform verbose error reporting if not NULL. PMDs initialize this
7268  *   structure in case of error only.
7269  *
7270  * @return
7271  *   0 on success, a negative errno value otherwise and rte_errno is set.
7272  *
7273  * @note: wrapper for driver action_destroy op callback.
7274  */
7275 static int
7276 mlx5_shared_action_destroy(struct rte_eth_dev *dev,
7277                            struct rte_flow_shared_action *action,
7278                            struct rte_flow_error *error)
7279 {
7280         static const char err_msg[] = "shared action destruction unsupported";
7281         struct rte_flow_attr attr = { .transfer = 0 };
7282         const struct mlx5_flow_driver_ops *fops =
7283                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7284
7285         if (!fops->action_destroy) {
7286                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7287                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7288                                    NULL, err_msg);
7289                 return -rte_errno;
7290         }
7291         return fops->action_destroy(dev, action, error);
7292 }
7293
7294 /* Wrapper for driver action_update op callback */
7295 static int
7296 flow_drv_action_update(struct rte_eth_dev *dev,
7297                        struct rte_flow_shared_action *action,
7298                        const void *action_conf,
7299                        const struct mlx5_flow_driver_ops *fops,
7300                        struct rte_flow_error *error)
7301 {
7302         static const char err_msg[] = "shared action update unsupported";
7303
7304         if (!fops->action_update) {
7305                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7306                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7307                                    NULL, err_msg);
7308                 return -rte_errno;
7309         }
7310         return fops->action_update(dev, action, action_conf, error);
7311 }
7312
7313 /* Wrapper for driver action_query op callback */
7314 static int
7315 flow_drv_action_query(struct rte_eth_dev *dev,
7316                       const struct rte_flow_shared_action *action,
7317                       void *data,
7318                       const struct mlx5_flow_driver_ops *fops,
7319                       struct rte_flow_error *error)
7320 {
7321         static const char err_msg[] = "shared action query unsupported";
7322
7323         if (!fops->action_query) {
7324                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7325                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7326                                    NULL, err_msg);
7327                 return -rte_errno;
7328         }
7329         return fops->action_query(dev, action, data, error);
7330 }
7331
7332 /**
7333  * Create shared action for reuse in multiple flow rules.
7334  *
7335  * @param dev
7336  *   Pointer to Ethernet device structure.
7337  * @param[in] action
7338  *   Action configuration for shared action creation.
7339  * @param[out] error
7340  *   Perform verbose error reporting if not NULL. PMDs initialize this
7341  *   structure in case of error only.
7342  * @return
7343  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
7344  */
7345 static struct rte_flow_shared_action *
7346 mlx5_shared_action_create(struct rte_eth_dev *dev,
7347                           const struct rte_flow_shared_action_conf *conf,
7348                           const struct rte_flow_action *action,
7349                           struct rte_flow_error *error)
7350 {
7351         static const char err_msg[] = "shared action creation unsupported";
7352         struct rte_flow_attr attr = { .transfer = 0 };
7353         const struct mlx5_flow_driver_ops *fops =
7354                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7355
7356         if (flow_drv_action_validate(dev, conf, action, fops, error))
7357                 return NULL;
7358         if (!fops->action_create) {
7359                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7360                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7361                                    NULL, err_msg);
7362                 return NULL;
7363         }
7364         return fops->action_create(dev, conf, action, error);
7365 }
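
/*
 * A minimal usage sketch (not compiled): creating a shared RSS action
 * and reusing it through RTE_FLOW_ACTION_TYPE_SHARED. The pattern, the
 * queue array, and the function name are assumptions of the example.
 */
#if 0
static struct rte_flow *
example_shared_rss_flow(uint16_t port_id, uint16_t *queues, uint32_t nq)
{
	struct rte_flow_error error;
	const struct rte_flow_shared_action_conf conf = { .ingress = 1 };
	const struct rte_flow_action_rss rss = {
		.types = ETH_RSS_IP,
		.queue_num = nq,
		.queue = queues,
	};
	const struct rte_flow_action rss_action = {
		.type = RTE_FLOW_ACTION_TYPE_RSS,
		.conf = &rss,
	};
	struct rte_flow_shared_action *handle =
		rte_flow_shared_action_create(port_id, &conf, &rss_action,
					      &error);
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_SHARED, .conf = handle },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	if (!handle)
		return NULL;
	return rte_flow_create(port_id, &attr, pattern, actions, &error);
}
#endif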
7366
7367 /**
7368  * Updates in place the shared action configuration pointed to by the
7369  * *shared_action* handle with the configuration provided as the *action*
7370  * argument. The update affects all flow rules reusing the action via
7371  * its handle.
7372  *
7373  * @param dev
7374  *   Pointer to Ethernet device structure.
7375  * @param[in] shared_action
7376  *   Handle for the shared action to be updated.
7377  * @param[in] action
7378  *   Action specification used to modify the action pointed by handle.
7379  *   *action* must be of the same type as the action pointed to by the
7380  *   handle argument; otherwise it is considered invalid.
7381  * @param[out] error
7382  *   Perform verbose error reporting if not NULL. PMDs initialize this
7383  *   structure in case of error only.
7384  *
7385  * @return
7386  *   0 on success, a negative errno value otherwise and rte_errno is set.
7387  */
7388 static int
7389 mlx5_shared_action_update(struct rte_eth_dev *dev,
7390                 struct rte_flow_shared_action *shared_action,
7391                 const struct rte_flow_action *action,
7392                 struct rte_flow_error *error)
7393 {
7394         struct rte_flow_attr attr = { .transfer = 0 };
7395         const struct mlx5_flow_driver_ops *fops =
7396                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7397         int ret;
7398
7399         ret = flow_drv_action_validate(dev, NULL, action, fops, error);
7400         if (ret)
7401                 return ret;
7402         return flow_drv_action_update(dev, shared_action, action->conf, fops,
7403                                       error);
7404 }
7405
7406 /**
7407  * Query the shared action by handle.
7408  *
7409  * This function allows retrieving action-specific data such as counters.
7410  * Data is gathered by a special action which may be present/referenced in
7411  * more than one flow rule definition.
7412  *
7413  * \see RTE_FLOW_ACTION_TYPE_COUNT
7414  *
7415  * @param dev
7416  *   Pointer to Ethernet device structure.
7417  * @param[in] action
7418  *   Handle for the shared action to query.
7419  * @param[in, out] data
7420  *   Pointer to storage for the associated query data type.
7421  * @param[out] error
7422  *   Perform verbose error reporting if not NULL. PMDs initialize this
7423  *   structure in case of error only.
7424  *
7425  * @return
7426  *   0 on success, a negative errno value otherwise and rte_errno is set.
7427  */
7428 static int
7429 mlx5_shared_action_query(struct rte_eth_dev *dev,
7430                          const struct rte_flow_shared_action *action,
7431                          void *data,
7432                          struct rte_flow_error *error)
7433 {
7434         struct rte_flow_attr attr = { .transfer = 0 };
7435         const struct mlx5_flow_driver_ops *fops =
7436                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7437
7438         return flow_drv_action_query(dev, action, data, fops, error);
7439 }
7440
7441 /**
7442  * Destroy all shared actions.
7443  *
7444  * @param dev
7445  *   Pointer to Ethernet device.
7446  *
7447  * @return
7448  *   0 on success, a negative errno value otherwise and rte_errno is set.
7449  */
7450 int
7451 mlx5_shared_action_flush(struct rte_eth_dev *dev)
7452 {
7453         struct rte_flow_error error;
7454         struct mlx5_priv *priv = dev->data->dev_private;
7455         struct mlx5_shared_action_rss *shared_rss;
7456         int ret = 0;
7457         uint32_t idx;
7458
7459         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
7460                       priv->rss_shared_actions, idx, shared_rss, next) {
7461                 ret |= mlx5_shared_action_destroy(dev,
7462                        (struct rte_flow_shared_action *)(uintptr_t)idx, &error);
7463         }
7464         return ret;
7465 }
7466
7467 #ifndef HAVE_MLX5DV_DR
7468 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
7469 #else
7470 #define MLX5_DOMAIN_SYNC_FLOW \
7471         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
7472 #endif
7473
7474 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
7475 {
7476         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
7477         const struct mlx5_flow_driver_ops *fops;
7478         int ret;
7479         struct rte_flow_attr attr = { .transfer = 0 };
7480
7481         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7482         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
7483         if (ret > 0)
7484                 ret = -ret;
7485         return ret;
7486 }
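
/*
 * A minimal usage sketch (not compiled): forcing a steering cache sync
 * on all domains through the mlx5-specific API from rte_pmd_mlx5.h.
 * The function name is an assumption of the example.
 */
#if 0
static void
example_sync_all_domains(uint16_t port_id)
{
	int ret = rte_pmd_mlx5_sync_flow(port_id,
					 MLX5_DOMAIN_BIT_NIC_RX |
					 MLX5_DOMAIN_BIT_NIC_TX |
					 MLX5_DOMAIN_BIT_FDB);

	if (ret)
		DRV_LOG(ERR, "port %u flow sync failed: %d", port_id, ret);
}
#endif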
7487
7488 /**
7489  * Tunnel offload functionality is defined for the DV environment only.
7490  */
7491 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
7492 __extension__
7493 union tunnel_offload_mark {
7494         uint32_t val;
7495         struct {
7496                 uint32_t app_reserve:8;
7497                 uint32_t table_id:15;
7498                 uint32_t transfer:1;
7499                 uint32_t _unused_:8;
7500         };
7501 };
7502
7503 static bool
7504 mlx5_access_tunnel_offload_db
7505         (struct rte_eth_dev *dev,
7506          bool (*match)(struct rte_eth_dev *,
7507                        struct mlx5_flow_tunnel *, const void *),
7508          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
7509          void (*miss)(struct rte_eth_dev *, void *),
7510          void *ctx, bool lock_op);
7511
7512 static int
7513 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
7514                              struct rte_flow *flow,
7515                              const struct rte_flow_attr *attr,
7516                              const struct rte_flow_action *app_actions,
7517                              uint32_t flow_idx,
7518                              struct tunnel_default_miss_ctx *ctx,
7519                              struct rte_flow_error *error)
7520 {
7521         struct mlx5_priv *priv = dev->data->dev_private;
7522         struct mlx5_flow *dev_flow;
7523         struct rte_flow_attr miss_attr = *attr;
7524         const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
7525         const struct rte_flow_item miss_items[2] = {
7526                 {
7527                         .type = RTE_FLOW_ITEM_TYPE_ETH,
7528                         .spec = NULL,
7529                         .last = NULL,
7530                         .mask = NULL
7531                 },
7532                 {
7533                         .type = RTE_FLOW_ITEM_TYPE_END,
7534                         .spec = NULL,
7535                         .last = NULL,
7536                         .mask = NULL
7537                 }
7538         };
7539         union tunnel_offload_mark mark_id;
7540         struct rte_flow_action_mark miss_mark;
7541         struct rte_flow_action miss_actions[3] = {
7542                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
7543                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
7544         };
7545         const struct rte_flow_action_jump *jump_data;
7546         uint32_t i, flow_table = 0; /* prevent compilation warning */
7547         struct flow_grp_info grp_info = {
7548                 .external = 1,
7549                 .transfer = attr->transfer,
7550                 .fdb_def_rule = !!priv->fdb_def_rule,
7551                 .std_tbl_fix = 0,
7552         };
7553         int ret;
7554
7555         if (!attr->transfer) {
7556                 uint32_t q_size;
7557
7558                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
7559                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
7560                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
7561                                          0, SOCKET_ID_ANY);
7562                 if (!ctx->queue)
7563                         return rte_flow_error_set
7564                                 (error, ENOMEM,
7565                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7566                                 NULL, "invalid default miss RSS");
7567                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
7568                 ctx->action_rss.level = 0;
7569                 ctx->action_rss.types = priv->rss_conf.rss_hf;
7570                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
7571                 ctx->action_rss.queue_num = priv->reta_idx_n;
7572                 ctx->action_rss.key = priv->rss_conf.rss_key;
7573                 ctx->action_rss.queue = ctx->queue;
7574                 if (!priv->reta_idx_n || !priv->rxqs_n)
7575                         return rte_flow_error_set
7576                                 (error, EINVAL,
7577                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7578                                 NULL, "invalid port configuration");
7579                 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
7580                         ctx->action_rss.types = 0;
7581                 for (i = 0; i != priv->reta_idx_n; ++i)
7582                         ctx->queue[i] = (*priv->reta_idx)[i];
7583         } else {
7584                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
7585                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
7586         }
7587         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
7588         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
7589         jump_data = app_actions->conf;
7590         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
7591         miss_attr.group = jump_data->group;
7592         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
7593                                        &flow_table, &grp_info, error);
7594         if (ret)
7595                 return rte_flow_error_set(error, EINVAL,
7596                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
7597                                           NULL, "invalid tunnel id");
7598         mark_id.app_reserve = 0;
7599         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
7600         mark_id.transfer = !!attr->transfer;
7601         mark_id._unused_ = 0;
7602         miss_mark.id = mark_id.val;
7603         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
7604                                     miss_items, miss_actions, flow_idx, error);
7605         if (!dev_flow)
7606                 return -rte_errno;
7607         dev_flow->flow = flow;
7608         dev_flow->external = true;
7609         dev_flow->tunnel = tunnel;
7610         /* Subflow object was created, we must include it in the list. */
7611         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
7612                       dev_flow->handle, next);
7613         DRV_LOG(DEBUG,
7614                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
7615                 dev->data->port_id, tunnel->app_tunnel.type,
7616                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
7617         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
7618                                   miss_actions, error);
7619         if (!ret)
7620                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
7621                                                   error);
7622
7623         return ret;
7624 }
7625
7626 static const struct mlx5_flow_tbl_data_entry  *
7627 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
7628 {
7629         struct mlx5_priv *priv = dev->data->dev_private;
7630         struct mlx5_dev_ctx_shared *sh = priv->sh;
7631         struct mlx5_hlist_entry *he;
7632         union tunnel_offload_mark mbits = { .val = mark };
7633         union mlx5_flow_tbl_key table_key = {
7634                 {
7635                         .table_id = tunnel_id_to_flow_tbl(mbits.table_id),
7636                         .dummy = 0,
7637                         .domain = !!mbits.transfer,
7638                         .direction = 0,
7639                 }
7640         };
7641         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL);
7642         return he ?
7643                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
7644 }
7645
7646 static void
7647 mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
7648                                    struct mlx5_hlist_entry *entry)
7649 {
7650         struct mlx5_dev_ctx_shared *sh = list->ctx;
7651         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
7652
7653         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7654                         tunnel_flow_tbl_to_id(tte->flow_table));
7655         mlx5_free(tte);
7656 }
7657
7658 static int
7659 mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused,
7660                                   struct mlx5_hlist_entry *entry,
7661                                   uint64_t key, void *cb_ctx __rte_unused)
7662 {
7663         union tunnel_tbl_key tbl = {
7664                 .val = key,
7665         };
7666         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
7667
7668         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
7669 }
7670
7671 static struct mlx5_hlist_entry *
7672 mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key,
7673                                    void *ctx __rte_unused)
7674 {
7675         struct mlx5_dev_ctx_shared *sh = list->ctx;
7676         struct tunnel_tbl_entry *tte;
7677         union tunnel_tbl_key tbl = {
7678                 .val = key,
7679         };
7680
7681         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
7682                           sizeof(*tte), 0,
7683                           SOCKET_ID_ANY);
7684         if (!tte)
7685                 goto err;
7686         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7687                           &tte->flow_table);
7688         if (tte->flow_table >= MLX5_MAX_TABLES) {
7689                 DRV_LOG(ERR, "Tunnel TBL ID %u exceeds the max limit.",
7690                         tte->flow_table);
7691                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
7692                                 tte->flow_table);
7693                 goto err;
7694         } else if (!tte->flow_table) {
7695                 goto err;
7696         }
7697         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
7698         tte->tunnel_id = tbl.tunnel_id;
7699         tte->group = tbl.group;
7700         return &tte->hash;
7701 err:
7702         if (tte)
7703                 mlx5_free(tte);
7704         return NULL;
7705 }
7706
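/**
 * Translate a tunnel flow group into a flow table index, allocating the
 * index on first use via the per-tunnel (or hub-wide) group hash list.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_flow_error is set.
 */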
7707 static uint32_t
7708 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
7709                                 const struct mlx5_flow_tunnel *tunnel,
7710                                 uint32_t group, uint32_t *table,
7711                                 struct rte_flow_error *error)
7712 {
7713         struct mlx5_hlist_entry *he;
7714         struct tunnel_tbl_entry *tte;
7715         union tunnel_tbl_key key = {
7716                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
7717                 .group = group
7718         };
7719         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7720         struct mlx5_hlist *group_hash;
7721
7722         group_hash = tunnel ? tunnel->groups : thub->groups;
7723         he = mlx5_hlist_register(group_hash, key.val, NULL);
7724         if (!he)
7725                 return rte_flow_error_set(error, EINVAL,
7726                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7727                                           NULL,
7728                                           "tunnel group index not supported");
7729         tte = container_of(he, typeof(*tte), hash);
7730         *table = tte->flow_table;
7731         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
7732                 dev->data->port_id, key.tunnel_id, group, *table);
7733         return 0;
7734 }
7735
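/**
 * Release a PMD tunnel object: unlink it from the hub list, destroy its
 * group hash list and return its ID to the indexed pool.
 */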
7736 static void
7737 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
7738                       struct mlx5_flow_tunnel *tunnel)
7739 {
7740         struct mlx5_priv *priv = dev->data->dev_private;
7741         struct mlx5_indexed_pool *ipool;
7742
7743         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
7744                 dev->data->port_id, tunnel->tunnel_id);
7745         LIST_REMOVE(tunnel, chain);
7746         mlx5_hlist_destroy(tunnel->groups);
7747         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
7748         mlx5_ipool_free(ipool, tunnel->tunnel_id);
7749 }
7750
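/**
 * Iterate over the tunnel offload database under the hub spinlock and run
 * hit() on the first tunnel accepted by match(), or miss() when nothing
 * matches. With lock_op set the callbacks execute while the lock is still
 * held, otherwise the lock is dropped before they are invoked.
 *
 * @return
 *   true if a matching tunnel was found, false otherwise.
 */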
7751 static bool
7752 mlx5_access_tunnel_offload_db
7753         (struct rte_eth_dev *dev,
7754          bool (*match)(struct rte_eth_dev *,
7755                        struct mlx5_flow_tunnel *, const void *),
7756          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
7757          void (*miss)(struct rte_eth_dev *, void *),
7758          void *ctx, bool lock_op)
7759 {
7760         bool verdict = false;
7761         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7762         struct mlx5_flow_tunnel *tunnel;
7763
7764         rte_spinlock_lock(&thub->sl);
7765         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
7766                 verdict = match(dev, tunnel, (const void *)ctx);
7767                 if (verdict)
7768                         break;
7769         }
7770         if (!lock_op)
7771                 rte_spinlock_unlock(&thub->sl);
7772         if (verdict && hit)
7773                 hit(dev, tunnel, ctx);
7774         if (!verdict && miss)
7775                 miss(dev, ctx);
7776         if (lock_op)
7777                 rte_spinlock_unlock(&thub->sl);
7778
7779         return verdict;
7780 }
7781
7782 struct tunnel_db_find_tunnel_id_ctx {
7783         uint32_t tunnel_id;
7784         struct mlx5_flow_tunnel *tunnel;
7785 };
7786
7787 static bool
7788 find_tunnel_id_match(struct rte_eth_dev *dev,
7789                      struct mlx5_flow_tunnel *tunnel, const void *x)
7790 {
7791         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
7792
7793         RTE_SET_USED(dev);
7794         return tunnel->tunnel_id == ctx->tunnel_id;
7795 }
7796
7797 static void
7798 find_tunnel_id_hit(struct rte_eth_dev *dev,
7799                    struct mlx5_flow_tunnel *tunnel, void *x)
7800 {
7801         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
7802         RTE_SET_USED(dev);
7803         ctx->tunnel = tunnel;
7804 }
7805
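/** Find a PMD tunnel by ID; the lookup runs under the hub spinlock. */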
7806 static struct mlx5_flow_tunnel *
7807 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
7808 {
7809         struct tunnel_db_find_tunnel_id_ctx ctx = {
7810                 .tunnel_id = id,
7811         };
7812
7813         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
7814                                       find_tunnel_id_hit, NULL, &ctx, true);
7815
7816         return ctx.tunnel;
7817 }
7818
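/**
 * Allocate a new PMD tunnel object together with its group hash list.
 * The tunnel ID is drawn from an indexed pool and embedded into the PMD
 * item and action templates handed back to the application.
 */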
7819 static struct mlx5_flow_tunnel *
7820 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
7821                           const struct rte_flow_tunnel *app_tunnel)
7822 {
7823         struct mlx5_priv *priv = dev->data->dev_private;
7824         struct mlx5_indexed_pool *ipool;
7825         struct mlx5_flow_tunnel *tunnel;
7826         uint32_t id;
7827
7828         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
7829         tunnel = mlx5_ipool_zmalloc(ipool, &id);
7830         if (!tunnel)
7831                 return NULL;
7832         if (id >= MLX5_MAX_TUNNELS) {
7833                 mlx5_ipool_free(ipool, id);
7834                 DRV_LOG(ERR, "Tunnel ID %u exceeds the maximum limit.", id);
7835                 return NULL;
7836         }
7837         tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
7838                                            mlx5_flow_tunnel_grp2tbl_create_cb,
7839                                            mlx5_flow_tunnel_grp2tbl_match_cb,
7840                                            mlx5_flow_tunnel_grp2tbl_remove_cb);
7841         if (!tunnel->groups) {
7842                 mlx5_ipool_free(ipool, id);
7843                 return NULL;
7844         }
7845         tunnel->groups->ctx = priv->sh;
7846         /* Initialize the new PMD tunnel. */
7847         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
7848         tunnel->tunnel_id = id;
7849         tunnel->action.type = (typeof(tunnel->action.type))
7850                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
7851         tunnel->action.conf = tunnel;
7852         tunnel->item.type = (typeof(tunnel->item.type))
7853                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
7854         tunnel->item.spec = tunnel;
7855         tunnel->item.last = NULL;
7856         tunnel->item.mask = NULL;
7857
7858         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
7859                 dev->data->port_id, tunnel->tunnel_id);
7860
7861         return tunnel;
7862 }
7863
7864 struct tunnel_db_get_tunnel_ctx {
7865         const struct rte_flow_tunnel *app_tunnel;
7866         struct mlx5_flow_tunnel *tunnel;
7867 };
7868
7869 static bool get_tunnel_match(struct rte_eth_dev *dev,
7870                              struct mlx5_flow_tunnel *tunnel, const void *x)
7871 {
7872         const struct tunnel_db_get_tunnel_ctx *ctx = x;
7873
7874         RTE_SET_USED(dev);
7875         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
7876                        sizeof(*ctx->app_tunnel));
7877 }
7878
7879 static void get_tunnel_hit(struct rte_eth_dev *dev,
7880                            struct mlx5_flow_tunnel *tunnel, void *x)
7881 {
7882         /* called under tunnel spinlock protection */
7883         struct tunnel_db_get_tunnel_ctx *ctx = x;
7884
7885         RTE_SET_USED(dev);
7886         tunnel->refctn++;
7887         ctx->tunnel = tunnel;
7888 }
7889
7890 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
7891 {
7892         /* Called under tunnel spinlock protection. */
7893         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
7894         struct tunnel_db_get_tunnel_ctx *ctx = x;
7895
7896         rte_spinlock_unlock(&thub->sl);
7897         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
7898         rte_spinlock_lock(&thub->sl);
7899         if (ctx->tunnel) {
7900                 ctx->tunnel->refctn = 1;
7901                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
7902         }
7903 }
7904
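/**
 * Find the PMD tunnel matching the application tunnel, or create one on
 * demand. A hit bumps the tunnel reference counter; a miss allocates a
 * fresh tunnel and links it into the hub list.
 *
 * @return
 *   0 on success, -ENOMEM when no tunnel could be obtained.
 */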
7905 static int
7906 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
7907                      const struct rte_flow_tunnel *app_tunnel,
7908                      struct mlx5_flow_tunnel **tunnel)
7909 {
7910         struct tunnel_db_get_tunnel_ctx ctx = {
7911                 .app_tunnel = app_tunnel,
7912         };
7913
7914         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
7915                                       get_tunnel_miss, &ctx, true);
7916         *tunnel = ctx.tunnel;
7917         return ctx.tunnel ? 0 : -ENOMEM;
7918 }
7919
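/**
 * Release the tunnel offload hub of a shared device context; warns when
 * application tunnels are still registered at teardown time.
 */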
7920 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
7921 {
7922         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
7923
7924         if (!thub)
7925                 return;
7926         if (!LIST_EMPTY(&thub->tunnels))
7927                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
7928         mlx5_hlist_destroy(thub->groups);
7929         mlx5_free(thub);
7930 }
7931
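/**
 * Allocate and initialize the tunnel offload hub of a shared device
 * context.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */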
7932 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
7933 {
7934         int err;
7935         struct mlx5_flow_tunnel_hub *thub;
7936
7937         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
7938                            0, SOCKET_ID_ANY);
7939         if (!thub)
7940                 return -ENOMEM;
7941         LIST_INIT(&thub->tunnels);
7942         rte_spinlock_init(&thub->sl);
7943         thub->groups = mlx5_hlist_create("flow groups",
7944                                          rte_align32pow2(MLX5_MAX_TABLES), 0,
7945                                          0, mlx5_flow_tunnel_grp2tbl_create_cb,
7946                                          mlx5_flow_tunnel_grp2tbl_match_cb,
7947                                          mlx5_flow_tunnel_grp2tbl_remove_cb);
7948         if (!thub->groups) {
7949                 err = -rte_errno;
7950                 goto err;
7951         }
7952         thub->groups->ctx = sh;
7953         sh->tunnel_hub = thub;
7954
7955         return 0;
7956
7957 err:
7958         /* Only the hlist creation above can fail, so thub is valid
7959          * and thub->groups is NULL here; plain free is sufficient.
7960          */
7961         mlx5_free(thub);
7962         return err;
7963 }
7964
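/**
 * Validate an application tunnel against the tunnel offload constraints:
 * offload must be active and only VXLAN tunnels are accepted.
 *
 * @param[out] err_msg
 *   Set to a failure description, or to NULL when validation succeeds.
 *
 * @return
 *   true when the tunnel can be offloaded, false otherwise.
 */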
7965 static inline bool
7966 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
7967                           struct rte_flow_tunnel *tunnel,
7968                           const char **err_msg)
7969 {
7970         *err_msg = NULL;
7971         if (!is_tunnel_offload_active(dev)) {
7972                 *err_msg = "tunnel offload was not activated";
7973                 goto out;
7974         } else if (!tunnel) {
7975                 *err_msg = "no application tunnel";
7976                 goto out;
7977         }
7978
7979         switch (tunnel->type) {
7980         default:
7981                 *err_msg = "unsupported tunnel type";
7982                 goto out;
7983         case RTE_FLOW_ITEM_TYPE_VXLAN:
7984                 break;
7985         }
7986
7987 out:
7988         return !*err_msg;
7989 }
7990
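/**
 * Tunnel offload callback behind rte_flow_tunnel_decap_set(): hand the
 * single PMD tunnel-set action of the matching tunnel object back to the
 * application.
 */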
7991 static int
7992 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
7993                     struct rte_flow_tunnel *app_tunnel,
7994                     struct rte_flow_action **actions,
7995                     uint32_t *num_of_actions,
7996                     struct rte_flow_error *error)
7997 {
7998         int ret;
7999         struct mlx5_flow_tunnel *tunnel;
8000         const char *err_msg = NULL;
8001         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8002
8003         if (!verdict)
8004                 return rte_flow_error_set(error, EINVAL,
8005                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8006                                           err_msg);
8007         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8008         if (ret < 0) {
8009                 return rte_flow_error_set(error, -ret,
8010                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8011                                           "failed to initialize pmd tunnel");
8012         }
8013         *actions = &tunnel->action;
8014         *num_of_actions = 1;
8015         return 0;
8016 }
8017
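/**
 * Tunnel offload callback behind rte_flow_tunnel_match(): hand the single
 * PMD tunnel item of the matching tunnel object back to the application.
 */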
8018 static int
8019 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
8020                        struct rte_flow_tunnel *app_tunnel,
8021                        struct rte_flow_item **items,
8022                        uint32_t *num_of_items,
8023                        struct rte_flow_error *error)
8024 {
8025         int ret;
8026         struct mlx5_flow_tunnel *tunnel;
8027         const char *err_msg = NULL;
8028         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8029
8030         if (!verdict)
8031                 return rte_flow_error_set(error, EINVAL,
8032                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8033                                           err_msg);
8034         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8035         if (ret < 0) {
8036                 return rte_flow_error_set(error, ret,
8037                 return rte_flow_error_set(error, -ret,
8038                                           "failed to initialize pmd tunnel");
8039         }
8040         *items = &tunnel->item;
8041         *num_of_items = 1;
8042         return 0;
8043 }
8044
8045 struct tunnel_db_element_release_ctx {
8046         struct rte_flow_item *items;
8047         struct rte_flow_action *actions;
8048         uint32_t num_elements;
8049         struct rte_flow_error *error;
8050         int ret;
8051 };
8052
8053 static bool
8054 tunnel_element_release_match(struct rte_eth_dev *dev,
8055                              struct mlx5_flow_tunnel *tunnel, const void *x)
8056 {
8057         const struct tunnel_db_element_release_ctx *ctx = x;
8058
8059         RTE_SET_USED(dev);
8060         if (ctx->num_elements != 1)
8061                 return false;
8062         else if (ctx->items)
8063                 return ctx->items == &tunnel->item;
8064         else if (ctx->actions)
8065                 return ctx->actions == &tunnel->action;
8066
8067         return false;
8068 }
8069
8070 static void
8071 tunnel_element_release_hit(struct rte_eth_dev *dev,
8072                            struct mlx5_flow_tunnel *tunnel, void *x)
8073 {
8074         struct tunnel_db_element_release_ctx *ctx = x;
8075         ctx->ret = 0;
8076         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
8077                 mlx5_flow_tunnel_free(dev, tunnel);
8078 }
8079
8080 static void
8081 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
8082 {
8083         struct tunnel_db_element_release_ctx *ctx = x;
8084         RTE_SET_USED(dev);
8085         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
8086                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8087                                       "invalid argument");
8088 }
8089
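/**
 * Tunnel offload callback releasing PMD tunnel items: drop one reference
 * to the owning tunnel and free it when the last reference goes away.
 */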
8090 static int
8091 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
8092                        struct rte_flow_item *pmd_items,
8093                        uint32_t num_items, struct rte_flow_error *err)
8094 {
8095         struct tunnel_db_element_release_ctx ctx = {
8096                 .items = pmd_items,
8097                 .actions = NULL,
8098                 .num_elements = num_items,
8099                 .error = err,
8100         };
8101
8102         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8103                                       tunnel_element_release_hit,
8104                                       tunnel_element_release_miss, &ctx, false);
8105
8106         return ctx.ret;
8107 }
8108
8109 static int
8110 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
8111                          struct rte_flow_action *pmd_actions,
8112                          uint32_t num_actions, struct rte_flow_error *err)
8113 {
8114         struct tunnel_db_element_release_ctx ctx = {
8115                 .items = NULL,
8116                 .actions = pmd_actions,
8117                 .num_elements = num_actions,
8118                 .error = err,
8119         };
8120
8121         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8122                                       tunnel_element_release_hit,
8123                                       tunnel_element_release_miss, &ctx, false);
8124
8125         return ctx.ret;
8126 }
8127
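/**
 * Tunnel offload callback restoring tunnel metadata of a missed packet:
 * both PKT_RX_FDIR and PKT_RX_FDIR_ID must be set, and the FDIR mark must
 * decode to a known tunnel flow table.
 */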
8128 static int
8129 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
8130                                   struct rte_mbuf *m,
8131                                   struct rte_flow_restore_info *info,
8132                                   struct rte_flow_error *err)
8133 {
8134         uint64_t ol_flags = m->ol_flags;
8135         const struct mlx5_flow_tbl_data_entry *tble;
8136         const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
8137
8138         if (!is_tunnel_offload_active(dev)) {
8139                 info->flags = 0;
8140                 return 0;
8141         }
8142
8143         if ((ol_flags & mask) != mask)
8144                 goto err;
8145         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
8146         if (!tble) {
8147                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
8148                         dev->data->port_id, m->hash.fdir.hi);
8149                 goto err;
8150         }
8151         MLX5_ASSERT(tble->tunnel);
8152         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
8153         info->group_id = tble->group_id;
8154         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
8155                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
8156                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
8157
8158         return 0;
8159
8160 err:
8161         return rte_flow_error_set(err, EINVAL,
8162                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8163                                   "failed to get restore info");
8164 }
8165
8166 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
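/*
 * Tunnel offload depends on DV flow support; when the PMD is built without
 * it, the callbacks below fail with -ENOTSUP.
 */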
8167 static int
8168 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
8169                            __rte_unused struct rte_flow_tunnel *app_tunnel,
8170                            __rte_unused struct rte_flow_action **actions,
8171                            __rte_unused uint32_t *num_of_actions,
8172                            __rte_unused struct rte_flow_error *error)
8173 {
8174         return -ENOTSUP;
8175 }
8176
8177 static int
8178 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
8179                        __rte_unused struct rte_flow_tunnel *app_tunnel,
8180                        __rte_unused struct rte_flow_item **items,
8181                        __rte_unused uint32_t *num_of_items,
8182                        __rte_unused struct rte_flow_error *error)
8183 {
8184         return -ENOTSUP;
8185 }
8186
8187 static int
8188 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
8189                               __rte_unused struct rte_flow_item *pmd_items,
8190                               __rte_unused uint32_t num_items,
8191                               __rte_unused struct rte_flow_error *err)
8192 {
8193         return -ENOTSUP;
8194 }
8195
8196 static int
8197 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
8198                                 __rte_unused struct rte_flow_action *pmd_action,
8199                                 __rte_unused uint32_t num_actions,
8200                                 __rte_unused struct rte_flow_error *err)
8201 {
8202         return -ENOTSUP;
8203 }
8204
8205 static int
8206 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
8207                                   __rte_unused struct rte_mbuf *m,
8208                                   __rte_unused struct rte_flow_restore_info *i,
8209                                   __rte_unused struct rte_flow_error *err)
8210 {
8211         return -ENOTSUP;
8212 }
8213
8214 static int
8215 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
8216                              __rte_unused struct rte_flow *flow,
8217                              __rte_unused const struct rte_flow_attr *attr,
8218                              __rte_unused const struct rte_flow_action *actions,
8219                              __rte_unused uint32_t flow_idx,
8220                              __rte_unused struct tunnel_default_miss_ctx *ctx,
8221                              __rte_unused struct rte_flow_error *error)
8222 {
8223         return -ENOTSUP;
8224 }
8225
8226 static struct mlx5_flow_tunnel *
8227 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
8228                     __rte_unused uint32_t id)
8229 {
8230         return NULL;
8231 }
8232
8233 static void
8234 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
8235                       __rte_unused struct mlx5_flow_tunnel *tunnel)
8236 {
8237 }
8238
8239 static uint32_t
8240 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
8241                                 __rte_unused const struct mlx5_flow_tunnel *t,
8242                                 __rte_unused uint32_t group,
8243                                 __rte_unused uint32_t *table,
8244                                 struct rte_flow_error *error)
8245 {
8246         return rte_flow_error_set(error, ENOTSUP,
8247                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8248                                   "tunnel offload requires DV support");
8249 }
8250
8251 void
8252 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
8253                         __rte_unused  uint16_t port_id)
8254 {
8255 }
8256 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */
8257