net/mlx5: fix RSS flow item expansion for GRE key
dpdk.git: drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <stdalign.h>
7 #include <stdint.h>
8 #include <string.h>
9 #include <stdbool.h>
10 #include <sys/queue.h>
11
12 #include <rte_common.h>
13 #include <rte_ether.h>
14 #include <ethdev_driver.h>
15 #include <rte_eal_paging.h>
16 #include <rte_flow.h>
17 #include <rte_cycles.h>
18 #include <rte_flow_driver.h>
19 #include <rte_malloc.h>
20 #include <rte_ip.h>
21
22 #include <mlx5_glue.h>
23 #include <mlx5_devx_cmds.h>
24 #include <mlx5_prm.h>
25 #include <mlx5_malloc.h>
26
27 #include "mlx5_defs.h"
28 #include "mlx5.h"
29 #include "mlx5_flow.h"
30 #include "mlx5_flow_os.h"
31 #include "mlx5_rx.h"
32 #include "mlx5_tx.h"
33 #include "mlx5_common_os.h"
34 #include "rte_pmd_mlx5.h"
35
36 struct tunnel_default_miss_ctx {
37         uint16_t *queue;
38         __extension__
39         union {
40                 struct rte_flow_action_rss action_rss;
41                 struct rte_flow_action_queue miss_queue;
42                 struct rte_flow_action_jump miss_jump;
43                 uint8_t raw[0];
44         };
45 };
46
47 static int
48 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
49                              struct rte_flow *flow,
50                              const struct rte_flow_attr *attr,
51                              const struct rte_flow_action *app_actions,
52                              uint32_t flow_idx,
53                              struct tunnel_default_miss_ctx *ctx,
54                              struct rte_flow_error *error);
55 static struct mlx5_flow_tunnel *
56 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id);
57 static void
58 mlx5_flow_tunnel_free(struct rte_eth_dev *dev, struct mlx5_flow_tunnel *tunnel);
59 static uint32_t
60 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
61                                 const struct mlx5_flow_tunnel *tunnel,
62                                 uint32_t group, uint32_t *table,
63                                 struct rte_flow_error *error);
64
65 static struct mlx5_flow_workspace *mlx5_flow_push_thread_workspace(void);
66 static void mlx5_flow_pop_thread_workspace(void);
67
68
69 /** Device flow drivers. */
70 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
71
72 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
73
74 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
75         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
76 #if defined(HAVE_IBV_FLOW_DV_SUPPORT) || !defined(HAVE_INFINIBAND_VERBS_H)
77         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
78 #endif
79         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
80         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
81 };
82
83 /** Helper macro to build input graph for mlx5_flow_expand_rss(). */
84 #define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
85         (const int []){ \
86                 __VA_ARGS__, 0, \
87         }
88
89 /** Node object of input graph for mlx5_flow_expand_rss(). */
90 struct mlx5_flow_expand_node {
91         const int *const next;
92         /**<
93          * List of next node indexes. A zero value is interpreted as a terminator.
94          */
95         const enum rte_flow_item_type type;
96         /**< Pattern item type of current node. */
97         uint64_t rss_types;
98         /**<
99          * RSS types bit-field associated with this node
100          * (see ETH_RSS_* definitions).
101          */
102         uint8_t optional;
103         /**< Optional expansion node: 0 (default) to expand deeper, 1 to stop here. */
104 };
105
106 /** Object returned by mlx5_flow_expand_rss(). */
107 struct mlx5_flow_expand_rss {
108         uint32_t entries;
109         /**< Number of valid entries in @p entry[]. */
110         struct {
111                 struct rte_flow_item *pattern; /**< Expanded pattern array. */
112                 uint32_t priority; /**< Priority offset for each expansion. */
113         } entry[];
114 };
115
116 static enum rte_flow_item_type
117 mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
118 {
119         enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
120         uint16_t ether_type = 0;
121         uint16_t ether_type_m;
122         uint8_t ip_next_proto = 0;
123         uint8_t ip_next_proto_m;
124
125         if (item == NULL || item->spec == NULL)
126                 return ret;
127         switch (item->type) {
128         case RTE_FLOW_ITEM_TYPE_ETH:
129                 if (item->mask)
130                         ether_type_m = ((const struct rte_flow_item_eth *)
131                                                 (item->mask))->type;
132                 else
133                         ether_type_m = rte_flow_item_eth_mask.type;
134                 if (ether_type_m != RTE_BE16(0xFFFF))
135                         break;
136                 ether_type = ((const struct rte_flow_item_eth *)
137                                 (item->spec))->type;
138                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
139                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
140                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
141                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
142                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
143                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
144                 else
145                         ret = RTE_FLOW_ITEM_TYPE_END;
146                 break;
147         case RTE_FLOW_ITEM_TYPE_VLAN:
148                 if (item->mask)
149                         ether_type_m = ((const struct rte_flow_item_vlan *)
150                                                 (item->mask))->inner_type;
151                 else
152                         ether_type_m = rte_flow_item_vlan_mask.inner_type;
153                 if (ether_type_m != RTE_BE16(0xFFFF))
154                         break;
155                 ether_type = ((const struct rte_flow_item_vlan *)
156                                 (item->spec))->inner_type;
157                 if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
158                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
159                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
160                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
161                 else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
162                         ret = RTE_FLOW_ITEM_TYPE_VLAN;
163                 else
164                         ret = RTE_FLOW_ITEM_TYPE_END;
165                 break;
166         case RTE_FLOW_ITEM_TYPE_IPV4:
167                 if (item->mask)
168                         ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
169                                         (item->mask))->hdr.next_proto_id;
170                 else
171                         ip_next_proto_m =
172                                 rte_flow_item_ipv4_mask.hdr.next_proto_id;
173                 if (ip_next_proto_m != 0xFF)
174                         break;
175                 ip_next_proto = ((const struct rte_flow_item_ipv4 *)
176                                 (item->spec))->hdr.next_proto_id;
177                 if (ip_next_proto == IPPROTO_UDP)
178                         ret = RTE_FLOW_ITEM_TYPE_UDP;
179                 else if (ip_next_proto == IPPROTO_TCP)
180                         ret = RTE_FLOW_ITEM_TYPE_TCP;
181                 else if (ip_next_proto == IPPROTO_IP)
182                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
183                 else if (ip_next_proto == IPPROTO_IPV6)
184                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
185                 else
186                         ret = RTE_FLOW_ITEM_TYPE_END;
187                 break;
188         case RTE_FLOW_ITEM_TYPE_IPV6:
189                 if (item->mask)
190                         ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
191                                                 (item->mask))->hdr.proto;
192                 else
193                         ip_next_proto_m =
194                                 rte_flow_item_ipv6_mask.hdr.proto;
195                 if (ip_next_proto_m != 0xFF)
196                         break;
197                 ip_next_proto = ((const struct rte_flow_item_ipv6 *)
198                                 (item->spec))->hdr.proto;
199                 if (ip_next_proto == IPPROTO_UDP)
200                         ret = RTE_FLOW_ITEM_TYPE_UDP;
201                 else if (ip_next_proto == IPPROTO_TCP)
202                         ret = RTE_FLOW_ITEM_TYPE_TCP;
203                 else if (ip_next_proto == IPPROTO_IP)
204                         ret = RTE_FLOW_ITEM_TYPE_IPV4;
205                 else if (ip_next_proto == IPPROTO_IPV6)
206                         ret = RTE_FLOW_ITEM_TYPE_IPV6;
207                 else
208                         ret = RTE_FLOW_ITEM_TYPE_END;
209                 break;
210         default:
211                 ret = RTE_FLOW_ITEM_TYPE_VOID;
212                 break;
213         }
214         return ret;
215 }
216
217 #define MLX5_RSS_EXP_ELT_N 16
218
219 /**
220  * Expand RSS flows into several possible flows according to the RSS hash
221  * fields requested and the driver capabilities.
222  *
223  * @param[out] buf
224  *   Buffer to store the expansion result.
225  * @param[in] size
226  *   Buffer size in bytes. If 0, @p buf can be NULL.
227  * @param[in] pattern
228  *   User flow pattern.
229  * @param[in] types
230  *   RSS types to expand (see ETH_RSS_* definitions).
231  * @param[in] graph
232  *   Input graph to expand @p pattern according to @p types.
233  * @param[in] graph_root_index
234  *   Index of root node in @p graph, typically 0.
235  *
236  * @return
237  *   A positive value representing the size of @p buf in bytes regardless of
238  *   @p size on success, a negative errno value otherwise and rte_errno is
239  *   set; the following errors are defined:
240  *
241  *   -E2BIG: the expansion depth of @p graph exceeds the internal limit.
242  */
243 static int
244 mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
245                      const struct rte_flow_item *pattern, uint64_t types,
246                      const struct mlx5_flow_expand_node graph[],
247                      int graph_root_index)
248 {
249         const struct rte_flow_item *item;
250         const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
251         const int *next_node;
252         const int *stack[MLX5_RSS_EXP_ELT_N];
253         int stack_pos = 0;
254         struct rte_flow_item flow_items[MLX5_RSS_EXP_ELT_N];
255         unsigned int i;
256         size_t lsize;
257         size_t user_pattern_size = 0;
258         void *addr = NULL;
259         const struct mlx5_flow_expand_node *next = NULL;
260         struct rte_flow_item missed_item;
261         int missed = 0;
262         int elt = 0;
263         const struct rte_flow_item *last_item = NULL;
264
265         memset(&missed_item, 0, sizeof(missed_item));
266         lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
267                 MLX5_RSS_EXP_ELT_N * sizeof(buf->entry[0]);
268         if (lsize <= size) {
269                 buf->entry[0].priority = 0;
270                 buf->entry[0].pattern = (void *)&buf->entry[MLX5_RSS_EXP_ELT_N];
271                 buf->entries = 0;
272                 addr = buf->entry[0].pattern;
273         }
274         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
275                 if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
276                         last_item = item;
277                 for (i = 0; node->next && node->next[i]; ++i) {
278                         next = &graph[node->next[i]];
279                         if (next->type == item->type)
280                                 break;
281                 }
282                 if (next)
283                         node = next;
284                 user_pattern_size += sizeof(*item);
285         }
286         user_pattern_size += sizeof(*item); /* Handle END item. */
287         lsize += user_pattern_size;
288         /* Copy the user pattern in the first entry of the buffer. */
289         if (lsize <= size) {
290                 rte_memcpy(addr, pattern, user_pattern_size);
291                 addr = (void *)(((uintptr_t)addr) + user_pattern_size);
292                 buf->entries = 1;
293         }
294         /* Start expanding. */
295         memset(flow_items, 0, sizeof(flow_items));
296         user_pattern_size -= sizeof(*item);
297         /*
298          * Check if the last valid item has spec set; if so, complete the
299          * pattern so that it can be used for expansion.
300          */
301         missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
302         if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
303                 /* Item type END indicates expansion is not required. */
304                 return lsize;
305         }
306         if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
307                 next = NULL;
308                 missed = 1;
309                 for (i = 0; node->next && node->next[i]; ++i) {
310                         next = &graph[node->next[i]];
311                         if (next->type == missed_item.type) {
312                                 flow_items[0].type = missed_item.type;
313                                 flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
314                                 break;
315                         }
316                         next = NULL;
317                 }
318         }
319         if (next && missed) {
320                 elt = 2; /* missed item + item end. */
321                 node = next;
322                 lsize += elt * sizeof(*item) + user_pattern_size;
323                 if ((node->rss_types & types) && lsize <= size) {
324                         buf->entry[buf->entries].priority = 1;
325                         buf->entry[buf->entries].pattern = addr;
326                         buf->entries++;
327                         rte_memcpy(addr, buf->entry[0].pattern,
328                                    user_pattern_size);
329                         addr = (void *)(((uintptr_t)addr) + user_pattern_size);
330                         rte_memcpy(addr, flow_items, elt * sizeof(*item));
331                         addr = (void *)(((uintptr_t)addr) +
332                                         elt * sizeof(*item));
333                 }
334         }
335         memset(flow_items, 0, sizeof(flow_items));
336         next_node = node->next;
337         stack[stack_pos] = next_node;
338         node = next_node ? &graph[*next_node] : NULL;
339         while (node) {
340                 flow_items[stack_pos].type = node->type;
341                 if (node->rss_types & types) {
342                         /*
343                          * Compute the number of items to copy from the
344                          * expansion and copy it.
345                          * When stack_pos is 0, there is 1 element in it,
346                          * plus the additional END item.
347                          */
348                         elt = stack_pos + 2;
349                         flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
350                         lsize += elt * sizeof(*item) + user_pattern_size;
351                         if (lsize <= size) {
352                                 size_t n = elt * sizeof(*item);
353
354                                 buf->entry[buf->entries].priority =
355                                         stack_pos + 1 + missed;
356                                 buf->entry[buf->entries].pattern = addr;
357                                 buf->entries++;
358                                 rte_memcpy(addr, buf->entry[0].pattern,
359                                            user_pattern_size);
360                                 addr = (void *)(((uintptr_t)addr) +
361                                                 user_pattern_size);
362                                 rte_memcpy(addr, &missed_item,
363                                            missed * sizeof(*item));
364                                 addr = (void *)(((uintptr_t)addr) +
365                                         missed * sizeof(*item));
366                                 rte_memcpy(addr, flow_items, n);
367                                 addr = (void *)(((uintptr_t)addr) + n);
368                         }
369                 }
370                 /* Go deeper. */
371                 if (!node->optional && node->next) {
372                         next_node = node->next;
373                         if (stack_pos++ == MLX5_RSS_EXP_ELT_N) {
374                                 rte_errno = E2BIG;
375                                 return -rte_errno;
376                         }
377                         stack[stack_pos] = next_node;
378                 } else if (*(next_node + 1)) {
379                         /* Follow up with the next possibility. */
380                         ++next_node;
381                 } else {
382                         /* Move to the next path. */
383                         if (stack_pos)
384                                 next_node = stack[--stack_pos];
385                         next_node++;
386                         stack[stack_pos] = next_node;
387                 }
388                 node = *next_node ? &graph[*next_node] : NULL;
389         };
390         return lsize;
391 }
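/*
 * Illustrative usage sketch (not part of the driver sources): per the
 * documented contract above, a caller may first query the required buffer
 * size with a zero-sized buffer and then call again with real storage.
 * The "pattern" and "rss_types" names below are hypothetical caller
 * variables.
 *
 *	int ret = mlx5_flow_expand_rss(NULL, 0, pattern, rss_types,
 *				       mlx5_support_expansion,
 *				       MLX5_EXPANSION_ROOT);
 *	struct mlx5_flow_expand_rss *buf;
 *
 *	if (ret < 0)
 *		return ret;
 *	buf = mlx5_malloc(MLX5_MEM_ZERO, ret, 0, SOCKET_ID_ANY);
 *	if (buf != NULL)
 *		ret = mlx5_flow_expand_rss(buf, ret, pattern, rss_types,
 *					   mlx5_support_expansion,
 *					   MLX5_EXPANSION_ROOT);
 */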
392
393 enum mlx5_expansion {
394         MLX5_EXPANSION_ROOT,
395         MLX5_EXPANSION_ROOT_OUTER,
396         MLX5_EXPANSION_ROOT_ETH_VLAN,
397         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
398         MLX5_EXPANSION_OUTER_ETH,
399         MLX5_EXPANSION_OUTER_ETH_VLAN,
400         MLX5_EXPANSION_OUTER_VLAN,
401         MLX5_EXPANSION_OUTER_IPV4,
402         MLX5_EXPANSION_OUTER_IPV4_UDP,
403         MLX5_EXPANSION_OUTER_IPV4_TCP,
404         MLX5_EXPANSION_OUTER_IPV6,
405         MLX5_EXPANSION_OUTER_IPV6_UDP,
406         MLX5_EXPANSION_OUTER_IPV6_TCP,
407         MLX5_EXPANSION_VXLAN,
408         MLX5_EXPANSION_VXLAN_GPE,
409         MLX5_EXPANSION_GRE,
410         MLX5_EXPANSION_GRE_KEY,
411         MLX5_EXPANSION_MPLS,
412         MLX5_EXPANSION_ETH,
413         MLX5_EXPANSION_ETH_VLAN,
414         MLX5_EXPANSION_VLAN,
415         MLX5_EXPANSION_IPV4,
416         MLX5_EXPANSION_IPV4_UDP,
417         MLX5_EXPANSION_IPV4_TCP,
418         MLX5_EXPANSION_IPV6,
419         MLX5_EXPANSION_IPV6_UDP,
420         MLX5_EXPANSION_IPV6_TCP,
421 };
422
423 /** Supported expansion of items. */
424 static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
425         [MLX5_EXPANSION_ROOT] = {
426                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
427                                                   MLX5_EXPANSION_IPV4,
428                                                   MLX5_EXPANSION_IPV6),
429                 .type = RTE_FLOW_ITEM_TYPE_END,
430         },
431         [MLX5_EXPANSION_ROOT_OUTER] = {
432                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
433                                                   MLX5_EXPANSION_OUTER_IPV4,
434                                                   MLX5_EXPANSION_OUTER_IPV6),
435                 .type = RTE_FLOW_ITEM_TYPE_END,
436         },
437         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
438                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
439                 .type = RTE_FLOW_ITEM_TYPE_END,
440         },
441         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
442                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
443                                                 (MLX5_EXPANSION_OUTER_ETH_VLAN),
444                 .type = RTE_FLOW_ITEM_TYPE_END,
445         },
446         [MLX5_EXPANSION_OUTER_ETH] = {
447                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
448                                                   MLX5_EXPANSION_OUTER_IPV6,
449                                                   MLX5_EXPANSION_MPLS),
450                 .type = RTE_FLOW_ITEM_TYPE_ETH,
451                 .rss_types = 0,
452         },
453         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
454                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
455                 .type = RTE_FLOW_ITEM_TYPE_ETH,
456                 .rss_types = 0,
457         },
458         [MLX5_EXPANSION_OUTER_VLAN] = {
459                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
460                                                   MLX5_EXPANSION_OUTER_IPV6),
461                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
462         },
463         [MLX5_EXPANSION_OUTER_IPV4] = {
464                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
465                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
466                          MLX5_EXPANSION_OUTER_IPV4_TCP,
467                          MLX5_EXPANSION_GRE,
468                          MLX5_EXPANSION_IPV4,
469                          MLX5_EXPANSION_IPV6),
470                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
471                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
472                         ETH_RSS_NONFRAG_IPV4_OTHER,
473         },
474         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
475                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
476                                                   MLX5_EXPANSION_VXLAN_GPE),
477                 .type = RTE_FLOW_ITEM_TYPE_UDP,
478                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
479         },
480         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
481                 .type = RTE_FLOW_ITEM_TYPE_TCP,
482                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
483         },
484         [MLX5_EXPANSION_OUTER_IPV6] = {
485                 .next = MLX5_FLOW_EXPAND_RSS_NEXT
486                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
487                          MLX5_EXPANSION_OUTER_IPV6_TCP,
488                          MLX5_EXPANSION_IPV4,
489                          MLX5_EXPANSION_IPV6,
490                          MLX5_EXPANSION_GRE),
491                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
492                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
493                         ETH_RSS_NONFRAG_IPV6_OTHER,
494         },
495         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
496                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
497                                                   MLX5_EXPANSION_VXLAN_GPE),
498                 .type = RTE_FLOW_ITEM_TYPE_UDP,
499                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
500         },
501         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
502                 .type = RTE_FLOW_ITEM_TYPE_TCP,
503                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
504         },
505         [MLX5_EXPANSION_VXLAN] = {
506                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
507                                                   MLX5_EXPANSION_IPV4,
508                                                   MLX5_EXPANSION_IPV6),
509                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
510         },
511         [MLX5_EXPANSION_VXLAN_GPE] = {
512                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
513                                                   MLX5_EXPANSION_IPV4,
514                                                   MLX5_EXPANSION_IPV6),
515                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
516         },
517         [MLX5_EXPANSION_GRE] = {
518                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
519                                                   MLX5_EXPANSION_IPV6,
520                                                   MLX5_EXPANSION_GRE_KEY),
521                 .type = RTE_FLOW_ITEM_TYPE_GRE,
522         },
523         [MLX5_EXPANSION_GRE_KEY] = {
524                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
525                                                   MLX5_EXPANSION_IPV6),
526                 .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
527                 .optional = 1,
528         },
529         [MLX5_EXPANSION_MPLS] = {
530                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
531                                                   MLX5_EXPANSION_IPV6),
532                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
533         },
534         [MLX5_EXPANSION_ETH] = {
535                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
536                                                   MLX5_EXPANSION_IPV6),
537                 .type = RTE_FLOW_ITEM_TYPE_ETH,
538         },
539         [MLX5_EXPANSION_ETH_VLAN] = {
540                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
541                 .type = RTE_FLOW_ITEM_TYPE_ETH,
542         },
543         [MLX5_EXPANSION_VLAN] = {
544                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
545                                                   MLX5_EXPANSION_IPV6),
546                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
547         },
548         [MLX5_EXPANSION_IPV4] = {
549                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
550                                                   MLX5_EXPANSION_IPV4_TCP),
551                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
552                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
553                         ETH_RSS_NONFRAG_IPV4_OTHER,
554         },
555         [MLX5_EXPANSION_IPV4_UDP] = {
556                 .type = RTE_FLOW_ITEM_TYPE_UDP,
557                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
558         },
559         [MLX5_EXPANSION_IPV4_TCP] = {
560                 .type = RTE_FLOW_ITEM_TYPE_TCP,
561                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
562         },
563         [MLX5_EXPANSION_IPV6] = {
564                 .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
565                                                   MLX5_EXPANSION_IPV6_TCP),
566                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
567                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
568                         ETH_RSS_NONFRAG_IPV6_OTHER,
569         },
570         [MLX5_EXPANSION_IPV6_UDP] = {
571                 .type = RTE_FLOW_ITEM_TYPE_UDP,
572                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
573         },
574         [MLX5_EXPANSION_IPV6_TCP] = {
575                 .type = RTE_FLOW_ITEM_TYPE_TCP,
576                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
577         },
578 };
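/*
 * Informative worked example (not part of the driver sources): with the
 * expansion graph above, a user pattern of "eth / ipv4 / end" combined with
 * RSS types ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV4_TCP is expected
 * to expand into the original pattern plus "eth / ipv4 / udp / end" and
 * "eth / ipv4 / tcp / end", following the MLX5_EXPANSION_IPV4 ->
 * {MLX5_EXPANSION_IPV4_UDP, MLX5_EXPANSION_IPV4_TCP} edges whose rss_types
 * intersect the requested types.
 */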
579
580 static struct rte_flow_action_handle *
581 mlx5_action_handle_create(struct rte_eth_dev *dev,
582                           const struct rte_flow_indir_action_conf *conf,
583                           const struct rte_flow_action *action,
584                           struct rte_flow_error *error);
585 static int mlx5_action_handle_destroy
586                                 (struct rte_eth_dev *dev,
587                                  struct rte_flow_action_handle *handle,
588                                  struct rte_flow_error *error);
589 static int mlx5_action_handle_update
590                                 (struct rte_eth_dev *dev,
591                                  struct rte_flow_action_handle *handle,
592                                  const void *update,
593                                  struct rte_flow_error *error);
594 static int mlx5_action_handle_query
595                                 (struct rte_eth_dev *dev,
596                                  const struct rte_flow_action_handle *handle,
597                                  void *data,
598                                  struct rte_flow_error *error);
599 static int
600 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
601                     struct rte_flow_tunnel *app_tunnel,
602                     struct rte_flow_action **actions,
603                     uint32_t *num_of_actions,
604                     struct rte_flow_error *error);
605 static int
606 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
607                        struct rte_flow_tunnel *app_tunnel,
608                        struct rte_flow_item **items,
609                        uint32_t *num_of_items,
610                        struct rte_flow_error *error);
611 static int
612 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
613                               struct rte_flow_item *pmd_items,
614                               uint32_t num_items, struct rte_flow_error *err);
615 static int
616 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
617                                 struct rte_flow_action *pmd_actions,
618                                 uint32_t num_actions,
619                                 struct rte_flow_error *err);
620 static int
621 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
622                                   struct rte_mbuf *m,
623                                   struct rte_flow_restore_info *info,
624                                   struct rte_flow_error *err);
625
626 static const struct rte_flow_ops mlx5_flow_ops = {
627         .validate = mlx5_flow_validate,
628         .create = mlx5_flow_create,
629         .destroy = mlx5_flow_destroy,
630         .flush = mlx5_flow_flush,
631         .isolate = mlx5_flow_isolate,
632         .query = mlx5_flow_query,
633         .dev_dump = mlx5_flow_dev_dump,
634         .get_aged_flows = mlx5_flow_get_aged_flows,
635         .action_handle_create = mlx5_action_handle_create,
636         .action_handle_destroy = mlx5_action_handle_destroy,
637         .action_handle_update = mlx5_action_handle_update,
638         .action_handle_query = mlx5_action_handle_query,
639         .tunnel_decap_set = mlx5_flow_tunnel_decap_set,
640         .tunnel_match = mlx5_flow_tunnel_match,
641         .tunnel_action_decap_release = mlx5_flow_tunnel_action_release,
642         .tunnel_item_release = mlx5_flow_tunnel_item_release,
643         .get_restore_info = mlx5_flow_tunnel_get_restore_info,
644 };
645
646 /* Tunnel information. */
647 struct mlx5_flow_tunnel_info {
648         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
649         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
650 };
651
652 static struct mlx5_flow_tunnel_info tunnels_info[] = {
653         {
654                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
655                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
656         },
657         {
658                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
659                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
660         },
661         {
662                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
663                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
664         },
665         {
666                 .tunnel = MLX5_FLOW_LAYER_GRE,
667                 .ptype = RTE_PTYPE_TUNNEL_GRE,
668         },
669         {
670                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
671                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
672         },
673         {
674                 .tunnel = MLX5_FLOW_LAYER_MPLS,
675                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
676         },
677         {
678                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
679                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
680         },
681         {
682                 .tunnel = MLX5_FLOW_LAYER_IPIP,
683                 .ptype = RTE_PTYPE_TUNNEL_IP,
684         },
685         {
686                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
687                 .ptype = RTE_PTYPE_TUNNEL_IP,
688         },
689         {
690                 .tunnel = MLX5_FLOW_LAYER_GTP,
691                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
692         },
693 };
694
695
696
697 /**
698  * Translate tag ID to register.
699  *
700  * @param[in] dev
701  *   Pointer to the Ethernet device structure.
702  * @param[in] feature
703  *   The feature that requests the register.
704  * @param[in] id
705  *   The requested register ID.
706  * @param[out] error
707  *   Error description in case of failure.
708  *
709  * @return
710  *   The requested register on success, a negative errno
711  *   value otherwise and rte_errno is set.
712  */
713 int
714 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
715                      enum mlx5_feature_name feature,
716                      uint32_t id,
717                      struct rte_flow_error *error)
718 {
719         struct mlx5_priv *priv = dev->data->dev_private;
720         struct mlx5_dev_config *config = &priv->config;
721         enum modify_reg start_reg;
722         bool skip_mtr_reg = false;
723
724         switch (feature) {
725         case MLX5_HAIRPIN_RX:
726                 return REG_B;
727         case MLX5_HAIRPIN_TX:
728                 return REG_A;
729         case MLX5_METADATA_RX:
730                 switch (config->dv_xmeta_en) {
731                 case MLX5_XMETA_MODE_LEGACY:
732                         return REG_B;
733                 case MLX5_XMETA_MODE_META16:
734                         return REG_C_0;
735                 case MLX5_XMETA_MODE_META32:
736                         return REG_C_1;
737                 }
738                 break;
739         case MLX5_METADATA_TX:
740                 return REG_A;
741         case MLX5_METADATA_FDB:
742                 switch (config->dv_xmeta_en) {
743                 case MLX5_XMETA_MODE_LEGACY:
744                         return REG_NON;
745                 case MLX5_XMETA_MODE_META16:
746                         return REG_C_0;
747                 case MLX5_XMETA_MODE_META32:
748                         return REG_C_1;
749                 }
750                 break;
751         case MLX5_FLOW_MARK:
752                 switch (config->dv_xmeta_en) {
753                 case MLX5_XMETA_MODE_LEGACY:
754                         return REG_NON;
755                 case MLX5_XMETA_MODE_META16:
756                         return REG_C_1;
757                 case MLX5_XMETA_MODE_META32:
758                         return REG_C_0;
759                 }
760                 break;
761         case MLX5_MTR_ID:
762                 /*
763                  * If meter color and meter id share one register, flow match
764                  * should use the meter color register for match.
765                  */
766                 if (priv->mtr_reg_share)
767                         return priv->mtr_color_reg;
768                 else
769                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
770                                REG_C_3;
771         case MLX5_MTR_COLOR:
772         case MLX5_ASO_FLOW_HIT:
773         case MLX5_ASO_CONNTRACK:
774                 /* All features use the same REG_C. */
775                 MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
776                 return priv->mtr_color_reg;
777         case MLX5_COPY_MARK:
778                 /*
779                  * The metadata COPY_MARK register is only used in the meter
780                  * suffix sub-flow, so it is safe to share the same register.
781                  */
782                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
783         case MLX5_APP_TAG:
784                 /*
785                  * If the meter is enabled, it engages a register for color
786                  * match and flow match. If the meter color match does not use
787                  * REG_C_2, the REG_C_x used by the meter color match must be
788                  * skipped.
789                  * If the meter is disabled, all available registers can be used.
790                  */
791                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
792                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
793                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
794                 if (id > (uint32_t)(REG_C_7 - start_reg))
795                         return rte_flow_error_set(error, EINVAL,
796                                                   RTE_FLOW_ERROR_TYPE_ITEM,
797                                                   NULL, "invalid tag id");
798                 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
799                         return rte_flow_error_set(error, ENOTSUP,
800                                                   RTE_FLOW_ERROR_TYPE_ITEM,
801                                                   NULL, "unsupported tag id");
802                 /*
803                  * This case means the meter is using a REG_C_x greater than 2.
804                  * Take care not to conflict with meter color REG_C_x.
805                  * If the available index REG_C_y >= REG_C_x, skip the
806                  * color register.
807                  */
808                 if (skip_mtr_reg && config->flow_mreg_c
809                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
810                         if (id >= (uint32_t)(REG_C_7 - start_reg))
811                                 return rte_flow_error_set(error, EINVAL,
812                                                        RTE_FLOW_ERROR_TYPE_ITEM,
813                                                         NULL, "invalid tag id");
814                         if (config->flow_mreg_c
815                             [id + 1 + start_reg - REG_C_0] != REG_NON)
816                                 return config->flow_mreg_c
817                                                [id + 1 + start_reg - REG_C_0];
818                         return rte_flow_error_set(error, ENOTSUP,
819                                                   RTE_FLOW_ERROR_TYPE_ITEM,
820                                                   NULL, "unsupported tag id");
821                 }
822                 return config->flow_mreg_c[id + start_reg - REG_C_0];
823         }
824         MLX5_ASSERT(false);
825         return rte_flow_error_set(error, EINVAL,
826                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
827                                   NULL, "invalid feature name");
828 }
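/*
 * Illustrative sketch (assumption: an arbitrary caller, not existing driver
 * code): resolving the REG_C_x register that backs application TAG index 0.
 *
 *	int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
 *
 *	if (reg < 0)
 *		return reg;
 *
 * On success "reg" holds a modify_reg value (REG_C_2 or higher depending on
 * the meter configuration); on failure rte_errno and "error" are set.
 */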
829
830 /**
831  * Check extensive flow metadata register support.
832  *
833  * @param dev
834  *   Pointer to rte_eth_dev structure.
835  *
836  * @return
837  *   True if device supports extensive flow metadata register, otherwise false.
838  */
839 bool
840 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
841 {
842         struct mlx5_priv *priv = dev->data->dev_private;
843         struct mlx5_dev_config *config = &priv->config;
844
845         /*
846          * Having an available reg_c can be regarded as supporting
847          * extensive flow metadata registers, which implies:
848          * - metadata register copy action by modify header.
849          * - 16 modify header actions are supported.
850          * - reg_c's are preserved across different domains (FDB and NIC) on
851          *   packet loopback by flow lookup miss.
852          */
853         return config->flow_mreg_c[2] != REG_NON;
854 }
855
856 /**
857  * Get the lowest priority.
858  *
859  * @param[in] dev
860  *   Pointer to the Ethernet device structure.
861  * @param[in] attr
862  *   Pointer to device flow rule attributes.
863  *
864  * @return
865  *   The lowest priority value for the flow.
866  */
867 uint32_t
868 mlx5_get_lowest_priority(struct rte_eth_dev *dev,
869                           const struct rte_flow_attr *attr)
870 {
871         struct mlx5_priv *priv = dev->data->dev_private;
872
873         if (!attr->group && !attr->transfer)
874                 return priv->config.flow_prio - 2;
875         return MLX5_NON_ROOT_FLOW_MAX_PRIO - 1;
876 }
877
878 /**
879  * Calculate matcher priority of the flow.
880  *
881  * @param[in] dev
882  *   Pointer to the Ethernet device structure.
883  * @param[in] attr
884  *   Pointer to device flow rule attributes.
885  * @param[in] subpriority
886  *   The priority based on the items.
887  * @return
888  *   The matcher priority of the flow.
889  */
890 uint16_t
891 mlx5_get_matcher_priority(struct rte_eth_dev *dev,
892                           const struct rte_flow_attr *attr,
893                           uint32_t subpriority)
894 {
895         uint16_t priority = (uint16_t)attr->priority;
896         struct mlx5_priv *priv = dev->data->dev_private;
897
898         if (!attr->group && !attr->transfer) {
899                 if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
900                         priority = priv->config.flow_prio - 1;
901                 return mlx5_os_flow_adjust_priority(dev, priority, subpriority);
902         }
903         if (attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)
904                 priority = MLX5_NON_ROOT_FLOW_MAX_PRIO;
905         return priority * 3 + subpriority;
906 }
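/*
 * Informative example: for a non-root rule (e.g. attr->group != 0) created
 * with attr->priority == 2 and an item-based subpriority of 1, the formula
 * above yields a matcher priority of 2 * 3 + 1 = 7. The lowest-priority
 * indicator is first remapped to MLX5_NON_ROOT_FLOW_MAX_PRIO before applying
 * the same formula.
 */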
907
908 /**
909  * Verify the @p item specifications (spec, last, mask) are compatible with the
910  * NIC capabilities.
911  *
912  * @param[in] item
913  *   Item specification.
914  * @param[in] mask
915  *   @p item->mask or flow default bit-masks.
916  * @param[in] nic_mask
917  *   Bit-masks covering supported fields by the NIC to compare with user mask.
918  * @param[in] size
919  *   Bit-masks size in bytes.
920  * @param[in] range_accepted
921  *   True if range of values is accepted for specific fields, false otherwise.
922  * @param[out] error
923  *   Pointer to error structure.
924  *
925  * @return
926  *   0 on success, a negative errno value otherwise and rte_errno is set.
927  */
928 int
929 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
930                           const uint8_t *mask,
931                           const uint8_t *nic_mask,
932                           unsigned int size,
933                           bool range_accepted,
934                           struct rte_flow_error *error)
935 {
936         unsigned int i;
937
938         MLX5_ASSERT(nic_mask);
939         for (i = 0; i < size; ++i)
940                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
941                         return rte_flow_error_set(error, ENOTSUP,
942                                                   RTE_FLOW_ERROR_TYPE_ITEM,
943                                                   item,
944                                                   "mask enables non supported"
945                                                   " bits");
946         if (!item->spec && (item->mask || item->last))
947                 return rte_flow_error_set(error, EINVAL,
948                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
949                                           "mask/last without a spec is not"
950                                           " supported");
951         if (item->spec && item->last && !range_accepted) {
952                 uint8_t spec[size];
953                 uint8_t last[size];
954                 unsigned int i;
955                 int ret;
956
957                 for (i = 0; i < size; ++i) {
958                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
959                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
960                 }
961                 ret = memcmp(spec, last, size);
962                 if (ret != 0)
963                         return rte_flow_error_set(error, EINVAL,
964                                                   RTE_FLOW_ERROR_TYPE_ITEM,
965                                                   item,
966                                                   "range is not valid");
967         }
968         return 0;
969 }
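/*
 * Illustrative sketch (hypothetical caller; "nic_mask" is an assumed local
 * default mask): validating a VLAN item against the NIC capabilities with
 * value ranges disallowed.
 *
 *	ret = mlx5_flow_item_acceptable(item,
 *					(const uint8_t *)item->mask,
 *					(const uint8_t *)&nic_mask,
 *					sizeof(struct rte_flow_item_vlan),
 *					false, error);
 *	if (ret < 0)
 *		return ret;
 *
 * Callers typically substitute a default mask when item->mask is NULL.
 */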
970
971 /**
972  * Adjust the hash fields according to the @p flow information.
973  *
974  * @param[in] rss_desc
975  *   Pointer to the RSS descriptor (struct mlx5_flow_rss_desc).
976  * @param[in] tunnel
977  *   1 when the hash field is for a tunnel item.
978  * @param[in] layer_types
979  *   ETH_RSS_* types.
980  * @param[in] hash_fields
981  *   Item hash fields.
982  *
983  * @return
984  *   The hash fields that should be used.
985  */
986 uint64_t
987 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
988                             int tunnel __rte_unused, uint64_t layer_types,
989                             uint64_t hash_fields)
990 {
991 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
992         int rss_request_inner = rss_desc->level >= 2;
993
994         /* Check RSS hash level for tunnel. */
995         if (tunnel && rss_request_inner)
996                 hash_fields |= IBV_RX_HASH_INNER;
997         else if (tunnel || rss_request_inner)
998                 return 0;
999 #endif
1000         /* Check if requested layer matches RSS hash fields. */
1001         if (!(rss_desc->types & layer_types))
1002                 return 0;
1003         return hash_fields;
1004 }
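/*
 * Illustrative sketch (assumed caller context, not existing driver code):
 * adjusting UDP hash fields for a possibly tunneled flow; "rss_desc" and
 * "tunnel" are assumed to come from the device flow being translated.
 *
 *	fields = mlx5_flow_hashfields_adjust(rss_desc, tunnel,
 *					     ETH_RSS_NONFRAG_IPV4_UDP |
 *					     ETH_RSS_NONFRAG_IPV6_UDP,
 *					     IBV_RX_HASH_SRC_PORT_UDP |
 *					     IBV_RX_HASH_DST_PORT_UDP);
 *
 * A zero return means the requested layer does not match rss_desc->types or
 * the tunnel/level combination rules the hash out.
 */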
1005
1006 /**
1007  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype
1008  * can be used; if several tunnel rules are used on this queue, the tunnel
1009  * ptype is cleared.
1010  *
1011  * @param rxq_ctrl
1012  *   Rx queue to update.
1013  */
1014 static void
1015 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
1016 {
1017         unsigned int i;
1018         uint32_t tunnel_ptype = 0;
1019
1020         /* Look up for the ptype to use. */
1021         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
1022                 if (!rxq_ctrl->flow_tunnels_n[i])
1023                         continue;
1024                 if (!tunnel_ptype) {
1025                         tunnel_ptype = tunnels_info[i].ptype;
1026                 } else {
1027                         tunnel_ptype = 0;
1028                         break;
1029                 }
1030         }
1031         rxq_ctrl->rxq.tunnel = tunnel_ptype;
1032 }
1033
1034 /**
1035  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
1036  * flow.
1037  *
1038  * @param[in] dev
1039  *   Pointer to the Ethernet device structure.
1040  * @param[in] dev_handle
1041  *   Pointer to device flow handle structure.
1042  */
1043 void
1044 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
1045                        struct mlx5_flow_handle *dev_handle)
1046 {
1047         struct mlx5_priv *priv = dev->data->dev_private;
1048         const int mark = dev_handle->mark;
1049         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1050         struct mlx5_ind_table_obj *ind_tbl = NULL;
1051         unsigned int i;
1052
1053         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1054                 struct mlx5_hrxq *hrxq;
1055
1056                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1057                               dev_handle->rix_hrxq);
1058                 if (hrxq)
1059                         ind_tbl = hrxq->ind_table;
1060         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1061                 struct mlx5_shared_action_rss *shared_rss;
1062
1063                 shared_rss = mlx5_ipool_get
1064                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1065                          dev_handle->rix_srss);
1066                 if (shared_rss)
1067                         ind_tbl = shared_rss->ind_tbl;
1068         }
1069         if (!ind_tbl)
1070                 return;
1071         for (i = 0; i != ind_tbl->queues_n; ++i) {
1072                 int idx = ind_tbl->queues[i];
1073                 struct mlx5_rxq_ctrl *rxq_ctrl =
1074                         container_of((*priv->rxqs)[idx],
1075                                      struct mlx5_rxq_ctrl, rxq);
1076
1077                 /*
1078                  * To support metadata register copy on Tx loopback,
1079                  * this must always be enabled (metadata may arrive
1080                  * from another port - not only from local flows).
1081                  */
1082                 if (priv->config.dv_flow_en &&
1083                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1084                     mlx5_flow_ext_mreg_supported(dev)) {
1085                         rxq_ctrl->rxq.mark = 1;
1086                         rxq_ctrl->flow_mark_n = 1;
1087                 } else if (mark) {
1088                         rxq_ctrl->rxq.mark = 1;
1089                         rxq_ctrl->flow_mark_n++;
1090                 }
1091                 if (tunnel) {
1092                         unsigned int j;
1093
1094                         /* Increase the counter matching the flow. */
1095                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1096                                 if ((tunnels_info[j].tunnel &
1097                                      dev_handle->layers) ==
1098                                     tunnels_info[j].tunnel) {
1099                                         rxq_ctrl->flow_tunnels_n[j]++;
1100                                         break;
1101                                 }
1102                         }
1103                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1104                 }
1105         }
1106 }
1107
1108 /**
1109  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
1110  *
1111  * @param[in] dev
1112  *   Pointer to the Ethernet device structure.
1113  * @param[in] flow
1114  *   Pointer to flow structure.
1115  */
1116 static void
1117 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1118 {
1119         struct mlx5_priv *priv = dev->data->dev_private;
1120         uint32_t handle_idx;
1121         struct mlx5_flow_handle *dev_handle;
1122
1123         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1124                        handle_idx, dev_handle, next)
1125                 flow_drv_rxq_flags_set(dev, dev_handle);
1126 }
1127
1128 /**
1129  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1130  * device flow if no other flow uses it with the same kind of request.
1131  *
1132  * @param dev
1133  *   Pointer to Ethernet device.
1134  * @param[in] dev_handle
1135  *   Pointer to the device flow handle structure.
1136  */
1137 static void
1138 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
1139                         struct mlx5_flow_handle *dev_handle)
1140 {
1141         struct mlx5_priv *priv = dev->data->dev_private;
1142         const int mark = dev_handle->mark;
1143         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
1144         struct mlx5_ind_table_obj *ind_tbl = NULL;
1145         unsigned int i;
1146
1147         if (dev_handle->fate_action == MLX5_FLOW_FATE_QUEUE) {
1148                 struct mlx5_hrxq *hrxq;
1149
1150                 hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
1151                               dev_handle->rix_hrxq);
1152                 if (hrxq)
1153                         ind_tbl = hrxq->ind_table;
1154         } else if (dev_handle->fate_action == MLX5_FLOW_FATE_SHARED_RSS) {
1155                 struct mlx5_shared_action_rss *shared_rss;
1156
1157                 shared_rss = mlx5_ipool_get
1158                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
1159                          dev_handle->rix_srss);
1160                 if (shared_rss)
1161                         ind_tbl = shared_rss->ind_tbl;
1162         }
1163         if (!ind_tbl)
1164                 return;
1165         MLX5_ASSERT(dev->data->dev_started);
1166         for (i = 0; i != ind_tbl->queues_n; ++i) {
1167                 int idx = ind_tbl->queues[i];
1168                 struct mlx5_rxq_ctrl *rxq_ctrl =
1169                         container_of((*priv->rxqs)[idx],
1170                                      struct mlx5_rxq_ctrl, rxq);
1171
1172                 if (priv->config.dv_flow_en &&
1173                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
1174                     mlx5_flow_ext_mreg_supported(dev)) {
1175                         rxq_ctrl->rxq.mark = 1;
1176                         rxq_ctrl->flow_mark_n = 1;
1177                 } else if (mark) {
1178                         rxq_ctrl->flow_mark_n--;
1179                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1180                 }
1181                 if (tunnel) {
1182                         unsigned int j;
1183
1184                         /* Decrease the counter matching the flow. */
1185                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
1186                                 if ((tunnels_info[j].tunnel &
1187                                      dev_handle->layers) ==
1188                                     tunnels_info[j].tunnel) {
1189                                         rxq_ctrl->flow_tunnels_n[j]--;
1190                                         break;
1191                                 }
1192                         }
1193                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
1194                 }
1195         }
1196 }
1197
1198 /**
1199  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1200  * @p flow if no other flow uses it with the same kind of request.
1201  *
1202  * @param dev
1203  *   Pointer to Ethernet device.
1204  * @param[in] flow
1205  *   Pointer to the flow.
1206  */
1207 static void
1208 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1209 {
1210         struct mlx5_priv *priv = dev->data->dev_private;
1211         uint32_t handle_idx;
1212         struct mlx5_flow_handle *dev_handle;
1213
1214         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
1215                        handle_idx, dev_handle, next)
1216                 flow_drv_rxq_flags_trim(dev, dev_handle);
1217 }
1218
1219 /**
1220  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
1221  *
1222  * @param dev
1223  *   Pointer to Ethernet device.
1224  */
1225 static void
1226 flow_rxq_flags_clear(struct rte_eth_dev *dev)
1227 {
1228         struct mlx5_priv *priv = dev->data->dev_private;
1229         unsigned int i;
1230
1231         for (i = 0; i != priv->rxqs_n; ++i) {
1232                 struct mlx5_rxq_ctrl *rxq_ctrl;
1233                 unsigned int j;
1234
1235                 if (!(*priv->rxqs)[i])
1236                         continue;
1237                 rxq_ctrl = container_of((*priv->rxqs)[i],
1238                                         struct mlx5_rxq_ctrl, rxq);
1239                 rxq_ctrl->flow_mark_n = 0;
1240                 rxq_ctrl->rxq.mark = 0;
1241                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
1242                         rxq_ctrl->flow_tunnels_n[j] = 0;
1243                 rxq_ctrl->rxq.tunnel = 0;
1244         }
1245 }
1246
1247 /**
1248  * Set the Rx queue dynamic metadata (mask and offset) for a flow
1249  *
1250  * @param[in] dev
1251  *   Pointer to the Ethernet device structure.
1252  */
1253 void
1254 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
1255 {
1256         struct mlx5_priv *priv = dev->data->dev_private;
1257         struct mlx5_rxq_data *data;
1258         unsigned int i;
1259
1260         for (i = 0; i != priv->rxqs_n; ++i) {
1261                 if (!(*priv->rxqs)[i])
1262                         continue;
1263                 data = (*priv->rxqs)[i];
1264                 if (!rte_flow_dynf_metadata_avail()) {
1265                         data->dynf_meta = 0;
1266                         data->flow_meta_mask = 0;
1267                         data->flow_meta_offset = -1;
1268                         data->flow_meta_port_mask = 0;
1269                 } else {
1270                         data->dynf_meta = 1;
1271                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
1272                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
1273                         data->flow_meta_port_mask = (uint32_t)~0;
1274                         if (priv->config.dv_xmeta_en == MLX5_XMETA_MODE_META16)
1275                                 data->flow_meta_port_mask >>= 16;
1276                 }
1277         }
1278 }
1279
1280 /*
1281  * Return a pointer to the desired action in the list of actions.
1282  *
1283  * @param[in] actions
1284  *   The list of actions to search the action in.
1285  * @param[in] action
1286  *   The action to find.
1287  *
1288  * @return
1289  *   Pointer to the action in the list, if found. NULL otherwise.
1290  */
1291 const struct rte_flow_action *
1292 mlx5_flow_find_action(const struct rte_flow_action *actions,
1293                       enum rte_flow_action_type action)
1294 {
1295         if (actions == NULL)
1296                 return NULL;
1297         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
1298                 if (actions->type == action)
1299                         return actions;
1300         return NULL;
1301 }
1302
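/*
 * Illustrative usage sketch, not part of the driver: locating one action in
 * a caller-built action array with mlx5_flow_find_action(). The array
 * contents and the mark id below are hypothetical.
 *
 *   static const struct rte_flow_action_mark mark_conf = { .id = 42 };
 *   static const struct rte_flow_action example_actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark_conf },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   const struct rte_flow_action *mark_action =
 *           mlx5_flow_find_action(example_actions,
 *                                 RTE_FLOW_ACTION_TYPE_MARK);
 *
 * A NULL return simply means the requested action type is absent from the
 * list, which lets callers treat optional actions as truly optional.
 */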
1303 /*
1304  * Validate the flag action.
1305  *
1306  * @param[in] action_flags
1307  *   Bit-fields that hold the actions detected until now.
1308  * @param[in] attr
1309  *   Attributes of flow that includes this action.
1310  * @param[out] error
1311  *   Pointer to error structure.
1312  *
1313  * @return
1314  *   0 on success, a negative errno value otherwise and rte_errno is set.
1315  */
1316 int
1317 mlx5_flow_validate_action_flag(uint64_t action_flags,
1318                                const struct rte_flow_attr *attr,
1319                                struct rte_flow_error *error)
1320 {
1321         if (action_flags & MLX5_FLOW_ACTION_MARK)
1322                 return rte_flow_error_set(error, EINVAL,
1323                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1324                                           "can't mark and flag in same flow");
1325         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1326                 return rte_flow_error_set(error, EINVAL,
1327                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1328                                           "can't have 2 flag"
1329                                           " actions in same flow");
1330         if (attr->egress)
1331                 return rte_flow_error_set(error, ENOTSUP,
1332                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1333                                           "flag action not supported for "
1334                                           "egress");
1335         return 0;
1336 }
1337
1338 /*
1339  * Validate the mark action.
1340  *
1341  * @param[in] action
1342  *   Pointer to the mark action.
1343  * @param[in] action_flags
1344  *   Bit-fields that hold the actions detected until now.
1345  * @param[in] attr
1346  *   Attributes of flow that includes this action.
1347  * @param[out] error
1348  *   Pointer to error structure.
1349  *
1350  * @return
1351  *   0 on success, a negative errno value otherwise and rte_errno is set.
1352  */
1353 int
1354 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1355                                uint64_t action_flags,
1356                                const struct rte_flow_attr *attr,
1357                                struct rte_flow_error *error)
1358 {
1359         const struct rte_flow_action_mark *mark = action->conf;
1360
1361         if (!mark)
1362                 return rte_flow_error_set(error, EINVAL,
1363                                           RTE_FLOW_ERROR_TYPE_ACTION,
1364                                           action,
1365                                           "configuration cannot be null");
1366         if (mark->id >= MLX5_FLOW_MARK_MAX)
1367                 return rte_flow_error_set(error, EINVAL,
1368                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1369                                           &mark->id,
1370                                           "mark id must be in 0 <= id < "
1371                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1372         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1373                 return rte_flow_error_set(error, EINVAL,
1374                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1375                                           "can't flag and mark in same flow");
1376         if (action_flags & MLX5_FLOW_ACTION_MARK)
1377                 return rte_flow_error_set(error, EINVAL,
1378                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1379                                           "can't have 2 mark actions in same"
1380                                           " flow");
1381         if (attr->egress)
1382                 return rte_flow_error_set(error, ENOTSUP,
1383                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1384                                           "mark action not supported for "
1385                                           "egress");
1386         return 0;
1387 }
1388
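/*
 * Illustrative sketch, not part of the driver: a mark action that passes the
 * checks above needs a non-NULL conf, an id below MLX5_FLOW_MARK_MAX, no
 * prior MARK/FLAG bit in action_flags and a non-egress attribute. The id
 * value is hypothetical; "attr" and "err" stand for a caller-provided
 * struct rte_flow_attr (with ingress set) and struct rte_flow_error.
 *
 *   const struct rte_flow_action_mark mark_conf = { .id = 0xbeef };
 *   const struct rte_flow_action mark_action = {
 *           .type = RTE_FLOW_ACTION_TYPE_MARK,
 *           .conf = &mark_conf,
 *   };
 *   int rc = mlx5_flow_validate_action_mark(&mark_action, 0, &attr, &err);
 *
 * With no mark or flag action detected yet (action_flags == 0), rc is
 * expected to be 0.
 */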
1389 /*
1390  * Validate the drop action.
1391  *
1392  * @param[in] action_flags
1393  *   Bit-fields that hold the actions detected until now.
1394  * @param[in] attr
1395  *   Attributes of flow that includes this action.
1396  * @param[out] error
1397  *   Pointer to error structure.
1398  *
1399  * @return
1400  *   0 on success, a negative errno value otherwise and rte_errno is set.
1401  */
1402 int
1403 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1404                                const struct rte_flow_attr *attr,
1405                                struct rte_flow_error *error)
1406 {
1407         if (attr->egress)
1408                 return rte_flow_error_set(error, ENOTSUP,
1409                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1410                                           "drop action not supported for "
1411                                           "egress");
1412         return 0;
1413 }
1414
1415 /*
1416  * Validate the queue action.
1417  *
1418  * @param[in] action
1419  *   Pointer to the queue action.
1420  * @param[in] action_flags
1421  *   Bit-fields that hold the actions detected until now.
1422  * @param[in] dev
1423  *   Pointer to the Ethernet device structure.
1424  * @param[in] attr
1425  *   Attributes of flow that includes this action.
1426  * @param[out] error
1427  *   Pointer to error structure.
1428  *
1429  * @return
1430  *   0 on success, a negative errno value otherwise and rte_errno is set.
1431  */
1432 int
1433 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1434                                 uint64_t action_flags,
1435                                 struct rte_eth_dev *dev,
1436                                 const struct rte_flow_attr *attr,
1437                                 struct rte_flow_error *error)
1438 {
1439         struct mlx5_priv *priv = dev->data->dev_private;
1440         const struct rte_flow_action_queue *queue = action->conf;
1441
1442         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1443                 return rte_flow_error_set(error, EINVAL,
1444                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1445                                           "can't have 2 fate actions in"
1446                                           " same flow");
1447         if (!priv->rxqs_n)
1448                 return rte_flow_error_set(error, EINVAL,
1449                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1450                                           NULL, "No Rx queues configured");
1451         if (queue->index >= priv->rxqs_n)
1452                 return rte_flow_error_set(error, EINVAL,
1453                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1454                                           &queue->index,
1455                                           "queue index out of range");
1456         if (!(*priv->rxqs)[queue->index])
1457                 return rte_flow_error_set(error, EINVAL,
1458                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1459                                           &queue->index,
1460                                           "queue is not configured");
1461         if (attr->egress)
1462                 return rte_flow_error_set(error, ENOTSUP,
1463                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1464                                           "queue action not supported for "
1465                                           "egress");
1466         return 0;
1467 }
1468
1469 /*
1470  * Validate the rss action.
1471  *
1472  * @param[in] dev
1473  *   Pointer to the Ethernet device structure.
1474  * @param[in] action
1475  *   Pointer to the RSS action.
1476  * @param[out] error
1477  *   Pointer to error structure.
1478  *
1479  * @return
1480  *   0 on success, a negative errno value otherwise and rte_errno is set.
1481  */
1482 int
1483 mlx5_validate_action_rss(struct rte_eth_dev *dev,
1484                          const struct rte_flow_action *action,
1485                          struct rte_flow_error *error)
1486 {
1487         struct mlx5_priv *priv = dev->data->dev_private;
1488         const struct rte_flow_action_rss *rss = action->conf;
1489         enum mlx5_rxq_type rxq_type = MLX5_RXQ_TYPE_UNDEFINED;
1490         unsigned int i;
1491
1492         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1493             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1494                 return rte_flow_error_set(error, ENOTSUP,
1495                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1496                                           &rss->func,
1497                                           "RSS hash function not supported");
1498 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1499         if (rss->level > 2)
1500 #else
1501         if (rss->level > 1)
1502 #endif
1503                 return rte_flow_error_set(error, ENOTSUP,
1504                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1505                                           &rss->level,
1506                                           "tunnel RSS is not supported");
1507         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1508         if (rss->key_len == 0 && rss->key != NULL)
1509                 return rte_flow_error_set(error, ENOTSUP,
1510                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1511                                           &rss->key_len,
1512                                           "RSS hash key length 0");
1513         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1514                 return rte_flow_error_set(error, ENOTSUP,
1515                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1516                                           &rss->key_len,
1517                                           "RSS hash key too small");
1518         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1519                 return rte_flow_error_set(error, ENOTSUP,
1520                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1521                                           &rss->key_len,
1522                                           "RSS hash key too large");
1523         if (rss->queue_num > priv->config.ind_table_max_size)
1524                 return rte_flow_error_set(error, ENOTSUP,
1525                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1526                                           &rss->queue_num,
1527                                           "number of queues too large");
1528         if (rss->types & MLX5_RSS_HF_MASK)
1529                 return rte_flow_error_set(error, ENOTSUP,
1530                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1531                                           &rss->types,
1532                                           "some RSS protocols are not"
1533                                           " supported");
1534         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1535             !(rss->types & ETH_RSS_IP))
1536                 return rte_flow_error_set(error, EINVAL,
1537                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1538                                           "L3 partial RSS requested but L3 RSS"
1539                                           " type not specified");
1540         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1541             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1542                 return rte_flow_error_set(error, EINVAL,
1543                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1544                                           "L4 partial RSS requested but L4 RSS"
1545                                           " type not specified");
1546         if (!priv->rxqs_n)
1547                 return rte_flow_error_set(error, EINVAL,
1548                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1549                                           NULL, "No Rx queues configured");
1550         if (!rss->queue_num)
1551                 return rte_flow_error_set(error, EINVAL,
1552                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1553                                           NULL, "No queues configured");
1554         for (i = 0; i != rss->queue_num; ++i) {
1555                 struct mlx5_rxq_ctrl *rxq_ctrl;
1556
1557                 if (rss->queue[i] >= priv->rxqs_n)
1558                         return rte_flow_error_set
1559                                 (error, EINVAL,
1560                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1561                                  &rss->queue[i], "queue index out of range");
1562                 if (!(*priv->rxqs)[rss->queue[i]])
1563                         return rte_flow_error_set
1564                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1565                                  &rss->queue[i], "queue is not configured");
1566                 rxq_ctrl = container_of((*priv->rxqs)[rss->queue[i]],
1567                                         struct mlx5_rxq_ctrl, rxq);
1568                 if (i == 0)
1569                         rxq_type = rxq_ctrl->type;
1570                 if (rxq_type != rxq_ctrl->type)
1571                         return rte_flow_error_set
1572                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1573                                  &rss->queue[i],
1574                                  "combining hairpin and regular RSS queues is not supported");
1575         }
1576         return 0;
1577 }
1578
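/*
 * Illustrative sketch, not part of the driver: an RSS action configuration
 * consistent with the constraints checked above - Toeplitz hashing, outer
 * level, a full MLX5_RSS_HASH_KEY_LEN (40 byte) key, L3/L4 types without
 * lone SRC_ONLY/DST_ONLY selectors, and queue indexes that must all refer
 * to configured Rx queues of the same type. Key and queue values are
 * hypothetical.
 *
 *   static const uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0x6d, 0x5a };
 *   static const uint16_t rss_queues[] = { 0, 1, 2, 3 };
 *   const struct rte_flow_action_rss rss_conf = {
 *           .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *           .level = 1,
 *           .types = ETH_RSS_IP | ETH_RSS_UDP,
 *           .key_len = MLX5_RSS_HASH_KEY_LEN,
 *           .key = rss_key,
 *           .queue_num = RTE_DIM(rss_queues),
 *           .queue = rss_queues,
 *   };
 */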
1579 /*
1580  * Validate the rss action.
1581  *
1582  * @param[in] action
1583  *   Pointer to the RSS action.
1584  * @param[in] action_flags
1585  *   Bit-fields that hold the actions detected until now.
1586  * @param[in] dev
1587  *   Pointer to the Ethernet device structure.
1588  * @param[in] attr
1589  *   Attributes of flow that includes this action.
1590  * @param[in] item_flags
1591  *   Items that were detected.
1592  * @param[out] error
1593  *   Pointer to error structure.
1594  *
1595  * @return
1596  *   0 on success, a negative errno value otherwise and rte_errno is set.
1597  */
1598 int
1599 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1600                               uint64_t action_flags,
1601                               struct rte_eth_dev *dev,
1602                               const struct rte_flow_attr *attr,
1603                               uint64_t item_flags,
1604                               struct rte_flow_error *error)
1605 {
1606         const struct rte_flow_action_rss *rss = action->conf;
1607         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1608         int ret;
1609
1610         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1611                 return rte_flow_error_set(error, EINVAL,
1612                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1613                                           "can't have 2 fate actions"
1614                                           " in same flow");
1615         ret = mlx5_validate_action_rss(dev, action, error);
1616         if (ret)
1617                 return ret;
1618         if (attr->egress)
1619                 return rte_flow_error_set(error, ENOTSUP,
1620                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1621                                           "rss action not supported for "
1622                                           "egress");
1623         if (rss->level > 1 && !tunnel)
1624                 return rte_flow_error_set(error, EINVAL,
1625                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1626                                           "inner RSS is not supported for "
1627                                           "non-tunnel flows");
1628         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1629             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1630                 return rte_flow_error_set(error, EINVAL,
1631                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1632                                           "RSS on eCPRI is not yet supported");
1633         }
1634         return 0;
1635 }
1636
1637 /*
1638  * Validate the default miss action.
1639  *
1640  * @param[in] action_flags
1641  *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
1642  * @param[out] error
1643  *   Pointer to error structure.
1644  *
1645  * @return
1646  *   0 on success, a negative errno value otherwise and rte_errno is set.
1647  */
1648 int
1649 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1650                                 const struct rte_flow_attr *attr,
1651                                 struct rte_flow_error *error)
1652 {
1653         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1654                 return rte_flow_error_set(error, EINVAL,
1655                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1656                                           "can't have 2 fate actions in"
1657                                           " same flow");
1658         if (attr->egress)
1659                 return rte_flow_error_set(error, ENOTSUP,
1660                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1661                                           "default miss action not supported "
1662                                           "for egress");
1663         if (attr->group)
1664                 return rte_flow_error_set(error, ENOTSUP,
1665                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1666                                           "only group 0 is supported");
1667         if (attr->transfer)
1668                 return rte_flow_error_set(error, ENOTSUP,
1669                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1670                                           NULL, "transfer is not supported");
1671         return 0;
1672 }
1673
1674 /*
1675  * Validate the count action.
1676  *
1677  * @param[in] dev
1678  *   Pointer to the Ethernet device structure.
1679  * @param[in] attr
1680  *   Attributes of flow that includes this action.
1681  * @param[out] error
1682  *   Pointer to error structure.
1683  *
1684  * @return
1685  *   0 on success, a negative errno value otherwise and rte_errno is set.
1686  */
1687 int
1688 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1689                                 const struct rte_flow_attr *attr,
1690                                 struct rte_flow_error *error)
1691 {
1692         if (attr->egress)
1693                 return rte_flow_error_set(error, ENOTSUP,
1694                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1695                                           "count action not supported for "
1696                                           "egress");
1697         return 0;
1698 }
1699
1700 /*
1701  * Validate the ASO CT action.
1702  *
1703  * @param[in] dev
1704  *   Pointer to the Ethernet device structure.
1705  * @param[in] conntrack
1706  *   Pointer to the CT action profile.
1707  * @param[out] error
1708  *   Pointer to error structure.
1709  *
1710  * @return
1711  *   0 on success, a negative errno value otherwise and rte_errno is set.
1712  */
1713 int
1714 mlx5_validate_action_ct(struct rte_eth_dev *dev,
1715                         const struct rte_flow_action_conntrack *conntrack,
1716                         struct rte_flow_error *error)
1717 {
1718         RTE_SET_USED(dev);
1719
1720         if (conntrack->state > RTE_FLOW_CONNTRACK_STATE_TIME_WAIT)
1721                 return rte_flow_error_set(error, EINVAL,
1722                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1723                                           "Invalid CT state");
1724         if (conntrack->last_index > RTE_FLOW_CONNTRACK_FLAG_RST)
1725                 return rte_flow_error_set(error, EINVAL,
1726                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1727                                           "Invalid last TCP packet flag");
1728         return 0;
1729 }
1730
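/*
 * Illustrative sketch, not part of the driver: the two profile fields
 * checked above with in-range values; the remaining members of
 * struct rte_flow_action_conntrack keep their zero defaults here and the
 * chosen values are hypothetical. "dev" and "err" stand for the caller's
 * port and error structures.
 *
 *   struct rte_flow_action_conntrack ct_conf = {
 *           .state = RTE_FLOW_CONNTRACK_STATE_ESTABLISHED,
 *           .last_index = RTE_FLOW_CONNTRACK_FLAG_ACK,
 *   };
 *   int rc = mlx5_validate_action_ct(dev, &ct_conf, &err);
 */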
1731 /**
1732  * Verify the @p attributes will be correctly understood by the NIC and store
1733  * them in the @p flow if everything is correct.
1734  *
1735  * @param[in] dev
1736  *   Pointer to the Ethernet device structure.
1737  * @param[in] attributes
1738  *   Pointer to flow attributes
1739  * @param[out] error
1740  *   Pointer to error structure.
1741  *
1742  * @return
1743  *   0 on success, a negative errno value otherwise and rte_errno is set.
1744  */
1745 int
1746 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1747                               const struct rte_flow_attr *attributes,
1748                               struct rte_flow_error *error)
1749 {
1750         struct mlx5_priv *priv = dev->data->dev_private;
1751         uint32_t priority_max = priv->config.flow_prio - 1;
1752
1753         if (attributes->group)
1754                 return rte_flow_error_set(error, ENOTSUP,
1755                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1756                                           NULL, "groups are not supported");
1757         if (attributes->priority != MLX5_FLOW_LOWEST_PRIO_INDICATOR &&
1758             attributes->priority >= priority_max)
1759                 return rte_flow_error_set(error, ENOTSUP,
1760                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1761                                           NULL, "priority out of range");
1762         if (attributes->egress)
1763                 return rte_flow_error_set(error, ENOTSUP,
1764                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1765                                           "egress is not supported");
1766         if (attributes->transfer && !priv->config.dv_esw_en)
1767                 return rte_flow_error_set(error, ENOTSUP,
1768                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1769                                           NULL, "transfer is not supported");
1770         if (!attributes->ingress)
1771                 return rte_flow_error_set(error, EINVAL,
1772                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1773                                           NULL,
1774                                           "ingress attribute is mandatory");
1775         return 0;
1776 }
1777
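/*
 * Illustrative sketch, not part of the driver: an attribute block that
 * satisfies the checks above - group 0, a priority below
 * priv->config.flow_prio, ingress set, egress clear, and transfer left
 * clear unless E-Switch (dv_esw_en) support is enabled.
 *
 *   const struct rte_flow_attr attr = {
 *           .group = 0,
 *           .priority = 0,
 *           .ingress = 1,
 *   };
 */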
1778 /**
1779  * Validate ICMP6 item.
1780  *
1781  * @param[in] item
1782  *   Item specification.
1783  * @param[in] item_flags
1784  *   Bit-fields that hold the items detected until now.
1785  * @param[in] target_protocol
1786  *   The next protocol in the previous item.
1787  * @param[out] error
1788  *   Pointer to error structure.
1789  *
1790  * @return
1791  *   0 on success, a negative errno value otherwise and rte_errno is set.
1792  */
1793 int
1794 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1795                                uint64_t item_flags,
1796                                uint8_t target_protocol,
1797                                struct rte_flow_error *error)
1798 {
1799         const struct rte_flow_item_icmp6 *mask = item->mask;
1800         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1801         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1802                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1803         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1804                                       MLX5_FLOW_LAYER_OUTER_L4;
1805         int ret;
1806
1807         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1808                 return rte_flow_error_set(error, EINVAL,
1809                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1810                                           "protocol filtering not compatible"
1811                                           " with ICMP6 layer");
1812         if (!(item_flags & l3m))
1813                 return rte_flow_error_set(error, EINVAL,
1814                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1815                                           "IPv6 is mandatory to filter on"
1816                                           " ICMP6");
1817         if (item_flags & l4m)
1818                 return rte_flow_error_set(error, EINVAL,
1819                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1820                                           "multiple L4 layers not supported");
1821         if (!mask)
1822                 mask = &rte_flow_item_icmp6_mask;
1823         ret = mlx5_flow_item_acceptable
1824                 (item, (const uint8_t *)mask,
1825                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1826                  sizeof(struct rte_flow_item_icmp6),
1827                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1828         if (ret < 0)
1829                 return ret;
1830         return 0;
1831 }
1832
1833 /**
1834  * Validate ICMP item.
1835  *
1836  * @param[in] item
1837  *   Item specification.
1838  * @param[in] item_flags
1839  *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1840  * @param[out] error
1841  *   Pointer to error structure.
1842  *
1843  * @return
1844  *   0 on success, a negative errno value otherwise and rte_errno is set.
1845  */
1846 int
1847 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1848                              uint64_t item_flags,
1849                              uint8_t target_protocol,
1850                              struct rte_flow_error *error)
1851 {
1852         const struct rte_flow_item_icmp *mask = item->mask;
1853         const struct rte_flow_item_icmp nic_mask = {
1854                 .hdr.icmp_type = 0xff,
1855                 .hdr.icmp_code = 0xff,
1856                 .hdr.icmp_ident = RTE_BE16(0xffff),
1857                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1858         };
1859         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1860         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1861                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1862         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1863                                       MLX5_FLOW_LAYER_OUTER_L4;
1864         int ret;
1865
1866         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1867                 return rte_flow_error_set(error, EINVAL,
1868                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1869                                           "protocol filtering not compatible"
1870                                           " with ICMP layer");
1871         if (!(item_flags & l3m))
1872                 return rte_flow_error_set(error, EINVAL,
1873                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1874                                           "IPv4 is mandatory to filter"
1875                                           " on ICMP");
1876         if (item_flags & l4m)
1877                 return rte_flow_error_set(error, EINVAL,
1878                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1879                                           "multiple L4 layers not supported");
1880         if (!mask)
1881                 mask = &nic_mask;
1882         ret = mlx5_flow_item_acceptable
1883                 (item, (const uint8_t *)mask,
1884                  (const uint8_t *)&nic_mask,
1885                  sizeof(struct rte_flow_item_icmp),
1886                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1887         if (ret < 0)
1888                 return ret;
1889         return 0;
1890 }
1891
1892 /**
1893  * Validate Ethernet item.
1894  *
1895  * @param[in] item
1896  *   Item specification.
1897  * @param[in] item_flags
1898  *   Bit-fields that hold the items detected until now.
 * @param[in] ext_vlan_sup
 *   Whether extended VLAN features are supported or not.
1899  * @param[out] error
1900  *   Pointer to error structure.
1901  *
1902  * @return
1903  *   0 on success, a negative errno value otherwise and rte_errno is set.
1904  */
1905 int
1906 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1907                             uint64_t item_flags, bool ext_vlan_sup,
1908                             struct rte_flow_error *error)
1909 {
1910         const struct rte_flow_item_eth *mask = item->mask;
1911         const struct rte_flow_item_eth nic_mask = {
1912                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1913                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1914                 .type = RTE_BE16(0xffff),
1915                 .has_vlan = ext_vlan_sup ? 1 : 0,
1916         };
1917         int ret;
1918         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1919         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1920                                        MLX5_FLOW_LAYER_OUTER_L2;
1921
1922         if (item_flags & ethm)
1923                 return rte_flow_error_set(error, ENOTSUP,
1924                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1925                                           "multiple L2 layers not supported");
1926         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1927             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1928                 return rte_flow_error_set(error, EINVAL,
1929                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1930                                           "L2 layer should not follow "
1931                                           "L3 layers");
1932         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1933             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1934                 return rte_flow_error_set(error, EINVAL,
1935                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1936                                           "L2 layer should not follow VLAN");
1937         if (!mask)
1938                 mask = &rte_flow_item_eth_mask;
1939         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1940                                         (const uint8_t *)&nic_mask,
1941                                         sizeof(struct rte_flow_item_eth),
1942                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1943         return ret;
1944 }
1945
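/*
 * Illustrative sketch, not part of the driver: an Ethernet item whose mask
 * stays within nic_mask above (full destination MAC and EtherType). The
 * MAC address is hypothetical.
 *
 *   const struct rte_flow_item_eth eth_spec = {
 *           .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *           .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *   };
 *   const struct rte_flow_item_eth eth_mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *           .type = RTE_BE16(0xffff),
 *   };
 *   const struct rte_flow_item eth_item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &eth_mask,
 *   };
 */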
1946 /**
1947  * Validate VLAN item.
1948  *
1949  * @param[in] item
1950  *   Item specification.
1951  *   Bit-fields that hold the items detected until now.
1952  *   Bit-fields that holds the items detected until now.
1953  * @param[in] dev
1954  *   Ethernet device flow is being created on.
1955  * @param[out] error
1956  *   Pointer to error structure.
1957  *
1958  * @return
1959  *   0 on success, a negative errno value otherwise and rte_errno is set.
1960  */
1961 int
1962 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1963                              uint64_t item_flags,
1964                              struct rte_eth_dev *dev,
1965                              struct rte_flow_error *error)
1966 {
1967         const struct rte_flow_item_vlan *spec = item->spec;
1968         const struct rte_flow_item_vlan *mask = item->mask;
1969         const struct rte_flow_item_vlan nic_mask = {
1970                 .tci = RTE_BE16(UINT16_MAX),
1971                 .inner_type = RTE_BE16(UINT16_MAX),
1972         };
1973         uint16_t vlan_tag = 0;
1974         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1975         int ret;
1976         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1977                                         MLX5_FLOW_LAYER_INNER_L4) :
1978                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1979                                         MLX5_FLOW_LAYER_OUTER_L4);
1980         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1981                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1982
1983         if (item_flags & vlanm)
1984                 return rte_flow_error_set(error, EINVAL,
1985                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1986                                           "multiple VLAN layers not supported");
1987         else if ((item_flags & l34m) != 0)
1988                 return rte_flow_error_set(error, EINVAL,
1989                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1990                                           "VLAN cannot follow L3/L4 layer");
1991         if (!mask)
1992                 mask = &rte_flow_item_vlan_mask;
1993         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1994                                         (const uint8_t *)&nic_mask,
1995                                         sizeof(struct rte_flow_item_vlan),
1996                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1997         if (ret)
1998                 return ret;
1999         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
2000                 struct mlx5_priv *priv = dev->data->dev_private;
2001
2002                 if (priv->vmwa_context) {
2003                         /*
2004                          * Non-NULL context means we have a virtual machine
2005                          * and SR-IOV enabled, we have to create VLAN interface
2006                          * to make hypervisor to setup E-Switch vport
2007                          * context correctly. We avoid creating the multiple
2008                          * VLAN interfaces, so we cannot support VLAN tag mask.
2009                          */
2010                         return rte_flow_error_set(error, EINVAL,
2011                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2012                                                   item,
2013                                                   "VLAN tag mask is not"
2014                                                   " supported in virtual"
2015                                                   " environment");
2016                 }
2017         }
2018         if (spec) {
2019                 vlan_tag = spec->tci;
2020                 vlan_tag &= mask->tci;
2021         }
2022         /*
2023          * From verbs perspective an empty VLAN is equivalent
2024          * to a packet without VLAN layer.
2025          */
2026         if (!vlan_tag)
2027                 return rte_flow_error_set(error, EINVAL,
2028                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2029                                           item->spec,
2030                                           "VLAN cannot be empty");
2031         return 0;
2032 }
2033
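/*
 * Illustrative sketch, not part of the driver: a VLAN item that passes the
 * checks above - the masked TCI is non-zero and the full 12-bit VLAN-ID
 * mask avoids the VM/SR-IOV (vmwa_context) restriction on partial tag
 * masks. VLAN ID 100 is hypothetical.
 *
 *   const struct rte_flow_item_vlan vlan_spec = {
 *           .tci = RTE_BE16(100),
 *   };
 *   const struct rte_flow_item_vlan vlan_mask = {
 *           .tci = RTE_BE16(0x0fff),
 *   };
 *   const struct rte_flow_item vlan_item = {
 *           .type = RTE_FLOW_ITEM_TYPE_VLAN,
 *           .spec = &vlan_spec,
 *           .mask = &vlan_mask,
 *   };
 */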
2034 /**
2035  * Validate IPV4 item.
2036  *
2037  * @param[in] item
2038  *   Item specification.
2039  * @param[in] item_flags
2040  *   Bit-fields that hold the items detected until now.
2041  * @param[in] last_item
2042  *   Previous validated item in the pattern items.
2043  * @param[in] ether_type
2044  *   Type in the ethernet layer header (including dot1q).
2045  * @param[in] acc_mask
2046  *   Acceptable mask, if NULL default internal default mask
2047  *   will be used to check whether item fields are supported.
2048  * @param[in] range_accepted
2049  *   True if range of values is accepted for specific fields, false otherwise.
2050  * @param[out] error
2051  *   Pointer to error structure.
2052  *
2053  * @return
2054  *   0 on success, a negative errno value otherwise and rte_errno is set.
2055  */
2056 int
2057 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2058                              uint64_t item_flags,
2059                              uint64_t last_item,
2060                              uint16_t ether_type,
2061                              const struct rte_flow_item_ipv4 *acc_mask,
2062                              bool range_accepted,
2063                              struct rte_flow_error *error)
2064 {
2065         const struct rte_flow_item_ipv4 *mask = item->mask;
2066         const struct rte_flow_item_ipv4 *spec = item->spec;
2067         const struct rte_flow_item_ipv4 nic_mask = {
2068                 .hdr = {
2069                         .src_addr = RTE_BE32(0xffffffff),
2070                         .dst_addr = RTE_BE32(0xffffffff),
2071                         .type_of_service = 0xff,
2072                         .next_proto_id = 0xff,
2073                 },
2074         };
2075         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2076         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2077                                       MLX5_FLOW_LAYER_OUTER_L3;
2078         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2079                                       MLX5_FLOW_LAYER_OUTER_L4;
2080         int ret;
2081         uint8_t next_proto = 0xFF;
2082         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2083                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2084                                   MLX5_FLOW_LAYER_INNER_VLAN);
2085
2086         if ((last_item & l2_vlan) && ether_type &&
2087             ether_type != RTE_ETHER_TYPE_IPV4)
2088                 return rte_flow_error_set(error, EINVAL,
2089                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2090                                           "IPv4 cannot follow L2/VLAN layer "
2091                                           "whose ether type is not IPv4");
2092         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
2093                 if (mask && spec)
2094                         next_proto = mask->hdr.next_proto_id &
2095                                      spec->hdr.next_proto_id;
2096                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2097                         return rte_flow_error_set(error, EINVAL,
2098                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2099                                                   item,
2100                                                   "multiple tunnel "
2101                                                   "not supported");
2102         }
2103         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
2104                 return rte_flow_error_set(error, EINVAL,
2105                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2106                                           "wrong tunnel type - IPv6 specified "
2107                                           "but IPv4 item provided");
2108         if (item_flags & l3m)
2109                 return rte_flow_error_set(error, ENOTSUP,
2110                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2111                                           "multiple L3 layers not supported");
2112         else if (item_flags & l4m)
2113                 return rte_flow_error_set(error, EINVAL,
2114                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2115                                           "L3 cannot follow an L4 layer.");
2116         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2117                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2118                 return rte_flow_error_set(error, EINVAL,
2119                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2120                                           "L3 cannot follow an NVGRE layer.");
2121         if (!mask)
2122                 mask = &rte_flow_item_ipv4_mask;
2123         else if (mask->hdr.next_proto_id != 0 &&
2124                  mask->hdr.next_proto_id != 0xff)
2125                 return rte_flow_error_set(error, EINVAL,
2126                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2127                                           "partial mask is not supported"
2128                                           " for protocol");
2129         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2130                                         acc_mask ? (const uint8_t *)acc_mask
2131                                                  : (const uint8_t *)&nic_mask,
2132                                         sizeof(struct rte_flow_item_ipv4),
2133                                         range_accepted, error);
2134         if (ret < 0)
2135                 return ret;
2136         return 0;
2137 }
2138
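/*
 * Illustrative sketch, not part of the driver: an IPv4 item matching a
 * destination address and the UDP next protocol. The next_proto_id mask is
 * either 0 or 0xff as required above; the address is hypothetical.
 *
 *   const struct rte_flow_item_ipv4 ipv4_spec = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
 *                   .next_proto_id = IPPROTO_UDP,
 *           },
 *   };
 *   const struct rte_flow_item_ipv4 ipv4_mask = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(0xffffffff),
 *                   .next_proto_id = 0xff,
 *           },
 *   };
 */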
2139 /**
2140  * Validate IPV6 item.
2141  *
2142  * @param[in] item
2143  *   Item specification.
2144  * @param[in] item_flags
2145  *   Bit-fields that hold the items detected until now.
2146  * @param[in] last_item
2147  *   Previous validated item in the pattern items.
2148  * @param[in] ether_type
2149  *   Type in the ethernet layer header (including dot1q).
2150  * @param[in] acc_mask
2151  *   Acceptable mask, if NULL default internal default mask
2152  *   will be used to check whether item fields are supported.
2153  * @param[out] error
2154  *   Pointer to error structure.
2155  *
2156  * @return
2157  *   0 on success, a negative errno value otherwise and rte_errno is set.
2158  */
2159 int
2160 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2161                              uint64_t item_flags,
2162                              uint64_t last_item,
2163                              uint16_t ether_type,
2164                              const struct rte_flow_item_ipv6 *acc_mask,
2165                              struct rte_flow_error *error)
2166 {
2167         const struct rte_flow_item_ipv6 *mask = item->mask;
2168         const struct rte_flow_item_ipv6 *spec = item->spec;
2169         const struct rte_flow_item_ipv6 nic_mask = {
2170                 .hdr = {
2171                         .src_addr =
2172                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2173                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2174                         .dst_addr =
2175                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2176                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2177                         .vtc_flow = RTE_BE32(0xffffffff),
2178                         .proto = 0xff,
2179                 },
2180         };
2181         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2182         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2183                                       MLX5_FLOW_LAYER_OUTER_L3;
2184         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2185                                       MLX5_FLOW_LAYER_OUTER_L4;
2186         int ret;
2187         uint8_t next_proto = 0xFF;
2188         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
2189                                   MLX5_FLOW_LAYER_OUTER_VLAN |
2190                                   MLX5_FLOW_LAYER_INNER_VLAN);
2191
2192         if ((last_item & l2_vlan) && ether_type &&
2193             ether_type != RTE_ETHER_TYPE_IPV6)
2194                 return rte_flow_error_set(error, EINVAL,
2195                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2196                                           "IPv6 cannot follow L2/VLAN layer "
2197                                           "whose ether type is not IPv6");
2198         if (mask && mask->hdr.proto == UINT8_MAX && spec)
2199                 next_proto = spec->hdr.proto;
2200         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
2201                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
2202                         return rte_flow_error_set(error, EINVAL,
2203                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2204                                                   item,
2205                                                   "multiple tunnel "
2206                                                   "not supported");
2207         }
2208         if (next_proto == IPPROTO_HOPOPTS  ||
2209             next_proto == IPPROTO_ROUTING  ||
2210             next_proto == IPPROTO_FRAGMENT ||
2211             next_proto == IPPROTO_ESP      ||
2212             next_proto == IPPROTO_AH       ||
2213             next_proto == IPPROTO_DSTOPTS)
2214                 return rte_flow_error_set(error, EINVAL,
2215                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2216                                           "IPv6 proto (next header) should "
2217                                           "not be set as extension header");
2218         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2219                 return rte_flow_error_set(error, EINVAL,
2220                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2221                                           "wrong tunnel type - IPv4 specified "
2222                                           "but IPv6 item provided");
2223         if (item_flags & l3m)
2224                 return rte_flow_error_set(error, ENOTSUP,
2225                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2226                                           "multiple L3 layers not supported");
2227         else if (item_flags & l4m)
2228                 return rte_flow_error_set(error, EINVAL,
2229                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2230                                           "L3 cannot follow an L4 layer.");
2231         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2232                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2233                 return rte_flow_error_set(error, EINVAL,
2234                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2235                                           "L3 cannot follow an NVGRE layer.");
2236         if (!mask)
2237                 mask = &rte_flow_item_ipv6_mask;
2238         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2239                                         acc_mask ? (const uint8_t *)acc_mask
2240                                                  : (const uint8_t *)&nic_mask,
2241                                         sizeof(struct rte_flow_item_ipv6),
2242                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2243         if (ret < 0)
2244                 return ret;
2245         return 0;
2246 }
2247
2248 /**
2249  * Validate UDP item.
2250  *
2251  * @param[in] item
2252  *   Item specification.
2253  * @param[in] item_flags
2254  *   Bit-fields that hold the items detected until now.
2255  * @param[in] target_protocol
2256  *   The next protocol in the previous item.
2259  * @param[out] error
2260  *   Pointer to error structure.
2261  *
2262  * @return
2263  *   0 on success, a negative errno value otherwise and rte_errno is set.
2264  */
2265 int
2266 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2267                             uint64_t item_flags,
2268                             uint8_t target_protocol,
2269                             struct rte_flow_error *error)
2270 {
2271         const struct rte_flow_item_udp *mask = item->mask;
2272         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2273         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2274                                       MLX5_FLOW_LAYER_OUTER_L3;
2275         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2276                                       MLX5_FLOW_LAYER_OUTER_L4;
2277         int ret;
2278
2279         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2280                 return rte_flow_error_set(error, EINVAL,
2281                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2282                                           "protocol filtering not compatible"
2283                                           " with UDP layer");
2284         if (!(item_flags & l3m))
2285                 return rte_flow_error_set(error, EINVAL,
2286                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2287                                           "L3 is mandatory to filter on L4");
2288         if (item_flags & l4m)
2289                 return rte_flow_error_set(error, EINVAL,
2290                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2291                                           "multiple L4 layers not supported");
2292         if (!mask)
2293                 mask = &rte_flow_item_udp_mask;
2294         ret = mlx5_flow_item_acceptable
2295                 (item, (const uint8_t *)mask,
2296                  (const uint8_t *)&rte_flow_item_udp_mask,
2297                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2298                  error);
2299         if (ret < 0)
2300                 return ret;
2301         return 0;
2302 }
2303
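/*
 * Illustrative sketch, not part of the driver: the L3-before-L4 ordering
 * required above, expressed as a pattern fragment. Matching the VXLAN
 * default destination port 4789 is a hypothetical choice for this example.
 *
 *   const struct rte_flow_item_udp udp_spec = {
 *           .hdr.dst_port = RTE_BE16(4789),
 *   };
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */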
2304 /**
2305  * Validate TCP item.
2306  *
2307  * @param[in] item
2308  *   Item specification.
2309  * @param[in] item_flags
2310  *   Bit-fields that hold the items detected until now.
2311  * @param[in] target_protocol
2312  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2313  * @param[out] error
2314  *   Pointer to error structure.
2315  *
2316  * @return
2317  *   0 on success, a negative errno value otherwise and rte_errno is set.
2318  */
2319 int
2320 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2321                             uint64_t item_flags,
2322                             uint8_t target_protocol,
2323                             const struct rte_flow_item_tcp *flow_mask,
2324                             struct rte_flow_error *error)
2325 {
2326         const struct rte_flow_item_tcp *mask = item->mask;
2327         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2328         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2329                                       MLX5_FLOW_LAYER_OUTER_L3;
2330         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2331                                       MLX5_FLOW_LAYER_OUTER_L4;
2332         int ret;
2333
2334         MLX5_ASSERT(flow_mask);
2335         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2336                 return rte_flow_error_set(error, EINVAL,
2337                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2338                                           "protocol filtering not compatible"
2339                                           " with TCP layer");
2340         if (!(item_flags & l3m))
2341                 return rte_flow_error_set(error, EINVAL,
2342                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2343                                           "L3 is mandatory to filter on L4");
2344         if (item_flags & l4m)
2345                 return rte_flow_error_set(error, EINVAL,
2346                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2347                                           "multiple L4 layers not supported");
2348         if (!mask)
2349                 mask = &rte_flow_item_tcp_mask;
2350         ret = mlx5_flow_item_acceptable
2351                 (item, (const uint8_t *)mask,
2352                  (const uint8_t *)flow_mask,
2353                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2354                  error);
2355         if (ret < 0)
2356                 return ret;
2357         return 0;
2358 }
2359
2360 /**
2361  * Validate VXLAN item.
2362  *
2363  * @param[in] item
2364  *   Item specification.
2365  * @param[in] item_flags
2366  *   Bit-fields that hold the items detected until now.
2369  * @param[out] error
2370  *   Pointer to error structure.
2371  *
2372  * @return
2373  *   0 on success, a negative errno value otherwise and rte_errno is set.
2374  */
2375 int
2376 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2377                               uint64_t item_flags,
2378                               struct rte_flow_error *error)
2379 {
2380         const struct rte_flow_item_vxlan *spec = item->spec;
2381         const struct rte_flow_item_vxlan *mask = item->mask;
2382         int ret;
2383         union vni {
2384                 uint32_t vlan_id;
2385                 uint8_t vni[4];
2386         } id = { .vlan_id = 0, };
2387
2388
2389         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2390                 return rte_flow_error_set(error, ENOTSUP,
2391                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2392                                           "multiple tunnel layers not"
2393                                           " supported");
2394         /*
2395          * Verify only UDPv4 is present as defined in
2396          * https://tools.ietf.org/html/rfc7348
2397          */
2398         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2399                 return rte_flow_error_set(error, EINVAL,
2400                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2401                                           "no outer UDP layer found");
2402         if (!mask)
2403                 mask = &rte_flow_item_vxlan_mask;
2404         ret = mlx5_flow_item_acceptable
2405                 (item, (const uint8_t *)mask,
2406                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2407                  sizeof(struct rte_flow_item_vxlan),
2408                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2409         if (ret < 0)
2410                 return ret;
2411         if (spec) {
2412                 memcpy(&id.vni[1], spec->vni, 3);
2413                 memcpy(&id.vni[1], mask->vni, 3);
2414         }
2415         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2416                 return rte_flow_error_set(error, ENOTSUP,
2417                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2418                                           "VXLAN tunnel must be fully defined");
2419         return 0;
2420 }
2421
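/*
 * Illustrative sketch, not part of the driver: a fully defined VXLAN
 * pattern as required above, with the outer L2/L3/UDP items present before
 * the VXLAN item. The 24-bit VNI value 0x001234 is hypothetical and is
 * stored in network byte order.
 *
 *   const struct rte_flow_item_vxlan vxlan_spec = {
 *           .vni = { 0x00, 0x12, 0x34 },
 *   };
 *   const struct rte_flow_item vxlan_pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 */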
2422 /**
2423  * Validate VXLAN_GPE item.
2424  *
2425  * @param[in] item
2426  *   Item specification.
2427  * @param[in] item_flags
2428  *   Bit-fields that hold the items detected until now.
2429  * @param[in] dev
2430  *   Pointer to the Ethernet device structure.
2433  * @param[out] error
2434  *   Pointer to error structure.
2435  *
2436  * @return
2437  *   0 on success, a negative errno value otherwise and rte_errno is set.
2438  */
2439 int
2440 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2441                                   uint64_t item_flags,
2442                                   struct rte_eth_dev *dev,
2443                                   struct rte_flow_error *error)
2444 {
2445         struct mlx5_priv *priv = dev->data->dev_private;
2446         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2447         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2448         int ret;
2449         union vni {
2450                 uint32_t vlan_id;
2451                 uint8_t vni[4];
2452         } id = { .vlan_id = 0, };
2453
2454         if (!priv->config.l3_vxlan_en)
2455                 return rte_flow_error_set(error, ENOTSUP,
2456                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2457                                           "L3 VXLAN is not enabled by device"
2458                                           " parameter and/or not configured in"
2459                                           " firmware");
2460         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2461                 return rte_flow_error_set(error, ENOTSUP,
2462                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2463                                           "multiple tunnel layers not"
2464                                           " supported");
2465         /*
2466          * Verify only UDPv4 is present as defined in the VXLAN-GPE
2467          * specification (draft-ietf-nvo3-vxlan-gpe).
2468          */
2469         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2470                 return rte_flow_error_set(error, EINVAL,
2471                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2472                                           "no outer UDP layer found");
2473         if (!mask)
2474                 mask = &rte_flow_item_vxlan_gpe_mask;
2475         ret = mlx5_flow_item_acceptable
2476                 (item, (const uint8_t *)mask,
2477                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2478                  sizeof(struct rte_flow_item_vxlan_gpe),
2479                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2480         if (ret < 0)
2481                 return ret;
2482         if (spec) {
2483                 if (spec->protocol)
2484                         return rte_flow_error_set(error, ENOTSUP,
2485                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2486                                                   item,
2487                                                   "VxLAN-GPE protocol"
2488                                                   " not supported");
2489                 memcpy(&id.vni[1], spec->vni, 3);
2490                 memcpy(&id.vni[1], mask->vni, 3);
2491         }
2492         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2493                 return rte_flow_error_set(error, ENOTSUP,
2494                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2495                                           "VXLAN-GPE tunnel must be fully"
2496                                           " defined");
2497         return 0;
2498 }

2499 /**
2500  * Validate GRE Key item.
2501  *
2502  * @param[in] item
2503  *   Item specification.
2504  * @param[in] item_flags
2505  *   Bit flags to mark detected items.
2506  * @param[in] gre_item
2507  *   Pointer to the GRE item preceding this key item.
2508  * @param[out] error
2509  *   Pointer to error structure.
2510  *
2511  * @return
2512  *   0 on success, a negative errno value otherwise and rte_errno is set.
2513  */
2514 int
2515 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2516                                 uint64_t item_flags,
2517                                 const struct rte_flow_item *gre_item,
2518                                 struct rte_flow_error *error)
2519 {
2520         const rte_be32_t *mask = item->mask;
2521         int ret = 0;
2522         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2523         const struct rte_flow_item_gre *gre_spec;
2524         const struct rte_flow_item_gre *gre_mask;
2525
2526         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2527                 return rte_flow_error_set(error, ENOTSUP,
2528                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2529                                           "Multiple GRE keys are not supported");
2530         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2531                 return rte_flow_error_set(error, ENOTSUP,
2532                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2533                                           "No preceding GRE header");
2534         if (item_flags & MLX5_FLOW_LAYER_INNER)
2535                 return rte_flow_error_set(error, ENOTSUP,
2536                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2537                                           "GRE key following a wrong item");
2538         gre_mask = gre_item->mask;
2539         if (!gre_mask)
2540                 gre_mask = &rte_flow_item_gre_mask;
2541         gre_spec = gre_item->spec;
2542         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2543                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2544                 return rte_flow_error_set(error, EINVAL,
2545                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2546                                           "Key bit must be on");
2547
2548         if (!mask)
2549                 mask = &gre_key_default_mask;
2550         ret = mlx5_flow_item_acceptable
2551                 (item, (const uint8_t *)mask,
2552                  (const uint8_t *)&gre_key_default_mask,
2553                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2554         return ret;
2555 }
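
/*
 * Illustrative sketch, not part of the upstream file: a GRE + GRE key
 * pattern that satisfies the checks above. The GRE item sets the key
 * present bit (0x2000) in both spec and mask, as required before a
 * GRE_KEY item may follow. The guard macro and the key value are
 * assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static const struct rte_flow_item_gre example_gre_spec = {
        .c_rsvd0_ver = RTE_BE16(0x2000), /* K (key present) bit set */
};
static const struct rte_flow_item_gre example_gre_mask = {
        .c_rsvd0_ver = RTE_BE16(0x2000),
        .protocol = RTE_BE16(UINT16_MAX),
};
static const rte_be32_t example_gre_key = RTE_BE32(0x1234);

static const struct rte_flow_item example_gre_key_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &example_gre_spec,
          .mask = &example_gre_mask },
        { .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &example_gre_key },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */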
2556
2557 /**
2558  * Validate GRE item.
2559  *
2560  * @param[in] item
2561  *   Item specification.
2562  * @param[in] item_flags
2563  *   Bit flags to mark detected items.
2564  * @param[in] target_protocol
2565  *   The next protocol in the previous item.
2566  * @param[out] error
2567  *   Pointer to error structure.
2568  *
2569  * @return
2570  *   0 on success, a negative errno value otherwise and rte_errno is set.
2571  */
2572 int
2573 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2574                             uint64_t item_flags,
2575                             uint8_t target_protocol,
2576                             struct rte_flow_error *error)
2577 {
2578         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2579         const struct rte_flow_item_gre *mask = item->mask;
2580         int ret;
2581         const struct rte_flow_item_gre nic_mask = {
2582                 .c_rsvd0_ver = RTE_BE16(0xB000),
2583                 .protocol = RTE_BE16(UINT16_MAX),
2584         };
2585
2586         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2587                 return rte_flow_error_set(error, EINVAL,
2588                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2589                                           "protocol filtering not compatible"
2590                                           " with this GRE layer");
2591         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2592                 return rte_flow_error_set(error, ENOTSUP,
2593                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2594                                           "multiple tunnel layers not"
2595                                           " supported");
2596         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2597                 return rte_flow_error_set(error, ENOTSUP,
2598                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2599                                           "L3 Layer is missing");
2600         if (!mask)
2601                 mask = &rte_flow_item_gre_mask;
2602         ret = mlx5_flow_item_acceptable
2603                 (item, (const uint8_t *)mask,
2604                  (const uint8_t *)&nic_mask,
2605                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2606                  error);
2607         if (ret < 0)
2608                 return ret;
2609 #ifndef HAVE_MLX5DV_DR
2610 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2611         if (spec && (spec->protocol & mask->protocol))
2612                 return rte_flow_error_set(error, ENOTSUP,
2613                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2614                                           "without MPLS support the"
2615                                           " specification cannot be used for"
2616                                           " filtering");
2617 #endif
2618 #endif
2619         return 0;
2620 }
2621
2622 /**
2623  * Validate Geneve item.
2624  *
2625  * @param[in] item
2626  *   Item specification.
2627  * @param[in] item_flags
2628  *   Bit-fields that hold the items detected until now.
2629  * @param[in] dev
2630  *   Pointer to the Ethernet device structure.
2631  * @param[out] error
2632  *   Pointer to error structure.
2633  *
2634  * @return
2635  *   0 on success, a negative errno value otherwise and rte_errno is set.
2636  */
2638 int
2639 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2640                                uint64_t item_flags,
2641                                struct rte_eth_dev *dev,
2642                                struct rte_flow_error *error)
2643 {
2644         struct mlx5_priv *priv = dev->data->dev_private;
2645         const struct rte_flow_item_geneve *spec = item->spec;
2646         const struct rte_flow_item_geneve *mask = item->mask;
2647         int ret;
2648         uint16_t gbhdr;
2649         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2650                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2651         const struct rte_flow_item_geneve nic_mask = {
2652                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2653                 .vni = "\xff\xff\xff",
2654                 .protocol = RTE_BE16(UINT16_MAX),
2655         };
2656
2657         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2658                 return rte_flow_error_set(error, ENOTSUP,
2659                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2660                                           "Geneve is not enabled by device"
2661                                           " parameter and/or not configured in"
2662                                           " firmware");
2663         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2664                 return rte_flow_error_set(error, ENOTSUP,
2665                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2666                                           "multiple tunnel layers not"
2667                                           " supported");
2668         /*
2669          * Verify an outer UDP header is present, as required by the
2670          * Geneve encapsulation (RFC 8926).
2671          */
2672         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2673                 return rte_flow_error_set(error, EINVAL,
2674                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2675                                           "no outer UDP layer found");
2676         if (!mask)
2677                 mask = &rte_flow_item_geneve_mask;
2678         ret = mlx5_flow_item_acceptable
2679                                   (item, (const uint8_t *)mask,
2680                                    (const uint8_t *)&nic_mask,
2681                                    sizeof(struct rte_flow_item_geneve),
2682                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2683         if (ret)
2684                 return ret;
2685         if (spec) {
2686                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2687                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2688                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2689                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2690                         return rte_flow_error_set(error, ENOTSUP,
2691                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2692                                                   item,
2693                                                   "Geneve protocol unsupported"
2694                                                   " fields are being used");
2695                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2696                         return rte_flow_error_set
2697                                         (error, ENOTSUP,
2698                                          RTE_FLOW_ERROR_TYPE_ITEM,
2699                                          item,
2700                                          "Unsupported Geneve options length");
2701         }
2702         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2703                 return rte_flow_error_set
2704                                     (error, ENOTSUP,
2705                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2706                                      "Geneve tunnel must be fully defined");
2707         return 0;
2708 }
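
/*
 * Illustrative sketch, not part of the upstream file: a Geneve pattern
 * that satisfies the checks above, with fully defined outer layers, an
 * outer UDP item, and a spec that leaves the version/OAM/critical and
 * reserved bits at zero. The guard macro and the VNI value are
 * assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static const struct rte_flow_item_geneve example_geneve_spec = {
        .vni = "\x00\x00\x64", /* VNI 100 in network byte order */
        .protocol = RTE_BE16(RTE_ETHER_TYPE_IPV4),
};

static const struct rte_flow_item example_geneve_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_GENEVE, .spec = &example_geneve_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */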
2709
2710 /**
2711  * Validate Geneve TLV option item.
2712  *
2713  * @param[in] item
2714  *   Item specification.
2715  * @param[in] last_item
2716  *   Previous validated item in the pattern items.
2717  * @param[in] geneve_item
2718  *   Previous GENEVE item specification.
2719  * @param[in] dev
2720  *   Pointer to the rte_eth_dev structure.
2721  * @param[out] error
2722  *   Pointer to error structure.
2723  *
2724  * @return
2725  *   0 on success, a negative errno value otherwise and rte_errno is set.
2726  */
2727 int
2728 mlx5_flow_validate_item_geneve_opt(const struct rte_flow_item *item,
2729                                    uint64_t last_item,
2730                                    const struct rte_flow_item *geneve_item,
2731                                    struct rte_eth_dev *dev,
2732                                    struct rte_flow_error *error)
2733 {
2734         struct mlx5_priv *priv = dev->data->dev_private;
2735         struct mlx5_dev_ctx_shared *sh = priv->sh;
2736         struct mlx5_geneve_tlv_option_resource *geneve_opt_resource;
2737         struct mlx5_hca_attr *hca_attr = &priv->config.hca_attr;
2738         uint8_t data_max_supported =
2739                         hca_attr->max_geneve_tlv_option_data_len * 4;
2740         struct mlx5_dev_config *config = &priv->config;
2741         const struct rte_flow_item_geneve *geneve_spec;
2742         const struct rte_flow_item_geneve *geneve_mask;
2743         const struct rte_flow_item_geneve_opt *spec = item->spec;
2744         const struct rte_flow_item_geneve_opt *mask = item->mask;
2745         unsigned int i;
2746         unsigned int data_len;
2747         uint8_t tlv_option_len;
2748         uint16_t optlen_m, optlen_v;
2749         const struct rte_flow_item_geneve_opt full_mask = {
2750                 .option_class = RTE_BE16(0xffff),
2751                 .option_type = 0xff,
2752                 .option_len = 0x1f,
2753         };
2754
2755         if (!mask)
2756                 mask = &rte_flow_item_geneve_opt_mask;
2757         if (!spec)
2758                 return rte_flow_error_set
2759                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2760                         "Geneve TLV opt class/type/length must be specified");
2761         if ((uint32_t)spec->option_len > MLX5_GENEVE_OPTLEN_MASK)
2762                 return rte_flow_error_set
2763                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2764                         "Geneve TLV opt length exceeds the limit (31)");
2765         /* Check if class type and length masks are full. */
2766         if (full_mask.option_class != mask->option_class ||
2767             full_mask.option_type != mask->option_type ||
2768             full_mask.option_len != (mask->option_len & full_mask.option_len))
2769                 return rte_flow_error_set
2770                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2771                         "Geneve TLV opt class/type/length masks must be full");
2772         /* Check if length is supported */
2773         if ((uint32_t)spec->option_len >
2774                         config->hca_attr.max_geneve_tlv_option_data_len)
2775                 return rte_flow_error_set
2776                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2777                         "Geneve TLV opt length not supported");
2778         if (config->hca_attr.max_geneve_tlv_options > 1)
2779                 DRV_LOG(DEBUG,
2780                         "max_geneve_tlv_options supports more than 1 option");
2781         /* Check GENEVE item preceding. */
2782         if (!geneve_item || !(last_item & MLX5_FLOW_LAYER_GENEVE))
2783                 return rte_flow_error_set
2784                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2785                         "Geneve opt item must be preceded by a Geneve item");
2786         geneve_spec = geneve_item->spec;
2787         geneve_mask = geneve_item->mask ? geneve_item->mask :
2788                                           &rte_flow_item_geneve_mask;
2789         /* Check if GENEVE TLV option size doesn't exceed option length */
2790         if (geneve_spec && (geneve_mask->ver_opt_len_o_c_rsvd0 ||
2791                             geneve_spec->ver_opt_len_o_c_rsvd0)) {
2792                 tlv_option_len = spec->option_len & mask->option_len;
2793                 optlen_v = rte_be_to_cpu_16(geneve_spec->ver_opt_len_o_c_rsvd0);
2794                 optlen_v = MLX5_GENEVE_OPTLEN_VAL(optlen_v);
2795                 optlen_m = rte_be_to_cpu_16(geneve_mask->ver_opt_len_o_c_rsvd0);
2796                 optlen_m = MLX5_GENEVE_OPTLEN_VAL(optlen_m);
2797                 if ((optlen_v & optlen_m) <= tlv_option_len)
2798                         return rte_flow_error_set
2799                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2800                                  "GENEVE TLV option length exceeds optlen");
2801         }
2802         /* Check if length is 0 or data is 0. */
2803         if (spec->data == NULL || spec->option_len == 0)
2804                 return rte_flow_error_set
2805                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2806                         "Geneve TLV opt with zero data/length not supported");
2807         /* Check not all data & mask are 0. */
2808         data_len = spec->option_len * 4;
2809         if (mask->data == NULL) {
2810                 for (i = 0; i < data_len; i++)
2811                         if (spec->data[i])
2812                                 break;
2813                 if (i == data_len)
2814                         return rte_flow_error_set(error, ENOTSUP,
2815                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2816                                 "Can't match on Geneve option data 0");
2817         } else {
2818                 for (i = 0; i < data_len; i++)
2819                         if (spec->data[i] & mask->data[i])
2820                                 break;
2821                 if (i == data_len)
2822                         return rte_flow_error_set(error, ENOTSUP,
2823                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2824                                 "Can't match on Geneve option data and mask 0");
2825                 /* Check data mask supported. */
2826                 for (i = data_max_supported; i < data_len ; i++)
2827                         if (mask->data[i])
2828                                 return rte_flow_error_set(error, ENOTSUP,
2829                                         RTE_FLOW_ERROR_TYPE_ITEM, item,
2830                                         "Data mask is of unsupported size");
2831         }
2832         /* Check GENEVE option is supported in NIC. */
2833         if (!config->hca_attr.geneve_tlv_opt)
2834                 return rte_flow_error_set
2835                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM, item,
2836                         "Geneve TLV opt not supported");
2837         /* Check if we already have geneve option with different type/class. */
2838         rte_spinlock_lock(&sh->geneve_tlv_opt_sl);
2839         geneve_opt_resource = sh->geneve_tlv_option_resource;
2840         if (geneve_opt_resource != NULL)
2841                 if (geneve_opt_resource->option_class != spec->option_class ||
2842                     geneve_opt_resource->option_type != spec->option_type ||
2843                     geneve_opt_resource->length != spec->option_len) {
2844                         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2845                         return rte_flow_error_set(error, ENOTSUP,
2846                                 RTE_FLOW_ERROR_TYPE_ITEM, item,
2847                                 "Only one Geneve TLV option supported");
2848                 }
2849         rte_spinlock_unlock(&sh->geneve_tlv_opt_sl);
2850         return 0;
2851 }
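
/*
 * Illustrative sketch, not part of the upstream file: a Geneve TLV option
 * item that satisfies the checks above, with full class/type/length masks,
 * a non-zero option length and non-zero matched data. The guard macro and
 * the class/type/data values are assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static rte_be32_t example_gnv_opt_data[1] = { RTE_BE32(0x00000001) };
static rte_be32_t example_gnv_opt_dmask[1] = { RTE_BE32(0xffffffff) };

static const struct rte_flow_item_geneve_opt example_gnv_opt_spec = {
        .option_class = RTE_BE16(0x0102),
        .option_type = 0x33,
        .option_len = 1, /* length in 4-byte words */
        .data = example_gnv_opt_data,
};
static const struct rte_flow_item_geneve_opt example_gnv_opt_mask = {
        .option_class = RTE_BE16(0xffff),
        .option_type = 0xff,
        .option_len = 0x1f,
        .data = example_gnv_opt_dmask,
};

static const struct rte_flow_item example_gnv_opt_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_GENEVE },
        { .type = RTE_FLOW_ITEM_TYPE_GENEVE_OPT,
          .spec = &example_gnv_opt_spec, .mask = &example_gnv_opt_mask },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */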
2852
2853 /**
2854  * Validate MPLS item.
2855  *
2856  * @param[in] dev
2857  *   Pointer to the rte_eth_dev structure.
2858  * @param[in] item
2859  *   Item specification.
2860  * @param[in] item_flags
2861  *   Bit-fields that holds the items detected until now.
2862  * @param[in] prev_layer
2863  *   The protocol layer indicated in previous item.
2864  * @param[out] error
2865  *   Pointer to error structure.
2866  *
2867  * @return
2868  *   0 on success, a negative errno value otherwise and rte_errno is set.
2869  */
2870 int
2871 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2872                              const struct rte_flow_item *item __rte_unused,
2873                              uint64_t item_flags __rte_unused,
2874                              uint64_t prev_layer __rte_unused,
2875                              struct rte_flow_error *error)
2876 {
2877 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2878         const struct rte_flow_item_mpls *mask = item->mask;
2879         struct mlx5_priv *priv = dev->data->dev_private;
2880         int ret;
2881
2882         if (!priv->config.mpls_en)
2883                 return rte_flow_error_set(error, ENOTSUP,
2884                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2885                                           "MPLS not supported or"
2886                                           " disabled in firmware"
2887                                           " configuration.");
2888         /* MPLS over IP, UDP, GRE is allowed */
2889         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2890                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2891                             MLX5_FLOW_LAYER_GRE |
2892                             MLX5_FLOW_LAYER_GRE_KEY)))
2893                 return rte_flow_error_set(error, EINVAL,
2894                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2895                                           "protocol filtering not compatible"
2896                                           " with MPLS layer");
2897         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2898         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2899             !(item_flags & MLX5_FLOW_LAYER_GRE))
2900                 return rte_flow_error_set(error, ENOTSUP,
2901                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2902                                           "multiple tunnel layers not"
2903                                           " supported");
2904         if (!mask)
2905                 mask = &rte_flow_item_mpls_mask;
2906         ret = mlx5_flow_item_acceptable
2907                 (item, (const uint8_t *)mask,
2908                  (const uint8_t *)&rte_flow_item_mpls_mask,
2909                  sizeof(struct rte_flow_item_mpls),
2910                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2911         if (ret < 0)
2912                 return ret;
2913         return 0;
2914 #else
2915         return rte_flow_error_set(error, ENOTSUP,
2916                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2917                                   "MPLS is not supported by Verbs, please"
2918                                   " update.");
2919 #endif
2920 }
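
/*
 * Illustrative sketch, not part of the upstream file: MPLS over GRE, one
 * of the encapsulations accepted by the checks above (MPLS may follow an
 * outer L3, outer UDP, GRE or GRE key item). The guard macro and the MPLS
 * label value are assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static const struct rte_flow_item_mpls example_mpls_spec = {
        .label_tc_s = "\x00\x01\x00", /* label 16, TC 0, S 0 */
};

static const struct rte_flow_item example_mpls_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_GRE },
        { .type = RTE_FLOW_ITEM_TYPE_MPLS, .spec = &example_mpls_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */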
2921
2922 /**
2923  * Validate NVGRE item.
2924  *
2925  * @param[in] item
2926  *   Item specification.
2927  * @param[in] item_flags
2928  *   Bit flags to mark detected items.
2929  * @param[in] target_protocol
2930  *   The next protocol in the previous item.
2931  * @param[out] error
2932  *   Pointer to error structure.
2933  *
2934  * @return
2935  *   0 on success, a negative errno value otherwise and rte_errno is set.
2936  */
2937 int
2938 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2939                               uint64_t item_flags,
2940                               uint8_t target_protocol,
2941                               struct rte_flow_error *error)
2942 {
2943         const struct rte_flow_item_nvgre *mask = item->mask;
2944         int ret;
2945
2946         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2947                 return rte_flow_error_set(error, EINVAL,
2948                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2949                                           "protocol filtering not compatible"
2950                                           " with this GRE layer");
2951         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2952                 return rte_flow_error_set(error, ENOTSUP,
2953                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2954                                           "multiple tunnel layers not"
2955                                           " supported");
2956         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2957                 return rte_flow_error_set(error, ENOTSUP,
2958                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2959                                           "L3 Layer is missing");
2960         if (!mask)
2961                 mask = &rte_flow_item_nvgre_mask;
2962         ret = mlx5_flow_item_acceptable
2963                 (item, (const uint8_t *)mask,
2964                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2965                  sizeof(struct rte_flow_item_nvgre),
2966                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2967         if (ret < 0)
2968                 return ret;
2969         return 0;
2970 }
2971
2972 /**
2973  * Validate eCPRI item.
2974  *
2975  * @param[in] item
2976  *   Item specification.
2977  * @param[in] item_flags
2978  *   Bit-fields that holds the items detected until now.
2979  * @param[in] last_item
2980  *   Previous validated item in the pattern items.
2981  * @param[in] ether_type
2982  *   Type in the ethernet layer header (including dot1q).
2983  * @param[in] acc_mask
2984  *   Acceptable mask; if NULL, the default internal mask
2985  *   is used to check whether item fields are supported.
2986  * @param[out] error
2987  *   Pointer to error structure.
2988  *
2989  * @return
2990  *   0 on success, a negative errno value otherwise and rte_errno is set.
2991  */
2992 int
2993 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2994                               uint64_t item_flags,
2995                               uint64_t last_item,
2996                               uint16_t ether_type,
2997                               const struct rte_flow_item_ecpri *acc_mask,
2998                               struct rte_flow_error *error)
2999 {
3000         const struct rte_flow_item_ecpri *mask = item->mask;
3001         const struct rte_flow_item_ecpri nic_mask = {
3002                 .hdr = {
3003                         .common = {
3004                                 .u32 =
3005                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
3006                                         .type = 0xFF,
3007                                         }).u32),
3008                         },
3009                         .dummy[0] = 0xFFFFFFFF,
3010                 },
3011         };
3012         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
3013                                         MLX5_FLOW_LAYER_OUTER_VLAN);
3014         struct rte_flow_item_ecpri mask_lo;
3015
3016         if (!(last_item & outer_l2_vlan) &&
3017             last_item != MLX5_FLOW_LAYER_OUTER_L4_UDP)
3018                 return rte_flow_error_set(error, EINVAL,
3019                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3020                                           "eCPRI can only follow L2/VLAN layer or UDP layer");
3021         if ((last_item & outer_l2_vlan) && ether_type &&
3022             ether_type != RTE_ETHER_TYPE_ECPRI)
3023                 return rte_flow_error_set(error, EINVAL,
3024                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3025                                           "eCPRI cannot follow an L2/VLAN layer whose ether type is not 0xAEFE");
3026         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3027                 return rte_flow_error_set(error, EINVAL,
3028                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3029                                           "eCPRI with tunnel is not supported right now");
3030         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
3031                 return rte_flow_error_set(error, ENOTSUP,
3032                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3033                                           "multiple L3 layers not supported");
3034         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
3035                 return rte_flow_error_set(error, EINVAL,
3036                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3037                                           "eCPRI cannot coexist with a TCP layer");
3038         /* In specification, eCPRI could be over UDP layer. */
3039         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
3040                 return rte_flow_error_set(error, EINVAL,
3041                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3042                                           "eCPRI over UDP layer is not supported yet");
3043         /* Mask for type field in common header could be zero. */
3044         if (!mask)
3045                 mask = &rte_flow_item_ecpri_mask;
3046         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
3047         /* Input mask is in big-endian format. */
3048         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
3049                 return rte_flow_error_set(error, EINVAL,
3050                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3051                                           "partial mask is not supported for protocol");
3052         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
3053                 return rte_flow_error_set(error, EINVAL,
3054                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
3055                                           "message header mask must be after a type mask");
3056         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
3057                                          acc_mask ? (const uint8_t *)acc_mask
3058                                                   : (const uint8_t *)&nic_mask,
3059                                          sizeof(struct rte_flow_item_ecpri),
3060                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
3061 }
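
/*
 * Illustrative sketch, not part of the upstream file: an eCPRI pattern
 * over Ethernet that satisfies the checks above. The L2 item carries
 * ether type 0xAEFE and the eCPRI mask covers the whole message type
 * byte, as required. The guard macro and the chosen values are
 * assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static const struct rte_flow_item_eth example_ecpri_l2_spec = {
        .type = RTE_BE16(RTE_ETHER_TYPE_ECPRI),
};
static const struct rte_flow_item_ecpri example_ecpri_spec = {
        .hdr = {
                .common = {
                        .u32 = RTE_BE32(0x00000000), /* message type 0: IQ data */
                },
        },
};
static const struct rte_flow_item_ecpri example_ecpri_msk = {
        .hdr = {
                .common = {
                        .u32 = RTE_BE32(0x00ff0000), /* mask the type byte only */
                },
        },
};

static const struct rte_flow_item example_ecpri_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &example_ecpri_l2_spec },
        { .type = RTE_FLOW_ITEM_TYPE_ECPRI, .spec = &example_ecpri_spec,
          .mask = &example_ecpri_msk },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */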
3062
3063 /**
3064  * Release resources related to the QUEUE/RSS action split.
3065  *
3066  * @param dev
3067  *   Pointer to Ethernet device.
3068  * @param flow
3069  *   Flow to release id's from.
3070  */
3071 static void
3072 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
3073                              struct rte_flow *flow)
3074 {
3075         struct mlx5_priv *priv = dev->data->dev_private;
3076         uint32_t handle_idx;
3077         struct mlx5_flow_handle *dev_handle;
3078
3079         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
3080                        handle_idx, dev_handle, next)
3081                 if (dev_handle->split_flow_id &&
3082                     !dev_handle->is_meter_flow_id)
3083                         mlx5_ipool_free(priv->sh->ipool
3084                                         [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
3085                                         dev_handle->split_flow_id);
3086 }
3087
3088 static int
3089 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
3090                    const struct rte_flow_attr *attr __rte_unused,
3091                    const struct rte_flow_item items[] __rte_unused,
3092                    const struct rte_flow_action actions[] __rte_unused,
3093                    bool external __rte_unused,
3094                    int hairpin __rte_unused,
3095                    struct rte_flow_error *error)
3096 {
3097         return rte_flow_error_set(error, ENOTSUP,
3098                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3099 }
3100
3101 static struct mlx5_flow *
3102 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
3103                   const struct rte_flow_attr *attr __rte_unused,
3104                   const struct rte_flow_item items[] __rte_unused,
3105                   const struct rte_flow_action actions[] __rte_unused,
3106                   struct rte_flow_error *error)
3107 {
3108         rte_flow_error_set(error, ENOTSUP,
3109                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3110         return NULL;
3111 }
3112
3113 static int
3114 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
3115                     struct mlx5_flow *dev_flow __rte_unused,
3116                     const struct rte_flow_attr *attr __rte_unused,
3117                     const struct rte_flow_item items[] __rte_unused,
3118                     const struct rte_flow_action actions[] __rte_unused,
3119                     struct rte_flow_error *error)
3120 {
3121         return rte_flow_error_set(error, ENOTSUP,
3122                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3123 }
3124
3125 static int
3126 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
3127                 struct rte_flow *flow __rte_unused,
3128                 struct rte_flow_error *error)
3129 {
3130         return rte_flow_error_set(error, ENOTSUP,
3131                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3132 }
3133
3134 static void
3135 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
3136                  struct rte_flow *flow __rte_unused)
3137 {
3138 }
3139
3140 static void
3141 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
3142                   struct rte_flow *flow __rte_unused)
3143 {
3144 }
3145
3146 static int
3147 flow_null_query(struct rte_eth_dev *dev __rte_unused,
3148                 struct rte_flow *flow __rte_unused,
3149                 const struct rte_flow_action *actions __rte_unused,
3150                 void *data __rte_unused,
3151                 struct rte_flow_error *error)
3152 {
3153         return rte_flow_error_set(error, ENOTSUP,
3154                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
3155 }
3156
3157 static int
3158 flow_null_sync_domain(struct rte_eth_dev *dev __rte_unused,
3159                       uint32_t domains __rte_unused,
3160                       uint32_t flags __rte_unused)
3161 {
3162         return 0;
3163 }
3164
3165 /* Void driver to protect from null pointer reference. */
3166 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
3167         .validate = flow_null_validate,
3168         .prepare = flow_null_prepare,
3169         .translate = flow_null_translate,
3170         .apply = flow_null_apply,
3171         .remove = flow_null_remove,
3172         .destroy = flow_null_destroy,
3173         .query = flow_null_query,
3174         .sync_domain = flow_null_sync_domain,
3175 };
3176
3177 /**
3178  * Select flow driver type according to flow attributes and device
3179  * configuration.
3180  *
3181  * @param[in] dev
3182  *   Pointer to the dev structure.
3183  * @param[in] attr
3184  *   Pointer to the flow attributes.
3185  *
3186  * @return
3187  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
3188  */
3189 static enum mlx5_flow_drv_type
3190 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
3191 {
3192         struct mlx5_priv *priv = dev->data->dev_private;
3193         /* The OS can determine first a specific flow type (DV, VERBS) */
3194         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
3195
3196         if (type != MLX5_FLOW_TYPE_MAX)
3197                 return type;
3198         /* If no OS specific type - continue with DV/VERBS selection */
3199         if (attr->transfer && priv->config.dv_esw_en)
3200                 type = MLX5_FLOW_TYPE_DV;
3201         if (!attr->transfer)
3202                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
3203                                                  MLX5_FLOW_TYPE_VERBS;
3204         return type;
3205 }
3206
3207 #define flow_get_drv_ops(type) flow_drv_ops[type]
3208
3209 /**
3210  * Flow driver validation API. This abstracts calling driver specific functions.
3211  * The type of flow driver is determined according to flow attributes.
3212  *
3213  * @param[in] dev
3214  *   Pointer to the dev structure.
3215  * @param[in] attr
3216  *   Pointer to the flow attributes.
3217  * @param[in] items
3218  *   Pointer to the list of items.
3219  * @param[in] actions
3220  *   Pointer to the list of actions.
3221  * @param[in] external
3222  *   This flow rule is created by a request external to the PMD.
3223  * @param[in] hairpin
3224  *   Number of hairpin TX actions, 0 means classic flow.
3225  * @param[out] error
3226  *   Pointer to the error structure.
3227  *
3228  * @return
3229  *   0 on success, a negative errno value otherwise and rte_errno is set.
3230  */
3231 static inline int
3232 flow_drv_validate(struct rte_eth_dev *dev,
3233                   const struct rte_flow_attr *attr,
3234                   const struct rte_flow_item items[],
3235                   const struct rte_flow_action actions[],
3236                   bool external, int hairpin, struct rte_flow_error *error)
3237 {
3238         const struct mlx5_flow_driver_ops *fops;
3239         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
3240
3241         fops = flow_get_drv_ops(type);
3242         return fops->validate(dev, attr, items, actions, external,
3243                               hairpin, error);
3244 }
3245
3246 /**
3247  * Flow driver preparation API. This abstracts calling driver specific
3248  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3249  * calculates the size of memory required for device flow, allocates the memory,
3250  * initializes the device flow and returns the pointer.
3251  *
3252  * @note
3253  *   This function initializes device flow structure such as dv or verbs in
3254  *   struct mlx5_flow. However, it is the caller's responsibility to initialize
3255  *   the rest. For example, adding the returned device flow to the flow->dev_flow
3256  *   list and setting the backward reference to the flow should be done outside of
3257  *   this function. The layers field is not filled either.
3258  *
3259  * @param[in] dev
3260  *   Pointer to the dev structure.
3261  * @param[in] attr
3262  *   Pointer to the flow attributes.
3263  * @param[in] items
3264  *   Pointer to the list of items.
3265  * @param[in] actions
3266  *   Pointer to the list of actions.
3267  * @param[in] flow_idx
3268  *   Memory pool index of this flow.
3269  * @param[out] error
3270  *   Pointer to the error structure.
3271  *
3272  * @return
3273  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
3274  */
3275 static inline struct mlx5_flow *
3276 flow_drv_prepare(struct rte_eth_dev *dev,
3277                  const struct rte_flow *flow,
3278                  const struct rte_flow_attr *attr,
3279                  const struct rte_flow_item items[],
3280                  const struct rte_flow_action actions[],
3281                  uint32_t flow_idx,
3282                  struct rte_flow_error *error)
3283 {
3284         const struct mlx5_flow_driver_ops *fops;
3285         enum mlx5_flow_drv_type type = flow->drv_type;
3286         struct mlx5_flow *mlx5_flow = NULL;
3287
3288         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3289         fops = flow_get_drv_ops(type);
3290         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
3291         if (mlx5_flow)
3292                 mlx5_flow->flow_idx = flow_idx;
3293         return mlx5_flow;
3294 }
3295
3296 /**
3297  * Flow driver translation API. This abstracts calling driver specific
3298  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
3299  * translates a generic flow into a driver flow. flow_drv_prepare() must
3300  * precede.
3301  *
3302  * @note
3303  *   dev_flow->layers could be filled as a result of parsing during translation
3304  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
3305  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
3306  *   flow->actions could be overwritten even though all the expanded dev_flows
3307  *   have the same actions.
3308  *
3309  * @param[in] dev
3310  *   Pointer to the rte dev structure.
3311  * @param[in, out] dev_flow
3312  *   Pointer to the mlx5 flow.
3313  * @param[in] attr
3314  *   Pointer to the flow attributes.
3315  * @param[in] items
3316  *   Pointer to the list of items.
3317  * @param[in] actions
3318  *   Pointer to the list of actions.
3319  * @param[out] error
3320  *   Pointer to the error structure.
3321  *
3322  * @return
3323  *   0 on success, a negative errno value otherwise and rte_errno is set.
3324  */
3325 static inline int
3326 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
3327                    const struct rte_flow_attr *attr,
3328                    const struct rte_flow_item items[],
3329                    const struct rte_flow_action actions[],
3330                    struct rte_flow_error *error)
3331 {
3332         const struct mlx5_flow_driver_ops *fops;
3333         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
3334
3335         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3336         fops = flow_get_drv_ops(type);
3337         return fops->translate(dev, dev_flow, attr, items, actions, error);
3338 }
3339
3340 /**
3341  * Flow driver apply API. This abstracts calling driver specific functions.
3342  * Parent flow (rte_flow) should have driver type (drv_type). It applies
3343  * translated driver flows on to device. flow_drv_translate() must precede.
3344  *
3345  * @param[in] dev
3346  *   Pointer to Ethernet device structure.
3347  * @param[in, out] flow
3348  *   Pointer to flow structure.
3349  * @param[out] error
3350  *   Pointer to error structure.
3351  *
3352  * @return
3353  *   0 on success, a negative errno value otherwise and rte_errno is set.
3354  */
3355 static inline int
3356 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3357                struct rte_flow_error *error)
3358 {
3359         const struct mlx5_flow_driver_ops *fops;
3360         enum mlx5_flow_drv_type type = flow->drv_type;
3361
3362         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3363         fops = flow_get_drv_ops(type);
3364         return fops->apply(dev, flow, error);
3365 }
3366
3367 /**
3368  * Flow driver destroy API. This abstracts calling driver specific functions.
3369  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3370  * on device and releases resources of the flow.
3371  *
3372  * @param[in] dev
3373  *   Pointer to Ethernet device.
3374  * @param[in, out] flow
3375  *   Pointer to flow structure.
3376  */
3377 static inline void
3378 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3379 {
3380         const struct mlx5_flow_driver_ops *fops;
3381         enum mlx5_flow_drv_type type = flow->drv_type;
3382
3383         flow_mreg_split_qrss_release(dev, flow);
3384         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3385         fops = flow_get_drv_ops(type);
3386         fops->destroy(dev, flow);
3387 }
3388
3389 /**
3390  * Flow driver find RSS policy tbl API. This abstracts calling driver
3391  * specific functions. Parent flow (rte_flow) should have driver
3392  * type (drv_type). It will find the RSS policy table that has the rss_desc.
3393  *
3394  * @param[in] dev
3395  *   Pointer to Ethernet device.
3396  * @param[in, out] flow
3397  *   Pointer to flow structure.
3398  * @param[in] policy
3399  *   Pointer to meter policy table.
3400  * @param[in] rss_desc
3401  *   Pointer to rss_desc
3402  */
3403 static struct mlx5_flow_meter_sub_policy *
3404 flow_drv_meter_sub_policy_rss_prepare(struct rte_eth_dev *dev,
3405                 struct rte_flow *flow,
3406                 struct mlx5_flow_meter_policy *policy,
3407                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS])
3408 {
3409         const struct mlx5_flow_driver_ops *fops;
3410         enum mlx5_flow_drv_type type = flow->drv_type;
3411
3412         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3413         fops = flow_get_drv_ops(type);
3414         return fops->meter_sub_policy_rss_prepare(dev, policy, rss_desc);
3415 }
3416
3417 /**
3418  * Get RSS action from the action list.
3419  *
3420  * @param[in] dev
3421  *   Pointer to Ethernet device.
3422  * @param[in] actions
3423  *   Pointer to the list of actions.
3424  * @param[in] flow
3425  *   Parent flow structure pointer.
3426  *
3427  * @return
3428  *   Pointer to the RSS action if it exists, NULL otherwise.
3429  */
3430 static const struct rte_flow_action_rss*
3431 flow_get_rss_action(struct rte_eth_dev *dev,
3432                     const struct rte_flow_action actions[])
3433 {
3434         struct mlx5_priv *priv = dev->data->dev_private;
3435         const struct rte_flow_action_rss *rss = NULL;
3436
3437         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3438                 switch (actions->type) {
3439                 case RTE_FLOW_ACTION_TYPE_RSS:
3440                         rss = actions->conf;
3441                         break;
3442                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
3443                 {
3444                         const struct rte_flow_action_sample *sample =
3445                                                                 actions->conf;
3446                         const struct rte_flow_action *act = sample->actions;
3447                         for (; act->type != RTE_FLOW_ACTION_TYPE_END; act++)
3448                                 if (act->type == RTE_FLOW_ACTION_TYPE_RSS)
3449                                         rss = act->conf;
3450                         break;
3451                 }
3452                 case RTE_FLOW_ACTION_TYPE_METER:
3453                 {
3454                         uint32_t mtr_idx;
3455                         struct mlx5_flow_meter_info *fm;
3456                         struct mlx5_flow_meter_policy *policy;
3457                         const struct rte_flow_action_meter *mtr = actions->conf;
3458
3459                         fm = mlx5_flow_meter_find(priv, mtr->mtr_id, &mtr_idx);
3460                         if (fm) {
3461                                 policy = mlx5_flow_meter_policy_find(dev,
3462                                                 fm->policy_id, NULL);
3463                                 if (policy && policy->is_rss)
3464                                         rss =
3465                                 policy->act_cnt[RTE_COLOR_GREEN].rss->conf;
3466                         }
3467                         break;
3468                 }
3469                 default:
3470                         break;
3471                 }
3472         }
3473         return rss;
3474 }
3475
3476 /**
3477  * Get ASO age action by index.
3478  *
3479  * @param[in] dev
3480  *   Pointer to the Ethernet device structure.
3481  * @param[in] age_idx
3482  *   Index to the ASO age action.
3483  *
3484  * @return
3485  *   The specified ASO age action.
3486  */
3487 struct mlx5_aso_age_action*
3488 flow_aso_age_get_by_idx(struct rte_eth_dev *dev, uint32_t age_idx)
3489 {
3490         uint16_t pool_idx = age_idx & UINT16_MAX;
3491         uint16_t offset = (age_idx >> 16) & UINT16_MAX;
3492         struct mlx5_priv *priv = dev->data->dev_private;
3493         struct mlx5_aso_age_mng *mng = priv->sh->aso_age_mng;
3494         struct mlx5_aso_age_pool *pool = mng->pools[pool_idx];
3495
3496         return &pool->actions[offset - 1];
3497 }
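
/*
 * Illustrative sketch, not part of the upstream file: how an ASO age
 * index is composed so that the decoding above works, with the pool index
 * in the lower 16 bits and the 1-based action offset in the upper 16 bits
 * (hence the "offset - 1" above). The helper name and the guard macro are
 * assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static inline uint32_t
example_aso_age_idx_compose(uint16_t pool_idx, uint16_t action_pos)
{
        /* Store the 0-based position as a 1-based offset in the high word. */
        return ((uint32_t)(action_pos + 1) << 16) | pool_idx;
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */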
3498
3499 /* Maps an indirect action to its translated direct action in an actions array. */
3500 struct mlx5_translated_action_handle {
3501         struct rte_flow_action_handle *action; /**< Indirect action handle. */
3502         int index; /**< Index in related array of rte_flow_action. */
3503 };
3504
3505 /**
3506  * Translates actions of type RTE_FLOW_ACTION_TYPE_INDIRECT to related
3507  * direct action if translation possible.
3508  * This allows running the same execution path for both direct and
3509  * indirect actions on flow creation. All necessary preparations for indirect
3510  * action handling should be performed on the *handle* action list returned
3511  * from this call.
3512  *
3513  * @param[in] dev
3514  *   Pointer to Ethernet device.
3515  * @param[in] actions
3516  *   List of actions to translate.
3517  * @param[out] handle
3518  *   List to store translated indirect action object handles.
3519  * @param[in, out] indir_n
3520  *   Size of *handle* array. On return should be updated with number of
3521  *   indirect actions retrieved from the *actions* list.
3522  * @param[out] translated_actions
3523  *   List of actions where all indirect actions were translated to direct
3524  *   if possible. NULL if no translation took place.
3525  * @param[out] error
3526  *   Pointer to the error structure.
3527  *
3528  * @return
3529  *   0 on success, a negative errno value otherwise and rte_errno is set.
3530  */
3531 static int
3532 flow_action_handles_translate(struct rte_eth_dev *dev,
3533                               const struct rte_flow_action actions[],
3534                               struct mlx5_translated_action_handle *handle,
3535                               int *indir_n,
3536                               struct rte_flow_action **translated_actions,
3537                               struct rte_flow_error *error)
3538 {
3539         struct mlx5_priv *priv = dev->data->dev_private;
3540         struct rte_flow_action *translated = NULL;
3541         size_t actions_size;
3542         int n;
3543         int copied_n = 0;
3544         struct mlx5_translated_action_handle *handle_end = NULL;
3545
3546         for (n = 0; actions[n].type != RTE_FLOW_ACTION_TYPE_END; n++) {
3547                 if (actions[n].type != RTE_FLOW_ACTION_TYPE_INDIRECT)
3548                         continue;
3549                 if (copied_n == *indir_n) {
3550                         return rte_flow_error_set
3551                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_NUM,
3552                                  NULL, "too many shared actions");
3553                 }
3554                 rte_memcpy(&handle[copied_n].action, &actions[n].conf,
3555                            sizeof(actions[n].conf));
3556                 handle[copied_n].index = n;
3557                 copied_n++;
3558         }
3559         n++;
3560         *indir_n = copied_n;
3561         if (!copied_n)
3562                 return 0;
3563         actions_size = sizeof(struct rte_flow_action) * n;
3564         translated = mlx5_malloc(MLX5_MEM_ZERO, actions_size, 0, SOCKET_ID_ANY);
3565         if (!translated) {
3566                 rte_errno = ENOMEM;
3567                 return -ENOMEM;
3568         }
3569         memcpy(translated, actions, actions_size);
3570         for (handle_end = handle + copied_n; handle < handle_end; handle++) {
3571                 struct mlx5_shared_action_rss *shared_rss;
3572                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3573                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3574                 uint32_t idx = act_idx &
3575                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3576
3577                 switch (type) {
3578                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3579                         shared_rss = mlx5_ipool_get
3580                           (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS], idx);
3581                         translated[handle->index].type =
3582                                 RTE_FLOW_ACTION_TYPE_RSS;
3583                         translated[handle->index].conf =
3584                                 &shared_rss->origin;
3585                         break;
3586                 case MLX5_INDIRECT_ACTION_TYPE_COUNT:
3587                         translated[handle->index].type =
3588                                                 (enum rte_flow_action_type)
3589                                                 MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
3590                         translated[handle->index].conf = (void *)(uintptr_t)idx;
3591                         break;
3592                 case MLX5_INDIRECT_ACTION_TYPE_AGE:
3593                         if (priv->sh->flow_hit_aso_en) {
3594                                 translated[handle->index].type =
3595                                         (enum rte_flow_action_type)
3596                                         MLX5_RTE_FLOW_ACTION_TYPE_AGE;
3597                                 translated[handle->index].conf =
3598                                                          (void *)(uintptr_t)idx;
3599                                 break;
3600                         }
3601                         /* Fall-through */
3602                 case MLX5_INDIRECT_ACTION_TYPE_CT:
3603                         if (priv->sh->ct_aso_en) {
3604                                 translated[handle->index].type =
3605                                         RTE_FLOW_ACTION_TYPE_CONNTRACK;
3606                                 translated[handle->index].conf =
3607                                                          (void *)(uintptr_t)idx;
3608                                 break;
3609                         }
3610                         /* Fall-through */
3611                 default:
3612                         mlx5_free(translated);
3613                         return rte_flow_error_set
3614                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION,
3615                                  NULL, "invalid indirect action type");
3616                 }
3617         }
3618         *translated_actions = translated;
3619         return 0;
3620 }
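
/*
 * Illustrative sketch, not part of the upstream file: the indirect action
 * handle decoded above is simply an integer that packs the action type in
 * the bits at and above MLX5_INDIRECT_ACTION_TYPE_OFFSET and the pool
 * index in the bits below it. The helper name and the guard macro are
 * assumptions made for illustration only.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES /* hypothetical guard, never defined in the build */
static inline struct rte_flow_action_handle *
example_indirect_handle_compose(uint32_t type, uint32_t idx)
{
        uint32_t act_idx = (type << MLX5_INDIRECT_ACTION_TYPE_OFFSET) |
                           (idx & ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1));

        return (struct rte_flow_action_handle *)(uintptr_t)act_idx;
}
#endif /* MLX5_FLOW_DOC_EXAMPLES */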
3621
3622 /**
3623  * Get Shared RSS action from the action list.
3624  *
3625  * @param[in] dev
3626  *   Pointer to Ethernet device.
3627  * @param[in] handle
3628  *   Pointer to the list of translated action handles.
3629  * @param[in] shared_n
3630  *   Actions list length.
3631  *
3632  * @return
3633  *   The mlx5 RSS action ID if it exists, 0 otherwise.
3634  */
3635 static uint32_t
3636 flow_get_shared_rss_action(struct rte_eth_dev *dev,
3637                            struct mlx5_translated_action_handle *handle,
3638                            int shared_n)
3639 {
3640         struct mlx5_translated_action_handle *handle_end;
3641         struct mlx5_priv *priv = dev->data->dev_private;
3642         struct mlx5_shared_action_rss *shared_rss;
3643
3644
3645         for (handle_end = handle + shared_n; handle < handle_end; handle++) {
3646                 uint32_t act_idx = (uint32_t)(uintptr_t)handle->action;
3647                 uint32_t type = act_idx >> MLX5_INDIRECT_ACTION_TYPE_OFFSET;
3648                 uint32_t idx = act_idx &
3649                                ((1u << MLX5_INDIRECT_ACTION_TYPE_OFFSET) - 1);
3650                 switch (type) {
3651                 case MLX5_INDIRECT_ACTION_TYPE_RSS:
3652                         shared_rss = mlx5_ipool_get
3653                                 (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
3654                                                                            idx);
3655                         __atomic_add_fetch(&shared_rss->refcnt, 1,
3656                                            __ATOMIC_RELAXED);
3657                         return idx;
3658                 default:
3659                         break;
3660                 }
3661         }
3662         return 0;
3663 }
3664
3665 static unsigned int
3666 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3667 {
3668         const struct rte_flow_item *item;
3669         unsigned int has_vlan = 0;
3670
3671         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3672                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3673                         has_vlan = 1;
3674                         break;
3675                 }
3676         }
3677         if (has_vlan)
3678                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3679                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3680         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3681                                MLX5_EXPANSION_ROOT_OUTER;
3682 }
3683
3684 /**
3685  *  Get layer flags from the prefix flow.
3686  *
3687  *  Some flows may be split into several subflows; the prefix subflow gets the
3688  *  match items and the suffix subflow gets the actions.
3689  *  Some actions need the user-defined match item flags to get the details for
3690  *  the action.
3691  *  This function helps the suffix flow to get the item layer flags from the
3692  *  prefix subflow.
3693  *
3694  * @param[in] dev_flow
3695  *   Pointer the created preifx subflow.
3696  *
3697  * @return
3698  *   The layers get from prefix subflow.
3699  */
3700 static inline uint64_t
3701 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3702 {
3703         uint64_t layers = 0;
3704
3705         /*
3706          * The layer bits could be cached in a local variable, but the
3707          * compiler usually optimizes this well enough.
3708          * If there is no decap action, use the layers directly.
3709          */
3710         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3711                 return dev_flow->handle->layers;
3712         /* Convert L3 layers with decap action. */
3713         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3714                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3715         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3716                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3717         /* Convert L4 layers with decap action.  */
3718         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3719                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3720         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3721                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3722         return layers;
3723 }
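
/*
 * Editor's note (illustration): for a prefix subflow that carries a decap
 * action and whose handle->layers contain MLX5_FLOW_LAYER_INNER_L3_IPV4 and
 * MLX5_FLOW_LAYER_INNER_L4_UDP, the function above returns the corresponding
 * OUTER_L3_IPV4 | OUTER_L4_UDP flags: after decapsulation the inner layers of
 * the prefix become the outer layers seen by the suffix subflow.
 */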
3724
3725 /**
3726  * Get metadata split action information.
3727  *
3728  * @param[in] actions
3729  *   Pointer to the list of actions.
3730  * @param[out] qrss
3731  *   Pointer to the return pointer; set to point at the QUEUE/RSS action
3732  *   when one is found in the list of actions. It is left untouched when
3733  *   no QUEUE/RSS action is present, so callers can detect that case by
3734  *   pre-initializing the pointer.
3735  * @param[out] encap_idx
3736  *   Pointer to the index of the encap action if it exists, otherwise the
3737  *   last action index.
3738  *
3739  * @return
3740  *   Total number of actions.
3741  */
3742 static int
3743 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3744                                        const struct rte_flow_action **qrss,
3745                                        int *encap_idx)
3746 {
3747         const struct rte_flow_action_raw_encap *raw_encap;
3748         int actions_n = 0;
3749         int raw_decap_idx = -1;
3750
3751         *encap_idx = -1;
3752         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3753                 switch (actions->type) {
3754                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3755                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3756                         *encap_idx = actions_n;
3757                         break;
3758                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3759                         raw_decap_idx = actions_n;
3760                         break;
3761                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3762                         raw_encap = actions->conf;
3763                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3764                                 *encap_idx = raw_decap_idx != -1 ?
3765                                                       raw_decap_idx : actions_n;
3766                         break;
3767                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3768                 case RTE_FLOW_ACTION_TYPE_RSS:
3769                         *qrss = actions;
3770                         break;
3771                 default:
3772                         break;
3773                 }
3774                 actions_n++;
3775         }
3776         if (*encap_idx == -1)
3777                 *encap_idx = actions_n;
3778         /* Count RTE_FLOW_ACTION_TYPE_END. */
3779         return actions_n + 1;
3780 }
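
/*
 * Editor's note (illustration): for the action list
 *   { RAW_DECAP, RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE), QUEUE, END }
 * the function above sets *encap_idx to 0 (the RAW_DECAP index), points *qrss
 * at the QUEUE action and returns 4 (three actions plus the END action).
 */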
3781
3782 /**
3783  * Check whether the action will modify the packet.
3784  *
3785  * @param dev
3786  *   Pointer to Ethernet device.
3787  * @param[in] type
3788  *   Action type.
3789  *
3790  * @return
3791  *   True if the action will modify the packet, false otherwise.
3792  */
3793 static bool flow_check_modify_action_type(struct rte_eth_dev *dev,
3794                                           enum rte_flow_action_type type)
3795 {
3796         struct mlx5_priv *priv = dev->data->dev_private;
3797
3798         switch (type) {
3799         case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
3800         case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
3801         case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
3802         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
3803         case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
3804         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
3805         case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
3806         case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
3807         case RTE_FLOW_ACTION_TYPE_DEC_TTL:
3808         case RTE_FLOW_ACTION_TYPE_SET_TTL:
3809         case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
3810         case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
3811         case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
3812         case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
3813         case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
3814         case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
3815         case RTE_FLOW_ACTION_TYPE_SET_META:
3816         case RTE_FLOW_ACTION_TYPE_SET_TAG:
3817         case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
3818         case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3819         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3820         case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3821         case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3822         case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3823         case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3824         case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3825         case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3826         case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3827         case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
3828                 return true;
3829         case RTE_FLOW_ACTION_TYPE_FLAG:
3830         case RTE_FLOW_ACTION_TYPE_MARK:
3831                 if (priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY)
3832                         return true;
3833                 else
3834                         return false;
3835         default:
3836                 return false;
3837         }
3838 }
3839
3840 /**
3841  * Check for a meter action in the action list.
3842  *
3843  * @param dev
3844  *   Pointer to Ethernet device.
3845  * @param[in] actions
3846  *   Pointer to the list of actions.
3847  * @param[out] has_mtr
3848  *   Pointer to the flag indicating whether a meter action exists.
3849  * @param[out] has_modify
3850  *   Pointer to the flag indicating whether a packet-modifying action exists.
3851  * @param[out] meter_id
3852  *   Pointer to the meter id.
3853  *
3854  * @return
3855  *   Total number of actions.
3856  */
3857 static int
3858 flow_check_meter_action(struct rte_eth_dev *dev,
3859                         const struct rte_flow_action actions[],
3860                         bool *has_mtr, bool *has_modify, uint32_t *meter_id)
3861 {
3862         const struct rte_flow_action_meter *mtr = NULL;
3863         int actions_n = 0;
3864
3865         MLX5_ASSERT(has_mtr);
3866         *has_mtr = false;
3867         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3868                 switch (actions->type) {
3869                 case RTE_FLOW_ACTION_TYPE_METER:
3870                         mtr = actions->conf;
3871                         *meter_id = mtr->mtr_id;
3872                         *has_mtr = true;
3873                         break;
3874                 default:
3875                         break;
3876                 }
3877                 if (!*has_mtr)
3878                         *has_modify |= flow_check_modify_action_type(dev,
3879                                                                 actions->type);
3880                 actions_n++;
3881         }
3882         /* Count RTE_FLOW_ACTION_TYPE_END. */
3883         return actions_n + 1;
3884 }
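
/*
 * Editor's note (illustration): for the action list { SET_IPV4_SRC, METER, END }
 * the function above reports *has_mtr = true, *meter_id = mtr->mtr_id,
 * *has_modify = true (the IPv4 rewrite precedes the meter action) and
 * returns 3.
 */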
3885
3886 /**
3887  * Check if the flow should be split due to hairpin.
3888  * The reason for the split is that current HW can't support
3889  * encap and push-VLAN actions on Rx, so if a flow contains
3890  * these actions we move them to Tx.
3891  *
3892  * @param dev
3893  *   Pointer to Ethernet device.
3894  * @param[in] attr
3895  *   Flow rule attributes.
3896  * @param[in] actions
3897  *   Associated actions (list terminated by the END action).
3898  *
3899  * @return
3900  *   > 0 the number of actions if the flow should be split,
3901  *   0 when no split is required.
3902  */
3903 static int
3904 flow_check_hairpin_split(struct rte_eth_dev *dev,
3905                          const struct rte_flow_attr *attr,
3906                          const struct rte_flow_action actions[])
3907 {
3908         int queue_action = 0;
3909         int action_n = 0;
3910         int split = 0;
3911         const struct rte_flow_action_queue *queue;
3912         const struct rte_flow_action_rss *rss;
3913         const struct rte_flow_action_raw_encap *raw_encap;
3914         const struct rte_eth_hairpin_conf *conf;
3915
3916         if (!attr->ingress)
3917                 return 0;
3918         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3919                 switch (actions->type) {
3920                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3921                         queue = actions->conf;
3922                         if (queue == NULL)
3923                                 return 0;
3924                         conf = mlx5_rxq_get_hairpin_conf(dev, queue->index);
3925                         if (conf == NULL || conf->tx_explicit != 0)
3926                                 return 0;
3927                         queue_action = 1;
3928                         action_n++;
3929                         break;
3930                 case RTE_FLOW_ACTION_TYPE_RSS:
3931                         rss = actions->conf;
3932                         if (rss == NULL || rss->queue_num == 0)
3933                                 return 0;
3934                         conf = mlx5_rxq_get_hairpin_conf(dev, rss->queue[0]);
3935                         if (conf == NULL || conf->tx_explicit != 0)
3936                                 return 0;
3937                         queue_action = 1;
3938                         action_n++;
3939                         break;
3940                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3941                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3942                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3943                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3944                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3945                         split++;
3946                         action_n++;
3947                         break;
3948                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3949                         raw_encap = actions->conf;
3950                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3951                                 split++;
3952                         action_n++;
3953                         break;
3954                 default:
3955                         action_n++;
3956                         break;
3957                 }
3958         }
3959         if (split && queue_action)
3960                 return action_n;
3961         return 0;
3962 }
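
/*
 * Editor's note (illustration): an ingress flow with the actions
 *   { RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE), QUEUE, END }
 * where the queue is a hairpin queue without explicit Tx rules makes the
 * function above return 2, i.e. the flow has to be split into Rx and Tx
 * parts by flow_hairpin_split() below.
 */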
3963
3964 /* Declare flow create/destroy prototypes in advance. */
3965 static uint32_t
3966 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3967                  const struct rte_flow_attr *attr,
3968                  const struct rte_flow_item items[],
3969                  const struct rte_flow_action actions[],
3970                  bool external, struct rte_flow_error *error);
3971
3972 static void
3973 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3974                   uint32_t flow_idx);
3975
3976 int
3977 flow_dv_mreg_match_cb(struct mlx5_hlist *list __rte_unused,
3978                       struct mlx5_hlist_entry *entry,
3979                       uint64_t key, void *cb_ctx __rte_unused)
3980 {
3981         struct mlx5_flow_mreg_copy_resource *mcp_res =
3982                 container_of(entry, typeof(*mcp_res), hlist_ent);
3983
3984         return mcp_res->mark_id != key;
3985 }
3986
3987 struct mlx5_hlist_entry *
3988 flow_dv_mreg_create_cb(struct mlx5_hlist *list, uint64_t key,
3989                        void *cb_ctx)
3990 {
3991         struct rte_eth_dev *dev = list->ctx;
3992         struct mlx5_priv *priv = dev->data->dev_private;
3993         struct mlx5_flow_cb_ctx *ctx = cb_ctx;
3994         struct mlx5_flow_mreg_copy_resource *mcp_res;
3995         struct rte_flow_error *error = ctx->error;
3996         uint32_t idx = 0;
3997         int ret;
3998         uint32_t mark_id = key;
3999         struct rte_flow_attr attr = {
4000                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4001                 .ingress = 1,
4002         };
4003         struct mlx5_rte_flow_item_tag tag_spec = {
4004                 .data = mark_id,
4005         };
4006         struct rte_flow_item items[] = {
4007                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
4008         };
4009         struct rte_flow_action_mark ftag = {
4010                 .id = mark_id,
4011         };
4012         struct mlx5_flow_action_copy_mreg cp_mreg = {
4013                 .dst = REG_B,
4014                 .src = REG_NON,
4015         };
4016         struct rte_flow_action_jump jump = {
4017                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4018         };
4019         struct rte_flow_action actions[] = {
4020                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
4021         };
4022
4023         /* Fill the register fields in the flow. */
4024         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
4025         if (ret < 0)
4026                 return NULL;
4027         tag_spec.id = ret;
4028         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4029         if (ret < 0)
4030                 return NULL;
4031         cp_mreg.src = ret;
4032         /* Provide the full width of FLAG specific value. */
4033         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
4034                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
4035         /* Build a new flow. */
4036         if (mark_id != MLX5_DEFAULT_COPY_ID) {
4037                 items[0] = (struct rte_flow_item){
4038                         .type = (enum rte_flow_item_type)
4039                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4040                         .spec = &tag_spec,
4041                 };
4042                 items[1] = (struct rte_flow_item){
4043                         .type = RTE_FLOW_ITEM_TYPE_END,
4044                 };
4045                 actions[0] = (struct rte_flow_action){
4046                         .type = (enum rte_flow_action_type)
4047                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
4048                         .conf = &ftag,
4049                 };
4050                 actions[1] = (struct rte_flow_action){
4051                         .type = (enum rte_flow_action_type)
4052                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4053                         .conf = &cp_mreg,
4054                 };
4055                 actions[2] = (struct rte_flow_action){
4056                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4057                         .conf = &jump,
4058                 };
4059                 actions[3] = (struct rte_flow_action){
4060                         .type = RTE_FLOW_ACTION_TYPE_END,
4061                 };
4062         } else {
4063                 /* Default rule, wildcard match. */
4064                 attr.priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR;
4065                 items[0] = (struct rte_flow_item){
4066                         .type = RTE_FLOW_ITEM_TYPE_END,
4067                 };
4068                 actions[0] = (struct rte_flow_action){
4069                         .type = (enum rte_flow_action_type)
4070                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4071                         .conf = &cp_mreg,
4072                 };
4073                 actions[1] = (struct rte_flow_action){
4074                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4075                         .conf = &jump,
4076                 };
4077                 actions[2] = (struct rte_flow_action){
4078                         .type = RTE_FLOW_ACTION_TYPE_END,
4079                 };
4080         }
4081         /* Build a new entry. */
4082         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
4083         if (!mcp_res) {
4084                 rte_errno = ENOMEM;
4085                 return NULL;
4086         }
4087         mcp_res->idx = idx;
4088         mcp_res->mark_id = mark_id;
4089         /*
4090          * The copy flows are not included in any list. These
4091          * flows are referenced from other flows and cannot
4092          * be applied, removed or deleted in arbitrary order
4093          * by list traversal.
4094          */
4095         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
4096                                          actions, false, error);
4097         if (!mcp_res->rix_flow) {
4098                 mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], idx);
4099                 return NULL;
4100         }
4101         return &mcp_res->hlist_ent;
4102 }
4103
4104 /**
4105  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4106  *
4107  * As mark_id is unique, if there's already a registered flow for the mark_id,
4108  * return by increasing the reference counter of the resource. Otherwise, create
4109  * the resource (mcp_res) and flow.
4110  *
4111  * Flow looks like,
4112  *   - If ingress port is ANY and reg_c[1] is mark_id,
4113  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4114  *
4115  * For default flow (zero mark_id), flow is like,
4116  *   - If ingress port is ANY,
4117  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
4118  *
4119  * @param dev
4120  *   Pointer to Ethernet device.
4121  * @param mark_id
4122  *   ID of MARK action, zero means default flow for META.
4123  * @param[out] error
4124  *   Perform verbose error reporting if not NULL.
4125  *
4126  * @return
4127  *   Associated resource on success, NULL otherwise and rte_errno is set.
4128  */
4129 static struct mlx5_flow_mreg_copy_resource *
4130 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
4131                           struct rte_flow_error *error)
4132 {
4133         struct mlx5_priv *priv = dev->data->dev_private;
4134         struct mlx5_hlist_entry *entry;
4135         struct mlx5_flow_cb_ctx ctx = {
4136                 .dev = dev,
4137                 .error = error,
4138         };
4139
4140         /* Check if already registered. */
4141         MLX5_ASSERT(priv->mreg_cp_tbl);
4142         entry = mlx5_hlist_register(priv->mreg_cp_tbl, mark_id, &ctx);
4143         if (!entry)
4144                 return NULL;
4145         return container_of(entry, struct mlx5_flow_mreg_copy_resource,
4146                             hlist_ent);
4147 }
4148
4149 void
4150 flow_dv_mreg_remove_cb(struct mlx5_hlist *list, struct mlx5_hlist_entry *entry)
4151 {
4152         struct mlx5_flow_mreg_copy_resource *mcp_res =
4153                 container_of(entry, typeof(*mcp_res), hlist_ent);
4154         struct rte_eth_dev *dev = list->ctx;
4155         struct mlx5_priv *priv = dev->data->dev_private;
4156
4157         MLX5_ASSERT(mcp_res->rix_flow);
4158         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
4159         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
4160 }
4161
4162 /**
4163  * Release flow in RX_CP_TBL.
4164  *
4165  * @param dev
4166  *   Pointer to Ethernet device.
4167  * @param flow
4168  *   Parent flow for which copying is provided.
4169  */
4170 static void
4171 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
4172                           struct rte_flow *flow)
4173 {
4174         struct mlx5_flow_mreg_copy_resource *mcp_res;
4175         struct mlx5_priv *priv = dev->data->dev_private;
4176
4177         if (!flow->rix_mreg_copy)
4178                 return;
4179         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
4180                                  flow->rix_mreg_copy);
4181         if (!mcp_res || !priv->mreg_cp_tbl)
4182                 return;
4183         MLX5_ASSERT(mcp_res->rix_flow);
4184         mlx5_hlist_unregister(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
4185         flow->rix_mreg_copy = 0;
4186 }
4187
4188 /**
4189  * Remove the default copy action from RX_CP_TBL.
4190  *
4191  * This function is called from mlx5_dev_start(). Thread safety is not
4192  * guaranteed.
4193  *
4194  * @param dev
4195  *   Pointer to Ethernet device.
4196  */
4197 static void
4198 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
4199 {
4200         struct mlx5_hlist_entry *entry;
4201         struct mlx5_priv *priv = dev->data->dev_private;
4202
4203         /* Check if default flow is registered. */
4204         if (!priv->mreg_cp_tbl)
4205                 return;
4206         entry = mlx5_hlist_lookup(priv->mreg_cp_tbl,
4207                                   MLX5_DEFAULT_COPY_ID, NULL);
4208         if (!entry)
4209                 return;
4210         mlx5_hlist_unregister(priv->mreg_cp_tbl, entry);
4211 }
4212
4213 /**
4214  * Add the default copy action in RX_CP_TBL.
4215  *
4216  * This function is called from mlx5_dev_start(). Thread safety is not
4217  * guaranteed.
4218  *
4219  * @param dev
4220  *   Pointer to Ethernet device.
4221  * @param[out] error
4222  *   Perform verbose error reporting if not NULL.
4223  *
4224  * @return
4225  *   0 for success, negative value otherwise and rte_errno is set.
4226  */
4227 static int
4228 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
4229                                   struct rte_flow_error *error)
4230 {
4231         struct mlx5_priv *priv = dev->data->dev_private;
4232         struct mlx5_flow_mreg_copy_resource *mcp_res;
4233
4234         /* Check whether extensive metadata feature is engaged. */
4235         if (!priv->config.dv_flow_en ||
4236             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4237             !mlx5_flow_ext_mreg_supported(dev) ||
4238             !priv->sh->dv_regc0_mask)
4239                 return 0;
4240         /*
4241          * Adding the default mreg copy flow may be requested multiple times,
4242          * while it is removed only once at stop. Avoid registering it twice.
4243          */
4244         if (mlx5_hlist_lookup(priv->mreg_cp_tbl, MLX5_DEFAULT_COPY_ID, NULL))
4245                 return 0;
4246         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
4247         if (!mcp_res)
4248                 return -rte_errno;
4249         return 0;
4250 }
4251
4252 /**
4253  * Add a flow of copying flow metadata registers in RX_CP_TBL.
4254  *
4255  * All the flow having Q/RSS action should be split by
4256  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
4257  * performs the following,
4258  *   - CQE->flow_tag := reg_c[1] (MARK)
4259  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4260  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
4261  * but there should be one flow per MARK ID set by the MARK action.
4262  *
4263  * For the aforementioned reason, if there's a MARK action in flow's action
4264  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
4265  * the MARK ID to CQE's flow_tag like,
4266  *   - If reg_c[1] is mark_id,
4267  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4268  *
4269  * For SET_META action which stores value in reg_c[0], as the destination is
4270  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
4271  * MARK ID means the default flow. The default flow looks like,
4272  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
4273  *
4274  * @param dev
4275  *   Pointer to Ethernet device.
4276  * @param flow
4277  *   Pointer to flow structure.
4278  * @param[in] actions
4279  *   Pointer to the list of actions.
4280  * @param[out] error
4281  *   Perform verbose error reporting if not NULL.
4282  *
4283  * @return
4284  *   0 on success, negative value otherwise and rte_errno is set.
4285  */
4286 static int
4287 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
4288                             struct rte_flow *flow,
4289                             const struct rte_flow_action *actions,
4290                             struct rte_flow_error *error)
4291 {
4292         struct mlx5_priv *priv = dev->data->dev_private;
4293         struct mlx5_dev_config *config = &priv->config;
4294         struct mlx5_flow_mreg_copy_resource *mcp_res;
4295         const struct rte_flow_action_mark *mark;
4296
4297         /* Check whether extensive metadata feature is engaged. */
4298         if (!config->dv_flow_en ||
4299             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4300             !mlx5_flow_ext_mreg_supported(dev) ||
4301             !priv->sh->dv_regc0_mask)
4302                 return 0;
4303         /* Find MARK action. */
4304         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4305                 switch (actions->type) {
4306                 case RTE_FLOW_ACTION_TYPE_FLAG:
4307                         mcp_res = flow_mreg_add_copy_action
4308                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
4309                         if (!mcp_res)
4310                                 return -rte_errno;
4311                         flow->rix_mreg_copy = mcp_res->idx;
4312                         return 0;
4313                 case RTE_FLOW_ACTION_TYPE_MARK:
4314                         mark = (const struct rte_flow_action_mark *)
4315                                 actions->conf;
4316                         mcp_res =
4317                                 flow_mreg_add_copy_action(dev, mark->id, error);
4318                         if (!mcp_res)
4319                                 return -rte_errno;
4320                         flow->rix_mreg_copy = mcp_res->idx;
4321                         return 0;
4322                 default:
4323                         break;
4324                 }
4325         }
4326         return 0;
4327 }
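
/*
 * Editor's note (illustration): a flow whose action list contains MARK with
 * id = 0x1234 makes the function above register (or reuse) the RX_CP_TBL copy
 * flow keyed by 0x1234 and record its ipool index in flow->rix_mreg_copy;
 * a FLAG action uses MLX5_FLOW_MARK_DEFAULT as the key instead.
 */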
4328
4329 #define MLX5_MAX_SPLIT_ACTIONS 24
4330 #define MLX5_MAX_SPLIT_ITEMS 24
4331
4332 /**
4333  * Split the hairpin flow.
4334  * Since HW can't support encap and push-vlan on Rx, we move these
4335  * actions to Tx.
4336  * If the count action is after the encap then we also
4337  * move the count action. In this case the count will also measure
4338  * the outer bytes.
4339  *
4340  * @param dev
4341  *   Pointer to Ethernet device.
4342  * @param[in] actions
4343  *   Associated actions (list terminated by the END action).
4344  * @param[out] actions_rx
4345  *   Rx flow actions.
4346  * @param[out] actions_tx
4347  *   Tx flow actions.
4348  * @param[out] pattern_tx
4349  *   The pattern items for the Tx flow.
4350  * @param[out] flow_id
4351  *   The flow ID connected to this flow.
4352  *
4353  * @return
4354  *   0 on success.
4355  */
4356 static int
4357 flow_hairpin_split(struct rte_eth_dev *dev,
4358                    const struct rte_flow_action actions[],
4359                    struct rte_flow_action actions_rx[],
4360                    struct rte_flow_action actions_tx[],
4361                    struct rte_flow_item pattern_tx[],
4362                    uint32_t flow_id)
4363 {
4364         const struct rte_flow_action_raw_encap *raw_encap;
4365         const struct rte_flow_action_raw_decap *raw_decap;
4366         struct mlx5_rte_flow_action_set_tag *set_tag;
4367         struct rte_flow_action *tag_action;
4368         struct mlx5_rte_flow_item_tag *tag_item;
4369         struct rte_flow_item *item;
4370         char *addr;
4371         int encap = 0;
4372
4373         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4374                 switch (actions->type) {
4375                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
4376                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
4377                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4378                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4379                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
4380                         rte_memcpy(actions_tx, actions,
4381                                sizeof(struct rte_flow_action));
4382                         actions_tx++;
4383                         break;
4384                 case RTE_FLOW_ACTION_TYPE_COUNT:
4385                         if (encap) {
4386                                 rte_memcpy(actions_tx, actions,
4387                                            sizeof(struct rte_flow_action));
4388                                 actions_tx++;
4389                         } else {
4390                                 rte_memcpy(actions_rx, actions,
4391                                            sizeof(struct rte_flow_action));
4392                                 actions_rx++;
4393                         }
4394                         break;
4395                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4396                         raw_encap = actions->conf;
4397                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE) {
4398                                 memcpy(actions_tx, actions,
4399                                        sizeof(struct rte_flow_action));
4400                                 actions_tx++;
4401                                 encap = 1;
4402                         } else {
4403                                 rte_memcpy(actions_rx, actions,
4404                                            sizeof(struct rte_flow_action));
4405                                 actions_rx++;
4406                         }
4407                         break;
4408                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4409                         raw_decap = actions->conf;
4410                         if (raw_decap->size < MLX5_ENCAPSULATION_DECISION_SIZE) {
4411                                 memcpy(actions_tx, actions,
4412                                        sizeof(struct rte_flow_action));
4413                                 actions_tx++;
4414                         } else {
4415                                 rte_memcpy(actions_rx, actions,
4416                                            sizeof(struct rte_flow_action));
4417                                 actions_rx++;
4418                         }
4419                         break;
4420                 default:
4421                         rte_memcpy(actions_rx, actions,
4422                                    sizeof(struct rte_flow_action));
4423                         actions_rx++;
4424                         break;
4425                 }
4426         }
4427         /* Add the set tag action and the end action for the Rx flow. */
4428         tag_action = actions_rx;
4429         tag_action->type = (enum rte_flow_action_type)
4430                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4431         actions_rx++;
4432         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
4433         actions_rx++;
4434         set_tag = (void *)actions_rx;
4435         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4436                 .id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL),
4437                 .data = flow_id,
4438         };
4439         MLX5_ASSERT(set_tag->id > REG_NON);
4440         tag_action->conf = set_tag;
4441         /* Create Tx item list. */
4442         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
4443         addr = (void *)&pattern_tx[2];
4444         item = pattern_tx;
4445         item->type = (enum rte_flow_item_type)
4446                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4447         tag_item = (void *)addr;
4448         tag_item->data = flow_id;
4449         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
4450         MLX5_ASSERT(set_tag->id > REG_NON);
4451         item->spec = tag_item;
4452         addr += sizeof(struct mlx5_rte_flow_item_tag);
4453         tag_item = (void *)addr;
4454         tag_item->data = UINT32_MAX;
4455         tag_item->id = UINT16_MAX;
4456         item->mask = tag_item;
4457         item->last = NULL;
4458         item++;
4459         item->type = RTE_FLOW_ITEM_TYPE_END;
4460         return 0;
4461 }
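
/*
 * Editor's note (rough illustration): splitting the hairpin actions
 *   { RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE), QUEUE, END }
 * yields approximately
 *   actions_rx = { QUEUE, TAG (MLX5_HAIRPIN_RX register := flow_id), END }
 *   actions_tx = { RAW_ENCAP, END }
 *   pattern_tx = { TAG (MLX5_HAIRPIN_TX register == flow_id), END }
 * so the Tx flow can match the flow_id tagged on the Rx side and perform the
 * encapsulation on the Tx side.
 */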
4462
4463 /**
4464  * The last stage of splitting chain, just creates the subflow
4465  * without any modification.
4466  *
4467  * @param[in] dev
4468  *   Pointer to Ethernet device.
4469  * @param[in] flow
4470  *   Parent flow structure pointer.
4471  * @param[in, out] sub_flow
4472  *   Pointer to return the created subflow, may be NULL.
4473  * @param[in] attr
4474  *   Flow rule attributes.
4475  * @param[in] items
4476  *   Pattern specification (list terminated by the END pattern item).
4477  * @param[in] actions
4478  *   Associated actions (list terminated by the END action).
4479  * @param[in] flow_split_info
4480  *   Pointer to flow split info structure.
4481  * @param[out] error
4482  *   Perform verbose error reporting if not NULL.
4483  * @return
4484  *   0 on success, negative value otherwise
4485  */
4486 static int
4487 flow_create_split_inner(struct rte_eth_dev *dev,
4488                         struct rte_flow *flow,
4489                         struct mlx5_flow **sub_flow,
4490                         const struct rte_flow_attr *attr,
4491                         const struct rte_flow_item items[],
4492                         const struct rte_flow_action actions[],
4493                         struct mlx5_flow_split_info *flow_split_info,
4494                         struct rte_flow_error *error)
4495 {
4496         struct mlx5_flow *dev_flow;
4497
4498         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
4499                                     flow_split_info->flow_idx, error);
4500         if (!dev_flow)
4501                 return -rte_errno;
4502         dev_flow->flow = flow;
4503         dev_flow->external = flow_split_info->external;
4504         dev_flow->skip_scale = flow_split_info->skip_scale;
4505         /* Subflow object was created, we must include one in the list. */
4506         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4507                       dev_flow->handle, next);
4508         /*
4509          * If dev_flow is one of the suffix flows, some actions in the
4510          * suffix flow may need the user-defined item layer flags; pass the
4511          * metadata Rx queue mark flag to the suffix flow as well.
4512          */
4513         if (flow_split_info->prefix_layers)
4514                 dev_flow->handle->layers = flow_split_info->prefix_layers;
4515         if (flow_split_info->prefix_mark)
4516                 dev_flow->handle->mark = 1;
4517         if (sub_flow)
4518                 *sub_flow = dev_flow;
4519 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4520         dev_flow->dv.table_id = flow_split_info->table_id;
4521 #endif
4522         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
4523 }
4524
4525 /**
4526  * Get the sub policy of a meter.
4527  *
4528  * @param[in] dev
4529  *   Pointer to Ethernet device.
4530  * @param[in] flow
4531  *   Parent flow structure pointer.
4532  * @param[in] policy_id
4533  *   Meter Policy id.
4534  * @param[in] attr
4535  *   Flow rule attributes.
4536  * @param[in] items
4537  *   Pattern specification (list terminated by the END pattern item).
4538  * @param[out] error
4539  *   Perform verbose error reporting if not NULL.
4540  *
4541  * @return
4542  *   Pointer to the meter sub policy, NULL otherwise and rte_errno is set.
4543  */
4544 static struct mlx5_flow_meter_sub_policy *
4545 get_meter_sub_policy(struct rte_eth_dev *dev,
4546                      struct rte_flow *flow,
4547                      uint32_t policy_id,
4548                      const struct rte_flow_attr *attr,
4549                      const struct rte_flow_item items[],
4550                      struct rte_flow_error *error)
4551 {
4552         struct mlx5_flow_meter_policy *policy;
4553         struct mlx5_flow_meter_sub_policy *sub_policy = NULL;
4554
4555         policy = mlx5_flow_meter_policy_find(dev, policy_id, NULL);
4556         if (!policy) {
4557                 rte_flow_error_set(error, EINVAL,
4558                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4559                                    "Failed to find Meter Policy.");
4560                 goto exit;
4561         }
4562         if (policy->is_rss) {
4563                 struct mlx5_flow_workspace *wks =
4564                                 mlx5_flow_get_thread_workspace();
4565                 struct mlx5_flow_rss_desc rss_desc_v[MLX5_MTR_RTE_COLORS];
4566                 struct mlx5_flow_rss_desc *rss_desc[MLX5_MTR_RTE_COLORS] = {0};
4567                 uint32_t i;
4568
4569                 MLX5_ASSERT(wks);
4570                 /*
4571                  * This is a temporary dev_flow,
4572                  * no need to register any matcher for it in translate.
4573                  */
4574                 wks->skip_matcher_reg = 1;
4575                 for (i = 0; i < MLX5_MTR_RTE_COLORS; i++) {
4576                         struct mlx5_flow dev_flow = {0};
4577                         struct mlx5_flow_handle dev_handle = { {0} };
4578                         const void *rss_act = policy->act_cnt[i].rss->conf;
4579                         struct rte_flow_action rss_actions[2] = {
4580                                 [0] = {
4581                                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4582                                         .conf = rss_act
4583                                 },
4584                                 [1] = {
4585                                         .type = RTE_FLOW_ACTION_TYPE_END,
4586                                         .conf = NULL
4587                                 }
4588                         };
4589
4590                         dev_flow.handle = &dev_handle;
4591                         dev_flow.ingress = attr->ingress;
4592                         dev_flow.flow = flow;
4593                         dev_flow.external = 0;
4594 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4595                         dev_flow.dv.transfer = attr->transfer;
4596 #endif
4597                         /* Translate RSS action to get rss hash fields. */
4598                         if (flow_drv_translate(dev, &dev_flow, attr,
4599                                                 items, rss_actions, error))
4600                                 goto exit;
4601                         rss_desc_v[i] = wks->rss_desc;
4602                         rss_desc_v[i].key_len = MLX5_RSS_HASH_KEY_LEN;
4603                         rss_desc_v[i].hash_fields = dev_flow.hash_fields;
4604                         rss_desc_v[i].queue_num = rss_desc_v[i].hash_fields ?
4605                                                   rss_desc_v[i].queue_num : 1;
4606                         rss_desc[i] = &rss_desc_v[i];
4607                 }
4608                 sub_policy = flow_drv_meter_sub_policy_rss_prepare(dev,
4609                                                 flow, policy, rss_desc);
4610         } else {
4611                 enum mlx5_meter_domain mtr_domain =
4612                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4613                                 attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4614                                         MLX5_MTR_DOMAIN_INGRESS;
4615                 sub_policy = policy->sub_policys[mtr_domain][0];
4616         }
4617         if (!sub_policy) {
4618                 rte_flow_error_set(error, EINVAL,
4619                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4620                         "Failed to get meter sub-policy.");
4621                 goto exit;
4622         }
4623 exit:
4624         return sub_policy;
4625 }
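
/*
 * Editor's note: for an RSS meter policy the loop above builds one temporary
 * dev_flow per color (MLX5_MTR_RTE_COLORS) only to let the driver translate
 * the per-color RSS action into hash fields; the resulting rss_desc array is
 * then used to prepare a matching sub-policy, while non-RSS policies simply
 * take the first sub-policy of the relevant meter domain.
 */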
4626
4627 /**
4628  * Split the meter flow.
4629  *
4630  * As the meter flow will be split into three subflows, actions other
4631  * than the meter action only make sense when the meter accepts the
4632  * packet. If the packet needs to be dropped, no additional actions
4633  * should be taken.
4634  *
4635  * One special kind of action, which decapsulates the L3 tunnel
4636  * header, will be placed in the prefix subflow so that the L3 tunnel
4637  * header is not taken into account.
4638  *
4639  * @param[in] dev
4640  *   Pointer to Ethernet device.
4641  * @param[in] flow
4642  *   Parent flow structure pointer.
4643  * @param[in] fm
4644  *   Pointer to flow meter structure.
4645  * @param[in] attr
4646  *   Flow rule attributes.
4647  * @param[in] items
4648  *   Pattern specification (list terminated by the END pattern item).
4649  * @param[out] sfx_items
4650  *   Suffix flow match items (list terminated by the END pattern item).
4651  * @param[in] actions
4652  *   Associated actions (list terminated by the END action).
4653  * @param[out] actions_sfx
4654  *   Suffix flow actions.
4655  * @param[out] actions_pre
4656  *   Prefix flow actions.
4657  * @param[out] mtr_flow_id
4658  *   Pointer to meter flow id.
4659  * @param[out] error
4660  *   Perform verbose error reporting if not NULL.
4661  *
4662  * @return
4663  *   0 on success, a negative errno value otherwise and rte_errno is set.
4664  */
4665 static int
4666 flow_meter_split_prep(struct rte_eth_dev *dev,
4667                       struct rte_flow *flow,
4668                       struct mlx5_flow_meter_info *fm,
4669                       const struct rte_flow_attr *attr,
4670                       const struct rte_flow_item items[],
4671                       struct rte_flow_item sfx_items[],
4672                       const struct rte_flow_action actions[],
4673                       struct rte_flow_action actions_sfx[],
4674                       struct rte_flow_action actions_pre[],
4675                       uint32_t *mtr_flow_id,
4676                       struct rte_flow_error *error)
4677 {
4678         struct mlx5_priv *priv = dev->data->dev_private;
4679         struct rte_flow_action *tag_action = NULL;
4680         struct rte_flow_item *tag_item;
4681         struct mlx5_rte_flow_action_set_tag *set_tag;
4682         const struct rte_flow_action_raw_encap *raw_encap;
4683         const struct rte_flow_action_raw_decap *raw_decap;
4684         struct mlx5_rte_flow_item_tag *tag_item_spec;
4685         struct mlx5_rte_flow_item_tag *tag_item_mask;
4686         uint32_t tag_id = 0;
4687         bool copy_vlan = false;
4688         struct rte_flow_action *hw_mtr_action;
4689         struct rte_flow_action *action_pre_head = NULL;
4690         bool mtr_first = priv->sh->meter_aso_en &&
4691                         (attr->egress ||
4692                         (attr->transfer && priv->representor_id != UINT16_MAX));
4693         uint8_t mtr_id_offset = priv->mtr_reg_share ? MLX5_MTR_COLOR_BITS : 0;
4694         uint8_t mtr_reg_bits = priv->mtr_reg_share ?
4695                                 MLX5_MTR_IDLE_BITS_IN_COLOR_REG : MLX5_REG_BITS;
4696         uint32_t flow_id = 0;
4697         uint32_t flow_id_reversed = 0;
4698         uint8_t flow_id_bits = 0;
4699         int shift;
4700
4701         /* For ASO meter, meter must be before tag in TX direction. */
4702         if (mtr_first) {
4703                 action_pre_head = actions_pre++;
4704                 /* Leave space for tag action. */
4705                 tag_action = actions_pre++;
4706         }
4707         /* Prepare the actions for prefix and suffix flow. */
4708         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4709                 struct rte_flow_action *action_cur = NULL;
4710
4711                 switch (actions->type) {
4712                 case RTE_FLOW_ACTION_TYPE_METER:
4713                         if (mtr_first) {
4714                                 action_cur = action_pre_head;
4715                         } else {
4716                                 /* Leave space for tag action. */
4717                                 tag_action = actions_pre++;
4718                                 action_cur = actions_pre++;
4719                         }
4720                         break;
4721                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4722                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4723                         action_cur = actions_pre++;
4724                         break;
4725                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4726                         raw_encap = actions->conf;
4727                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4728                                 action_cur = actions_pre++;
4729                         break;
4730                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4731                         raw_decap = actions->conf;
4732                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4733                                 action_cur = actions_pre++;
4734                         break;
4735                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4736                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4737                         copy_vlan = true;
4738                         break;
4739                 default:
4740                         break;
4741                 }
4742                 if (!action_cur)
4743                         action_cur = (fm->def_policy) ?
4744                                         actions_sfx++ : actions_pre++;
4745                 memcpy(action_cur, actions, sizeof(struct rte_flow_action));
4746         }
4747         /* Add end action to the actions. */
4748         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4749         if (priv->sh->meter_aso_en) {
4750                 /*
4751                  * For ASO meter, an extra jump action needs to be added
4752                  * explicitly, to jump from the meter to the policer table.
4753                  */
4754                 struct mlx5_flow_meter_sub_policy *sub_policy;
4755                 struct mlx5_flow_tbl_data_entry *tbl_data;
4756
4757                 if (!fm->def_policy) {
4758                         sub_policy = get_meter_sub_policy(dev, flow,
4759                                                           fm->policy_id, attr,
4760                                                           items, error);
4761                         if (!sub_policy)
4762                                 return -rte_errno;
4763                 } else {
4764                         enum mlx5_meter_domain mtr_domain =
4765                         attr->transfer ? MLX5_MTR_DOMAIN_TRANSFER :
4766                                 attr->egress ? MLX5_MTR_DOMAIN_EGRESS :
4767                                         MLX5_MTR_DOMAIN_INGRESS;
4768
4769                         sub_policy =
4770                         &priv->sh->mtrmng->def_policy[mtr_domain]->sub_policy;
4771                 }
4772                 tbl_data = container_of(sub_policy->tbl_rsc,
4773                                         struct mlx5_flow_tbl_data_entry, tbl);
4774                 hw_mtr_action = actions_pre++;
4775                 hw_mtr_action->type = (enum rte_flow_action_type)
4776                                       MLX5_RTE_FLOW_ACTION_TYPE_JUMP;
4777                 hw_mtr_action->conf = tbl_data->jump.action;
4778         }
4779         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4780         actions_pre++;
4781         if (!tag_action)
4782                 return rte_flow_error_set(error, ENOMEM,
4783                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4784                                         "No tag action space.");
4785         if (!mtr_flow_id) {
4786                 tag_action->type = RTE_FLOW_ACTION_TYPE_VOID;
4787                 goto exit;
4788         }
4789         /* Only default-policy Meter creates mtr flow id. */
4790         if (fm->def_policy) {
4791                 mlx5_ipool_malloc(fm->flow_ipool, &tag_id);
4792                 if (!tag_id)
4793                         return rte_flow_error_set(error, ENOMEM,
4794                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4795                                         "Failed to allocate meter flow id.");
4796                 flow_id = tag_id - 1;
4797                 flow_id_bits = MLX5_REG_BITS - __builtin_clz(flow_id);
4798                 flow_id_bits = flow_id_bits ? flow_id_bits : 1;
4799                 if ((flow_id_bits + priv->sh->mtrmng->max_mtr_bits) >
4800                     mtr_reg_bits) {
4801                         mlx5_ipool_free(fm->flow_ipool, tag_id);
4802                         return rte_flow_error_set(error, EINVAL,
4803                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
4804                                         "Meter flow id exceeds max limit.");
4805                 }
4806                 if (flow_id_bits > priv->sh->mtrmng->max_mtr_flow_bits)
4807                         priv->sh->mtrmng->max_mtr_flow_bits = flow_id_bits;
4808         }
4809         /* Prepare the suffix subflow items. */
4810         tag_item = sfx_items++;
4811         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4812                 int item_type = items->type;
4813
4814                 switch (item_type) {
4815                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4816                         memcpy(sfx_items, items, sizeof(*sfx_items));
4817                         sfx_items++;
4818                         break;
4819                 case RTE_FLOW_ITEM_TYPE_VLAN:
4820                         if (copy_vlan) {
4821                                 memcpy(sfx_items, items, sizeof(*sfx_items));
4822                                 /*
4823                                  * Convert to internal match item, it is used
4824                                  * for vlan push and set vid.
4825                                  */
4826                                 sfx_items->type = (enum rte_flow_item_type)
4827                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4828                                 sfx_items++;
4829                         }
4830                         break;
4831                 default:
4832                         break;
4833                 }
4834         }
4835         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4836         sfx_items++;
4837         /* Build tag actions and items for meter_id/meter flow_id. */
4838         set_tag = (struct mlx5_rte_flow_action_set_tag *)actions_pre;
4839         tag_item_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4840         tag_item_mask = tag_item_spec + 1;
4841         /* Both flow_id and meter_id share the same register. */
4842         *set_tag = (struct mlx5_rte_flow_action_set_tag) {
4843                 .id = (enum modify_reg)mlx5_flow_get_reg_id(dev, MLX5_MTR_ID,
4844                                                             0, error),
4845                 .offset = mtr_id_offset,
4846                 .length = mtr_reg_bits,
4847                 .data = flow->meter,
4848         };
4849         /*
4850          * The color Reg bits used by flow_id are growing from
4851          * msb to lsb, so must do bit reverse for flow_id val in RegC.
4852          */
4853         for (shift = 0; shift < flow_id_bits; shift++)
4854                 flow_id_reversed = (flow_id_reversed << 1) |
4855                                 ((flow_id >> shift) & 0x1);
4856         set_tag->data |=
4857                 flow_id_reversed << (mtr_reg_bits - flow_id_bits);
4858         tag_item_spec->id = set_tag->id;
4859         tag_item_spec->data = set_tag->data << mtr_id_offset;
4860         tag_item_mask->data = UINT32_MAX << mtr_id_offset;
4861         tag_action->type = (enum rte_flow_action_type)
4862                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4863         tag_action->conf = set_tag;
4864         tag_item->type = (enum rte_flow_item_type)
4865                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4866         tag_item->spec = tag_item_spec;
4867         tag_item->last = NULL;
4868         tag_item->mask = tag_item_mask;
4869 exit:
4870         if (mtr_flow_id)
4871                 *mtr_flow_id = tag_id;
4872         return 0;
4873 }
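
/*
 * Editor's note (illustration of the flow_id bit reversal above, register
 * widths are only examples): with flow_id = 6 (binary 110, flow_id_bits = 3)
 * the reversed value is binary 011; assuming mtr_reg_bits = 8, the tag data
 * becomes the meter id OR-ed with (011 << 5). Reversing lets flow ids grow
 * from the MSB side of the shared register while meter ids grow from the
 * LSB side.
 */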
4874
4875 /**
4876  * Split action list having QUEUE/RSS for metadata register copy.
4877  *
4878  * Once Q/RSS action is detected in user's action list, the flow action
4879  * should be split in order to copy metadata registers, which will happen in
4880  * RX_CP_TBL like,
4881  *   - CQE->flow_tag := reg_c[1] (MARK)
4882  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4883  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4884  * This is because the last action of each flow must be a terminal action
4885  * (QUEUE, RSS or DROP).
4886  *
4887  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4888  * stored and kept in the mlx5_flow structure for each sub_flow.
4889  *
4890  * The Q/RSS action is replaced with,
4891  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4892  * And the following JUMP action is added at the end,
4893  *   - JUMP, to RX_CP_TBL.
4894  *
4895  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
4896  * flow_create_split_metadata() routine. The flow will look like,
4897  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4898  *
4899  * @param dev
4900  *   Pointer to Ethernet device.
4901  * @param[out] split_actions
4902  *   Pointer to store split actions to jump to CP_TBL.
4903  * @param[in] actions
4904  *   Pointer to the list of original flow actions.
4905  * @param[in] qrss
4906  *   Pointer to the Q/RSS action.
4907  * @param[in] actions_n
4908  *   Number of original actions.
4909  * @param[out] error
4910  *   Perform verbose error reporting if not NULL.
4911  *
4912  * @return
4913  *   non-zero unique flow_id on success, otherwise 0 and
4914  *   error/rte_errno are set.
4915  */
4916 static uint32_t
4917 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4918                           struct rte_flow_action *split_actions,
4919                           const struct rte_flow_action *actions,
4920                           const struct rte_flow_action *qrss,
4921                           int actions_n, struct rte_flow_error *error)
4922 {
4923         struct mlx5_priv *priv = dev->data->dev_private;
4924         struct mlx5_rte_flow_action_set_tag *set_tag;
4925         struct rte_flow_action_jump *jump;
4926         const int qrss_idx = qrss - actions;
4927         uint32_t flow_id = 0;
4928         int ret = 0;
4929
4930         /*
4931          * Given actions will be split
4932          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4933          * - Add jump to mreg CP_TBL.
4934          * As a result, there will be one more action.
4935          */
4936         ++actions_n;
4937         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4938         set_tag = (void *)(split_actions + actions_n);
4939         /*
4940          * If the tag action is not set to void (it means we are not the
4941          * meter suffix flow), add the tag action, since the meter suffix
4942          * flow already has the tag added.
4943          */
4944         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4945                 /*
4946                  * Allocate the new subflow ID. This one is unique within
4947                  * device and not shared with representors. Otherwise,
4948                  * we would have to resolve multi-thread access synch
4949                  * issue. Each flow on the shared device is appended
4950                  * with source vport identifier, so the resulting
4951                  * flows will be unique in the shared (by master and
4952                  * representors) domain even if they have coinciding
4953                  * IDs.
4954                  */
4955                 mlx5_ipool_malloc(priv->sh->ipool
4956                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &flow_id);
4957                 if (!flow_id)
4958                         return rte_flow_error_set(error, ENOMEM,
4959                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4960                                                   NULL, "can't allocate id "
4961                                                   "for split Q/RSS subflow");
4962                 /* Internal SET_TAG action to set flow ID. */
4963                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4964                         .data = flow_id,
4965                 };
4966                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4967                 if (ret < 0)
4968                         return ret;
4969                 set_tag->id = ret;
4970                 /* Construct new actions array. */
4971                 /* Replace QUEUE/RSS action. */
4972                 split_actions[qrss_idx] = (struct rte_flow_action){
4973                         .type = (enum rte_flow_action_type)
4974                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4975                         .conf = set_tag,
4976                 };
4977         }
4978         /* JUMP action to jump to mreg copy table (CP_TBL). */
4979         jump = (void *)(set_tag + 1);
4980         *jump = (struct rte_flow_action_jump){
4981                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4982         };
4983         split_actions[actions_n - 2] = (struct rte_flow_action){
4984                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4985                 .conf = jump,
4986         };
4987         split_actions[actions_n - 1] = (struct rte_flow_action){
4988                 .type = RTE_FLOW_ACTION_TYPE_END,
4989         };
4990         return flow_id;
4991 }
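
/*
 * Example (for illustration): for a non meter-suffix flow with the original
 * actions
 *     [ COUNT, RSS, END ]
 * flow_mreg_split_qrss_prep() produces
 *     [ COUNT, SET_TAG(flow_id), JUMP(RX_CP_TBL), END ]
 * while the Q/RSS action itself is re-created later in RX_ACT_TBL by
 * flow_create_split_metadata().
 */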
4992
4993 /**
4994  * Extend the given action list for Tx metadata copy.
4995  *
4996  * Copy the given action list to the ext_actions and add flow metadata register
4997  * copy action in order to copy reg_a set by WQE to reg_c[0].
4998  *
4999  * @param[out] ext_actions
5000  *   Pointer to the extended action list.
5001  * @param[in] actions
5002  *   Pointer to the list of actions.
5003  * @param[in] actions_n
5004  *   Number of actions in the list.
5005  * @param[out] error
5006  *   Perform verbose error reporting if not NULL.
5007  * @param[in] encap_idx
5008  *   The encap action index.
5009  *
5010  * @return
5011  *   0 on success, negative value otherwise
5012  */
5013 static int
5014 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
5015                        struct rte_flow_action *ext_actions,
5016                        const struct rte_flow_action *actions,
5017                        int actions_n, struct rte_flow_error *error,
5018                        int encap_idx)
5019 {
5020         struct mlx5_flow_action_copy_mreg *cp_mreg =
5021                 (struct mlx5_flow_action_copy_mreg *)
5022                         (ext_actions + actions_n + 1);
5023         int ret;
5024
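        /*
         * Note on layout: the caller (flow_create_split_metadata()) sizes
         * ext_actions for (actions_n + 1) rte_flow_action entries followed
         * by one struct mlx5_flow_action_copy_mreg; cp_mreg points into that
         * trailing slot.
         */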
5025         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
5026         if (ret < 0)
5027                 return ret;
5028         cp_mreg->dst = ret;
5029         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
5030         if (ret < 0)
5031                 return ret;
5032         cp_mreg->src = ret;
5033         if (encap_idx != 0)
5034                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
5035         if (encap_idx == actions_n - 1) {
5036                 ext_actions[actions_n - 1] = (struct rte_flow_action){
5037                         .type = (enum rte_flow_action_type)
5038                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5039                         .conf = cp_mreg,
5040                 };
5041                 ext_actions[actions_n] = (struct rte_flow_action){
5042                         .type = RTE_FLOW_ACTION_TYPE_END,
5043                 };
5044         } else {
5045                 ext_actions[encap_idx] = (struct rte_flow_action){
5046                         .type = (enum rte_flow_action_type)
5047                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5048                         .conf = cp_mreg,
5049                 };
5050                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
5051                                 sizeof(*ext_actions) * (actions_n - encap_idx));
5052         }
5053         return 0;
5054 }
5055
5056 /**
5057  * Check the match action from the action list.
5058  *
5059  * @param[in] actions
5060  *   Pointer to the list of actions.
5061  * @param[in] attr
5062  *   Flow rule attributes.
5063  * @param[in] action
5064  *   The action to look for in the list.
5065  * @param[out] match_action_pos
5066  *   Pointer to the position of the matched action, or -1 if not found.
5067  * @param[out] qrss_action_pos
5068  *   Pointer to the position of the Queue/RSS action, or -1 if not found.
5069  * @param[out] modify_after_mirror
5070  *   Pointer to the flag of modify action after FDB mirroring.
5071  *
5072  * @return
5073  *   > 0 the total number of actions (including END) if the match action is found.
5074  *   0 if the match action is not found in the action list.
5075  */
5076 static int
5077 flow_check_match_action(const struct rte_flow_action actions[],
5078                         const struct rte_flow_attr *attr,
5079                         enum rte_flow_action_type action,
5080                         int *match_action_pos, int *qrss_action_pos,
5081                         int *modify_after_mirror)
5082 {
5083         const struct rte_flow_action_sample *sample;
5084         int actions_n = 0;
5085         uint32_t ratio = 0;
5086         int sub_type = 0;
5087         int flag = 0;
5088         int fdb_mirror = 0;
5089
5090         *match_action_pos = -1;
5091         *qrss_action_pos = -1;
5092         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
5093                 if (actions->type == action) {
5094                         flag = 1;
5095                         *match_action_pos = actions_n;
5096                 }
5097                 switch (actions->type) {
5098                 case RTE_FLOW_ACTION_TYPE_QUEUE:
5099                 case RTE_FLOW_ACTION_TYPE_RSS:
5100                         *qrss_action_pos = actions_n;
5101                         break;
5102                 case RTE_FLOW_ACTION_TYPE_SAMPLE:
5103                         sample = actions->conf;
5104                         ratio = sample->ratio;
5105                         sub_type = ((const struct rte_flow_action *)
5106                                         (sample->actions))->type;
5107                         if (ratio == 1 && attr->transfer)
5108                                 fdb_mirror = 1;
5109                         break;
5110                 case RTE_FLOW_ACTION_TYPE_SET_MAC_SRC:
5111                 case RTE_FLOW_ACTION_TYPE_SET_MAC_DST:
5112                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC:
5113                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DST:
5114                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_SRC:
5115                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DST:
5116                 case RTE_FLOW_ACTION_TYPE_SET_TP_SRC:
5117                 case RTE_FLOW_ACTION_TYPE_SET_TP_DST:
5118                 case RTE_FLOW_ACTION_TYPE_DEC_TTL:
5119                 case RTE_FLOW_ACTION_TYPE_SET_TTL:
5120                 case RTE_FLOW_ACTION_TYPE_INC_TCP_SEQ:
5121                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_SEQ:
5122                 case RTE_FLOW_ACTION_TYPE_INC_TCP_ACK:
5123                 case RTE_FLOW_ACTION_TYPE_DEC_TCP_ACK:
5124                 case RTE_FLOW_ACTION_TYPE_SET_IPV4_DSCP:
5125                 case RTE_FLOW_ACTION_TYPE_SET_IPV6_DSCP:
5126                 case RTE_FLOW_ACTION_TYPE_FLAG:
5127                 case RTE_FLOW_ACTION_TYPE_MARK:
5128                 case RTE_FLOW_ACTION_TYPE_SET_META:
5129                 case RTE_FLOW_ACTION_TYPE_SET_TAG:
5130                 case RTE_FLOW_ACTION_TYPE_OF_POP_VLAN:
5131                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
5132                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
5133                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
5134                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
5135                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
5136                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
5137                 case RTE_FLOW_ACTION_TYPE_MODIFY_FIELD:
5138                         if (fdb_mirror)
5139                                 *modify_after_mirror = 1;
5140                         break;
5141                 default:
5142                         break;
5143                 }
5144                 actions_n++;
5145         }
5146         if (flag && fdb_mirror && !*modify_after_mirror) {
5147                 /* FDB mirroring is implemented with the destination array
5148                  * instead of the FLOW_SAMPLER object.
5149                  */
5150                 if (sub_type != RTE_FLOW_ACTION_TYPE_END)
5151                         flag = 0;
5152         }
5153         /* Count RTE_FLOW_ACTION_TYPE_END. */
5154         return flag ? actions_n + 1 : 0;
5155 }
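
/*
 * Example (for illustration): for the action list
 *     [ MARK, SAMPLE, QUEUE, END ]
 * with action == RTE_FLOW_ACTION_TYPE_SAMPLE on a non-transfer rule,
 * flow_check_match_action() sets *match_action_pos = 1 and
 * *qrss_action_pos = 2, and returns 4 (three actions plus END).
 */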
5156
5157 #define SAMPLE_SUFFIX_ITEM 2
5158
5159 /**
5160  * Split the sample flow.
5161  *
5162  * The sample flow is split into two sub flows: the prefix flow keeps
5163  * the sample action, while the other actions move to a new suffix flow.
5164  *
5165  * A unique tag ID is also added with a tag action to the sample flow;
5166  * the same tag ID is used as a match in the suffix flow.
5167  *
5168  * @param dev
5169  *   Pointer to Ethernet device.
5170  * @param[in] add_tag
5171  *   Add extra tag action flag.
5172  * @param[out] sfx_items
5173  *   Suffix flow match items (list terminated by the END pattern item).
5174  * @param[in] actions
5175  *   Associated actions (list terminated by the END action).
5176  * @param[out] actions_sfx
5177  *   Suffix flow actions.
5178  * @param[out] actions_pre
5179  *   Prefix flow actions.
5180  * @param[in] actions_n
5181  *   The total number of actions.
5182  * @param[in] sample_action_pos
5183  *   The sample action position.
5184  * @param[in] qrss_action_pos
5185  *   The Queue/RSS action position.
5186  * @param[in] jump_table
5187  *   Add extra jump action flag.
5188  * @param[out] error
5189  *   Perform verbose error reporting if not NULL.
5190  *
5191  * @return
5192  *   0 or a unique flow_id (tag ID) on success, a negative errno value
5193  *   otherwise and rte_errno is set.
5194  */
5195 static int
5196 flow_sample_split_prep(struct rte_eth_dev *dev,
5197                        int add_tag,
5198                        struct rte_flow_item sfx_items[],
5199                        const struct rte_flow_action actions[],
5200                        struct rte_flow_action actions_sfx[],
5201                        struct rte_flow_action actions_pre[],
5202                        int actions_n,
5203                        int sample_action_pos,
5204                        int qrss_action_pos,
5205                        int jump_table,
5206                        struct rte_flow_error *error)
5207 {
5208         struct mlx5_priv *priv = dev->data->dev_private;
5209         struct mlx5_rte_flow_action_set_tag *set_tag;
5210         struct mlx5_rte_flow_item_tag *tag_spec;
5211         struct mlx5_rte_flow_item_tag *tag_mask;
5212         struct rte_flow_action_jump *jump_action;
5213         uint32_t tag_id = 0;
5214         int index;
5215         int append_index = 0;
5216         int ret;
5217
5218         if (sample_action_pos < 0)
5219                 return rte_flow_error_set(error, EINVAL,
5220                                           RTE_FLOW_ERROR_TYPE_ACTION,
5221                                           NULL, "invalid position of sample "
5222                                           "action in list");
5223         /* Prepare the actions for prefix and suffix flow. */
5224         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
5225                 index = qrss_action_pos;
5226                 /* Put actions preceding the Queue/RSS action into the prefix flow. */
5227                 if (index != 0)
5228                         memcpy(actions_pre, actions,
5229                                sizeof(struct rte_flow_action) * index);
5230                 /* Put the others preceding the sample action into the prefix flow. */
5231                 if (sample_action_pos > index + 1)
5232                         memcpy(actions_pre + index, actions + index + 1,
5233                                sizeof(struct rte_flow_action) *
5234                                (sample_action_pos - index - 1));
5235                 index = sample_action_pos - 1;
5236                 /* Put Queue/RSS action into Suffix flow. */
5237                 memcpy(actions_sfx, actions + qrss_action_pos,
5238                        sizeof(struct rte_flow_action));
5239                 actions_sfx++;
5240         } else {
5241                 index = sample_action_pos;
5242                 if (index != 0)
5243                         memcpy(actions_pre, actions,
5244                                sizeof(struct rte_flow_action) * index);
5245         }
5246         /* On CX5, add an extra tag action for NIC-RX and E-Switch ingress.
5247          * On CX6DX and above, metadata registers Cx preserve their value,
5248          * so add an extra tag action for NIC-RX and the E-Switch domain.
5249          */
5250         if (add_tag) {
5251                 /* Prepare the prefix tag action. */
5252                 append_index++;
5253                 set_tag = (void *)(actions_pre + actions_n + append_index);
5254                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
5255                 if (ret < 0)
5256                         return ret;
5257                 mlx5_ipool_malloc(priv->sh->ipool
5258                                   [MLX5_IPOOL_RSS_EXPANTION_FLOW_ID], &tag_id);
5259                 *set_tag = (struct mlx5_rte_flow_action_set_tag) {
5260                         .id = ret,
5261                         .data = tag_id,
5262                 };
5263                 /* Prepare the suffix subflow items. */
5264                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
5265                 tag_spec->data = tag_id;
5266                 tag_spec->id = set_tag->id;
5267                 tag_mask = tag_spec + 1;
5268                 tag_mask->data = UINT32_MAX;
5269                 sfx_items[0] = (struct rte_flow_item){
5270                         .type = (enum rte_flow_item_type)
5271                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5272                         .spec = tag_spec,
5273                         .last = NULL,
5274                         .mask = tag_mask,
5275                 };
5276                 sfx_items[1] = (struct rte_flow_item){
5277                         .type = (enum rte_flow_item_type)
5278                                 RTE_FLOW_ITEM_TYPE_END,
5279                 };
5280                 /* Prepare the tag action in prefix subflow. */
5281                 actions_pre[index++] =
5282                         (struct rte_flow_action){
5283                         .type = (enum rte_flow_action_type)
5284                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
5285                         .conf = set_tag,
5286                 };
5287         }
5288         memcpy(actions_pre + index, actions + sample_action_pos,
5289                sizeof(struct rte_flow_action));
5290         index += 1;
5291         /* For a modify action after the sample action in E-Switch mirroring,
5292          * add an extra jump action to the prefix subflow that jumps to the
5293          * next table, then perform the modify action in the new table.
5294          */
5295         if (jump_table) {
5296                 /* Prepare the prefix jump action. */
5297                 append_index++;
5298                 jump_action = (void *)(actions_pre + actions_n + append_index);
5299                 jump_action->group = jump_table;
5300                 actions_pre[index++] =
5301                         (struct rte_flow_action){
5302                         .type = (enum rte_flow_action_type)
5303                                 RTE_FLOW_ACTION_TYPE_JUMP,
5304                         .conf = jump_action,
5305                 };
5306         }
5307         actions_pre[index] = (struct rte_flow_action){
5308                 .type = (enum rte_flow_action_type)
5309                         RTE_FLOW_ACTION_TYPE_END,
5310         };
5311         /* Put the actions after sample into Suffix flow. */
5312         memcpy(actions_sfx, actions + sample_action_pos + 1,
5313                sizeof(struct rte_flow_action) *
5314                (actions_n - sample_action_pos - 1));
5315         return tag_id;
5316 }
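
/*
 * Example (for illustration): with add_tag set, no extra jump table and the
 * original actions
 *     [ MARK, SAMPLE, QUEUE, END ]
 * (sample_action_pos = 1, qrss_action_pos = 2, actions_n = 4),
 * flow_sample_split_prep() builds:
 *   - prefix actions: [ MARK, TAG(tag_id), SAMPLE, END ]
 *   - suffix items:   [ TAG(spec/mask on tag_id), END ]
 *   - suffix actions: [ QUEUE, END ]
 */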
5317
5318 /**
5319  * The splitting for metadata feature.
5320  *
5321  * - Q/RSS action on NIC Rx should be split in order to pass by
5322  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
5323  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
5324  *
5325  * - All the actions on NIC Tx should have a mreg copy action to
5326  *   copy reg_a from WQE to reg_c[0].
5327  *
5328  * @param dev
5329  *   Pointer to Ethernet device.
5330  * @param[in] flow
5331  *   Parent flow structure pointer.
5332  * @param[in] attr
5333  *   Flow rule attributes.
5334  * @param[in] items
5335  *   Pattern specification (list terminated by the END pattern item).
5336  * @param[in] actions
5337  *   Associated actions (list terminated by the END action).
5338  * @param[in] flow_split_info
5339  *   Pointer to flow split info structure.
5340  * @param[out] error
5341  *   Perform verbose error reporting if not NULL.
5342  * @return
5343  *   0 on success, negative value otherwise
5344  */
5345 static int
5346 flow_create_split_metadata(struct rte_eth_dev *dev,
5347                            struct rte_flow *flow,
5348                            const struct rte_flow_attr *attr,
5349                            const struct rte_flow_item items[],
5350                            const struct rte_flow_action actions[],
5351                            struct mlx5_flow_split_info *flow_split_info,
5352                            struct rte_flow_error *error)
5353 {
5354         struct mlx5_priv *priv = dev->data->dev_private;
5355         struct mlx5_dev_config *config = &priv->config;
5356         const struct rte_flow_action *qrss = NULL;
5357         struct rte_flow_action *ext_actions = NULL;
5358         struct mlx5_flow *dev_flow = NULL;
5359         uint32_t qrss_id = 0;
5360         int mtr_sfx = 0;
5361         size_t act_size;
5362         int actions_n;
5363         int encap_idx;
5364         int ret;
5365
5366         /* Check whether extensive metadata feature is engaged. */
5367         if (!config->dv_flow_en ||
5368             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
5369             !mlx5_flow_ext_mreg_supported(dev))
5370                 return flow_create_split_inner(dev, flow, NULL, attr, items,
5371                                                actions, flow_split_info, error);
5372         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
5373                                                            &encap_idx);
5374         if (qrss) {
5375                 /* Exclude hairpin flows from splitting. */
5376                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
5377                         const struct rte_flow_action_queue *queue;
5378
5379                         queue = qrss->conf;
5380                         if (mlx5_rxq_get_type(dev, queue->index) ==
5381                             MLX5_RXQ_TYPE_HAIRPIN)
5382                                 qrss = NULL;
5383                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
5384                         const struct rte_flow_action_rss *rss;
5385
5386                         rss = qrss->conf;
5387                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
5388                             MLX5_RXQ_TYPE_HAIRPIN)
5389                                 qrss = NULL;
5390                 }
5391         }
5392         if (qrss) {
5393                 /* Check if it is in meter suffix table. */
5394                 mtr_sfx = attr->group == (attr->transfer ?
5395                           (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5396                           MLX5_FLOW_TABLE_LEVEL_METER);
5397                 /*
5398                  * Q/RSS action on NIC Rx should be split in order to pass by
5399                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
5400                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
5401                  */
5402                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5403                            sizeof(struct rte_flow_action_set_tag) +
5404                            sizeof(struct rte_flow_action_jump);
5405                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5406                                           SOCKET_ID_ANY);
5407                 if (!ext_actions)
5408                         return rte_flow_error_set(error, ENOMEM,
5409                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5410                                                   NULL, "no memory to split "
5411                                                   "metadata flow");
5412                 /*
5413                  * If we are the suffix flow of a meter, the tag already exists.
5414                  * Set the tag action to void.
5415                  */
5416                 if (mtr_sfx)
5417                         ext_actions[qrss - actions].type =
5418                                                 RTE_FLOW_ACTION_TYPE_VOID;
5419                 else
5420                         ext_actions[qrss - actions].type =
5421                                                 (enum rte_flow_action_type)
5422                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
5423                 /*
5424                  * Create the new actions list with removed Q/RSS action
5425                  * and appended set tag and jump to register copy table
5426                  * (RX_CP_TBL). We should preallocate unique tag ID here
5427                  * in advance, because it is needed for set tag action.
5428                  */
5429                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
5430                                                     qrss, actions_n, error);
5431                 if (!mtr_sfx && !qrss_id) {
5432                         ret = -rte_errno;
5433                         goto exit;
5434                 }
5435         } else if (attr->egress && !attr->transfer) {
5436                 /*
5437                  * All the actions on NIC Tx should have a metadata register
5438                  * copy action to copy reg_a from WQE to reg_c[meta]
5439                  */
5440                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
5441                            sizeof(struct mlx5_flow_action_copy_mreg);
5442                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
5443                                           SOCKET_ID_ANY);
5444                 if (!ext_actions)
5445                         return rte_flow_error_set(error, ENOMEM,
5446                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5447                                                   NULL, "no memory to split "
5448                                                   "metadata flow");
5449                 /* Create the action list appended with copy register. */
5450                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
5451                                              actions_n, error, encap_idx);
5452                 if (ret < 0)
5453                         goto exit;
5454         }
5455         /* Add the unmodified original or prefix subflow. */
5456         ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5457                                       items, ext_actions ? ext_actions :
5458                                       actions, flow_split_info, error);
5459         if (ret < 0)
5460                 goto exit;
5461         MLX5_ASSERT(dev_flow);
5462         if (qrss) {
5463                 const struct rte_flow_attr q_attr = {
5464                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5465                         .ingress = 1,
5466                 };
5467                 /* Internal PMD action to set register. */
5468                 struct mlx5_rte_flow_item_tag q_tag_spec = {
5469                         .data = qrss_id,
5470                         .id = REG_NON,
5471                 };
5472                 struct rte_flow_item q_items[] = {
5473                         {
5474                                 .type = (enum rte_flow_item_type)
5475                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
5476                                 .spec = &q_tag_spec,
5477                                 .last = NULL,
5478                                 .mask = NULL,
5479                         },
5480                         {
5481                                 .type = RTE_FLOW_ITEM_TYPE_END,
5482                         },
5483                 };
5484                 struct rte_flow_action q_actions[] = {
5485                         {
5486                                 .type = qrss->type,
5487                                 .conf = qrss->conf,
5488                         },
5489                         {
5490                                 .type = RTE_FLOW_ACTION_TYPE_END,
5491                         },
5492                 };
5493                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
5494
5495                 /*
5496                  * Configure the tag item only if there is no meter subflow.
5497                  * Since the tag is already marked in the meter suffix
5498                  * subflow, we can just use the meter suffix items as-is.
5499                  */
5500                 if (qrss_id) {
5501                         /* Not meter subflow. */
5502                         MLX5_ASSERT(!mtr_sfx);
5503                         /*
5504                          * Put the unique ID in the prefix flow because it
5505                          * is destroyed after the suffix flow, and the ID is
5506                          * freed only when no actual flow uses it, at which
5507                          * point identifier reallocation becomes possible
5508                          * (for example, for other flows in other threads).
5509                          */
5510                         dev_flow->handle->split_flow_id = qrss_id;
5511                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
5512                                                    error);
5513                         if (ret < 0)
5514                                 goto exit;
5515                         q_tag_spec.id = ret;
5516                 }
5517                 dev_flow = NULL;
5518                 /* Add suffix subflow to execute Q/RSS. */
5519                 flow_split_info->prefix_layers = layers;
5520                 flow_split_info->prefix_mark = 0;
5521                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5522                                               &q_attr, mtr_sfx ? items :
5523                                               q_items, q_actions,
5524                                               flow_split_info, error);
5525                 if (ret < 0)
5526                         goto exit;
5527                 /* The qrss_id must be freed only on failure; clear it on success. */
5528                 qrss_id = 0;
5529                 MLX5_ASSERT(dev_flow);
5530         }
5531
5532 exit:
5533         /*
5534          * We do not destroy the partially created sub_flows in case of error.
5535          * They are included in the parent flow list and will be destroyed
5536          * by flow_drv_destroy.
5537          */
5538         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RSS_EXPANTION_FLOW_ID],
5539                         qrss_id);
5540         mlx5_free(ext_actions);
5541         return ret;
5542 }
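
/*
 * Example (for illustration): with extensive metadata enabled, an ingress
 * rule with actions [ MARK, RSS, END ] is created here as two subflows:
 *   - prefix: original items -> [ MARK, SET_TAG(flow_id), JUMP(RX_CP_TBL), END ]
 *   - suffix: in group MLX5_FLOW_MREG_ACT_TABLE_GROUP, matching the internal
 *     TAG item that carries flow_id -> [ RSS, END ]
 */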
5543
5544 /**
5545  * Create meter internal drop flow with the original pattern.
5546  *
5547  * @param dev
5548  *   Pointer to Ethernet device.
5549  * @param[in] flow
5550  *   Parent flow structure pointer.
5551  * @param[in] attr
5552  *   Flow rule attributes.
5553  * @param[in] items
5554  *   Pattern specification (list terminated by the END pattern item).
5555  * @param[in] flow_split_info
5556  *   Pointer to flow split info structure.
5557  * @param[in] fm
5558  *   Pointer to flow meter structure.
5559  * @param[out] error
5560  *   Perform verbose error reporting if not NULL.
5561  * @return
5562  *   0 on success, negative value otherwise
5563  */
5564 static uint32_t
5565 flow_meter_create_drop_flow_with_org_pattern(struct rte_eth_dev *dev,
5566                         struct rte_flow *flow,
5567                         const struct rte_flow_attr *attr,
5568                         const struct rte_flow_item items[],
5569                         struct mlx5_flow_split_info *flow_split_info,
5570                         struct mlx5_flow_meter_info *fm,
5571                         struct rte_flow_error *error)
5572 {
5573         struct mlx5_flow *dev_flow = NULL;
5574         struct rte_flow_attr drop_attr = *attr;
5575         struct rte_flow_action drop_actions[3];
5576         struct mlx5_flow_split_info drop_split_info = *flow_split_info;
5577
5578         MLX5_ASSERT(fm->drop_cnt);
5579         drop_actions[0].type =
5580                 (enum rte_flow_action_type)MLX5_RTE_FLOW_ACTION_TYPE_COUNT;
5581         drop_actions[0].conf = (void *)(uintptr_t)fm->drop_cnt;
5582         drop_actions[1].type = RTE_FLOW_ACTION_TYPE_DROP;
5583         drop_actions[1].conf = NULL;
5584         drop_actions[2].type = RTE_FLOW_ACTION_TYPE_END;
5585         drop_actions[2].conf = NULL;
5586         drop_split_info.external = false;
5587         drop_split_info.skip_scale |= 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5588         drop_split_info.table_id = MLX5_MTR_TABLE_ID_DROP;
5589         drop_attr.group = MLX5_FLOW_TABLE_LEVEL_METER;
5590         return flow_create_split_inner(dev, flow, &dev_flow,
5591                                 &drop_attr, items, drop_actions,
5592                                 &drop_split_info, error);
5593 }
5594
5595 /**
5596  * The splitting for meter feature.
5597  *
5598  * - The meter flow will be split into two flows, a prefix and a
5599  *   suffix flow. Packets are meaningful only if they pass the
5600  *   prefix meter action.
5601  *
5602  * - Reg_C_5 is used to match packets between the prefix and
5603  *   suffix flows.
5604  *
5605  * @param dev
5606  *   Pointer to Ethernet device.
5607  * @param[in] flow
5608  *   Parent flow structure pointer.
5609  * @param[in] attr
5610  *   Flow rule attributes.
5611  * @param[in] items
5612  *   Pattern specification (list terminated by the END pattern item).
5613  * @param[in] actions
5614  *   Associated actions (list terminated by the END action).
5615  * @param[in] flow_split_info
5616  *   Pointer to flow split info structure.
5617  * @param[out] error
5618  *   Perform verbose error reporting if not NULL.
5619  * @return
5620  *   0 on success, negative value otherwise
5621  */
5622 static int
5623 flow_create_split_meter(struct rte_eth_dev *dev,
5624                         struct rte_flow *flow,
5625                         const struct rte_flow_attr *attr,
5626                         const struct rte_flow_item items[],
5627                         const struct rte_flow_action actions[],
5628                         struct mlx5_flow_split_info *flow_split_info,
5629                         struct rte_flow_error *error)
5630 {
5631         struct mlx5_priv *priv = dev->data->dev_private;
5632         struct mlx5_flow_workspace *wks = mlx5_flow_get_thread_workspace();
5633         struct rte_flow_action *sfx_actions = NULL;
5634         struct rte_flow_action *pre_actions = NULL;
5635         struct rte_flow_item *sfx_items = NULL;
5636         struct mlx5_flow *dev_flow = NULL;
5637         struct rte_flow_attr sfx_attr = *attr;
5638         struct mlx5_flow_meter_info *fm = NULL;
5639         uint8_t skip_scale_restore;
5640         bool has_mtr = false;
5641         bool has_modify = false;
5642         bool set_mtr_reg = true;
5643         uint32_t meter_id = 0;
5644         uint32_t mtr_idx = 0;
5645         uint32_t mtr_flow_id = 0;
5646         size_t act_size;
5647         size_t item_size;
5648         int actions_n = 0;
5649         int ret = 0;
5650
5651         if (priv->mtr_en)
5652                 actions_n = flow_check_meter_action(dev, actions, &has_mtr,
5653                                                     &has_modify, &meter_id);
5654         if (has_mtr) {
5655                 if (flow->meter) {
5656                         fm = flow_dv_meter_find_by_idx(priv, flow->meter);
5657                         if (!fm)
5658                                 return rte_flow_error_set(error, EINVAL,
5659                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5660                                                 NULL, "Meter not found.");
5661                 } else {
5662                         fm = mlx5_flow_meter_find(priv, meter_id, &mtr_idx);
5663                         if (!fm)
5664                                 return rte_flow_error_set(error, EINVAL,
5665                                                 RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5666                                                 NULL, "Meter not found.");
5667                         ret = mlx5_flow_meter_attach(priv, fm,
5668                                                      &sfx_attr, error);
5669                         if (ret)
5670                                 return -rte_errno;
5671                         flow->meter = mtr_idx;
5672                 }
5673                 MLX5_ASSERT(wks);
5674                 wks->fm = fm;
5675                 /*
5676                  * If it isn't a default-policy meter, and
5677                  * 1. there is no action in the flow that modifies the
5678                  *    packet (modify/encap/decap etc.), OR
5679                  * 2. no drop count is needed for this meter,
5680                  * there is no need to use regC to save the meter ID.
5681                  */
5682                 if (!fm->def_policy && (!has_modify || !fm->drop_cnt))
5683                         set_mtr_reg = false;
5684                 /* Prefix actions: meter, decap, encap, tag, jump, end. */
5685                 act_size = sizeof(struct rte_flow_action) * (actions_n + 6) +
5686                            sizeof(struct mlx5_rte_flow_action_set_tag);
5687                 /* Suffix items: tag, vlan, port id, end. */
5688 #define METER_SUFFIX_ITEM 4
5689                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
5690                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5691                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
5692                                           0, SOCKET_ID_ANY);
5693                 if (!sfx_actions)
5694                         return rte_flow_error_set(error, ENOMEM,
5695                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5696                                                   NULL, "no memory to split "
5697                                                   "meter flow");
5698                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
5699                              act_size);
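                /*
                 * Note on layout: sfx_actions, pre_actions and sfx_items all
                 * live in the single allocation above; the first act_size
                 * bytes hold the suffix and prefix action arrays, the
                 * remaining item_size bytes hold the suffix pattern items.
                 */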
5700                 /* There's no suffix flow for a meter with a non-default policy. */
5701                 if (!fm->def_policy)
5702                         pre_actions = sfx_actions + 1;
5703                 else
5704                         pre_actions = sfx_actions + actions_n;
5705                 ret = flow_meter_split_prep(dev, flow, fm, &sfx_attr,
5706                                             items, sfx_items, actions,
5707                                             sfx_actions, pre_actions,
5708                                             (set_mtr_reg ? &mtr_flow_id : NULL),
5709                                             error);
5710                 if (ret) {
5711                         ret = -rte_errno;
5712                         goto exit;
5713                 }
5714                 /* Add the prefix subflow. */
5715                 flow_split_info->prefix_mark = 0;
5716                 skip_scale_restore = flow_split_info->skip_scale;
5717                 flow_split_info->skip_scale |=
5718                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5719                 ret = flow_create_split_inner(dev, flow, &dev_flow,
5720                                               attr, items, pre_actions,
5721                                               flow_split_info, error);
5722                 flow_split_info->skip_scale = skip_scale_restore;
5723                 if (ret) {
5724                         if (mtr_flow_id)
5725                                 mlx5_ipool_free(fm->flow_ipool, mtr_flow_id);
5726                         ret = -rte_errno;
5727                         goto exit;
5728                 }
5729                 if (mtr_flow_id) {
5730                         dev_flow->handle->split_flow_id = mtr_flow_id;
5731                         dev_flow->handle->is_meter_flow_id = 1;
5732                 }
5733                 if (!fm->def_policy) {
5734                         if (!set_mtr_reg && fm->drop_cnt)
5735                                 ret =
5736                         flow_meter_create_drop_flow_with_org_pattern(dev, flow,
5737                                                         &sfx_attr, items,
5738                                                         flow_split_info,
5739                                                         fm, error);
5740                         goto exit;
5741                 }
5742                 /* Setting the sfx group attr. */
5743                 sfx_attr.group = sfx_attr.transfer ?
5744                                 (MLX5_FLOW_TABLE_LEVEL_METER - 1) :
5745                                  MLX5_FLOW_TABLE_LEVEL_METER;
5746                 flow_split_info->prefix_layers =
5747                                 flow_get_prefix_layer_flags(dev_flow);
5748                 flow_split_info->prefix_mark = dev_flow->handle->mark;
5749                 flow_split_info->table_id = MLX5_MTR_TABLE_ID_SUFFIX;
5750         }
5751         /* Add the suffix subflow, or the original flow if there is no meter. */
5752         ret = flow_create_split_metadata(dev, flow,
5753                                          &sfx_attr, sfx_items ?
5754                                          sfx_items : items,
5755                                          sfx_actions ? sfx_actions : actions,
5756                                          flow_split_info, error);
5757 exit:
5758         if (sfx_actions)
5759                 mlx5_free(sfx_actions);
5760         return ret;
5761 }
5762
5763 /**
5764  * The splitting for sample feature.
5765  *
5766  * Once Sample action is detected in the action list, the flow actions should
5767  * be split into prefix sub flow and suffix sub flow.
5768  *
5769  * The original items remain in the prefix sub flow. All actions preceding
5770  * the sample action, and the sample action itself, are copied to the
5771  * prefix sub flow; the actions following the sample action are copied to
5772  * the suffix sub flow, and the Queue action always ends up in the suffix sub flow.
5773  *
5774  * To make packets from the prefix sub flow match the suffix sub flow, an
5775  * extra tag action is added to the prefix sub flow, and the suffix sub
5776  * flow uses a tag item with the unique flow ID.
5777  *
5778  * @param dev
5779  *   Pointer to Ethernet device.
5780  * @param[in] flow
5781  *   Parent flow structure pointer.
5782  * @param[in] attr
5783  *   Flow rule attributes.
5784  * @param[in] items
5785  *   Pattern specification (list terminated by the END pattern item).
5786  * @param[in] actions
5787  *   Associated actions (list terminated by the END action).
5788  * @param[in] flow_split_info
5789  *   Pointer to flow split info structure.
5790  * @param[out] error
5791  *   Perform verbose error reporting if not NULL.
5792  * @return
5793  *   0 on success, negative value otherwise
5794  */
5795 static int
5796 flow_create_split_sample(struct rte_eth_dev *dev,
5797                          struct rte_flow *flow,
5798                          const struct rte_flow_attr *attr,
5799                          const struct rte_flow_item items[],
5800                          const struct rte_flow_action actions[],
5801                          struct mlx5_flow_split_info *flow_split_info,
5802                          struct rte_flow_error *error)
5803 {
5804         struct mlx5_priv *priv = dev->data->dev_private;
5805         struct rte_flow_action *sfx_actions = NULL;
5806         struct rte_flow_action *pre_actions = NULL;
5807         struct rte_flow_item *sfx_items = NULL;
5808         struct mlx5_flow *dev_flow = NULL;
5809         struct rte_flow_attr sfx_attr = *attr;
5810 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5811         struct mlx5_flow_dv_sample_resource *sample_res;
5812         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
5813         struct mlx5_flow_tbl_resource *sfx_tbl;
5814 #endif
5815         size_t act_size;
5816         size_t item_size;
5817         uint32_t fdb_tx = 0;
5818         int32_t tag_id = 0;
5819         int actions_n = 0;
5820         int sample_action_pos;
5821         int qrss_action_pos;
5822         int add_tag = 0;
5823         int modify_after_mirror = 0;
5824         uint16_t jump_table = 0;
5825         const uint32_t next_ft_step = 1;
5826         int ret = 0;
5827
5828         if (priv->sampler_en)
5829                 actions_n = flow_check_match_action(actions, attr,
5830                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
5831                                         &sample_action_pos, &qrss_action_pos,
5832                                         &modify_after_mirror);
5833         if (actions_n) {
5834                 /* The prefix actions must include sample, tag, and end. */
5835                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
5836                            + sizeof(struct mlx5_rte_flow_action_set_tag);
5837                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
5838                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
5839                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
5840                                           item_size), 0, SOCKET_ID_ANY);
5841                 if (!sfx_actions)
5842                         return rte_flow_error_set(error, ENOMEM,
5843                                                   RTE_FLOW_ERROR_TYPE_ACTION,
5844                                                   NULL, "no memory to split "
5845                                                   "sample flow");
5846                 /* The representor_id is -1 for uplink. */
5847                 fdb_tx = (attr->transfer && priv->representor_id != -1);
5848                 /*
5849                  * When reg_c_preserve is set, metadata registers Cx preserve
5850                  * their value even through packet duplication.
5851                  */
5852                 add_tag = (!fdb_tx || priv->config.hca_attr.reg_c_preserve);
5853                 if (add_tag)
5854                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
5855                                         + act_size);
5856                 if (modify_after_mirror)
5857                         jump_table = attr->group * MLX5_FLOW_TABLE_FACTOR +
5858                                      next_ft_step;
5859                 pre_actions = sfx_actions + actions_n;
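                /*
                 * Note on layout: the shared allocation holds actions_n
                 * rte_flow_action slots for the suffix actions, then
                 * (actions_n + 1) slots for the prefix actions, then room
                 * for the internal set tag configuration, and finally the
                 * suffix pattern items.
                 */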
5860                 tag_id = flow_sample_split_prep(dev, add_tag, sfx_items,
5861                                                 actions, sfx_actions,
5862                                                 pre_actions, actions_n,
5863                                                 sample_action_pos,
5864                                                 qrss_action_pos, jump_table,
5865                                                 error);
5866                 if (tag_id < 0 || (add_tag && !tag_id)) {
5867                         ret = -rte_errno;
5868                         goto exit;
5869                 }
5870                 if (modify_after_mirror)
5871                         flow_split_info->skip_scale =
5872                                         1 << MLX5_SCALE_JUMP_FLOW_GROUP_BIT;
5873                 /* Add the prefix subflow. */
5874                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr,
5875                                               items, pre_actions,
5876                                               flow_split_info, error);
5877                 if (ret) {
5878                         ret = -rte_errno;
5879                         goto exit;
5880                 }
5881                 dev_flow->handle->split_flow_id = tag_id;
5882 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
5883                 if (!modify_after_mirror) {
5884                         /* Set the sfx group attr. */
5885                         sample_res = (struct mlx5_flow_dv_sample_resource *)
5886                                                 dev_flow->dv.sample_res;
5887                         sfx_tbl = (struct mlx5_flow_tbl_resource *)
5888                                                 sample_res->normal_path_tbl;
5889                         sfx_tbl_data = container_of(sfx_tbl,
5890                                                 struct mlx5_flow_tbl_data_entry,
5891                                                 tbl);
5892                         sfx_attr.group = sfx_attr.transfer ?
5893                         (sfx_tbl_data->level - 1) : sfx_tbl_data->level;
5894                 } else {
5895                         MLX5_ASSERT(attr->transfer);
5896                         sfx_attr.group = jump_table;
5897                 }
5898                 flow_split_info->prefix_layers =
5899                                 flow_get_prefix_layer_flags(dev_flow);
5900                 flow_split_info->prefix_mark = dev_flow->handle->mark;
5901                 /* The suffix group level has already been scaled with the
5902                  * factor; set MLX5_SCALE_FLOW_GROUP_BIT of skip_scale to 1
5903                  * to avoid scaling it again during translation.
5904                  */
5905                 flow_split_info->skip_scale = 1 << MLX5_SCALE_FLOW_GROUP_BIT;
5906 #endif
5907         }
5908         /* Add the suffix subflow. */
5909         ret = flow_create_split_meter(dev, flow, &sfx_attr,
5910                                       sfx_items ? sfx_items : items,
5911                                       sfx_actions ? sfx_actions : actions,
5912                                       flow_split_info, error);
5913 exit:
5914         if (sfx_actions)
5915                 mlx5_free(sfx_actions);
5916         return ret;
5917 }
5918
5919 /**
5920  * Split the flow to subflow set. The splitters might be linked
5921  * in the chain, like this:
5922  * flow_create_split_outer() calls:
5923  *   flow_create_split_meter() calls:
5924  *     flow_create_split_metadata(meter_subflow_0) calls:
5925  *       flow_create_split_inner(metadata_subflow_0)
5926  *       flow_create_split_inner(metadata_subflow_1)
5927  *       flow_create_split_inner(metadata_subflow_2)
5928  *     flow_create_split_metadata(meter_subflow_1) calls:
5929  *       flow_create_split_inner(metadata_subflow_0)
5930  *       flow_create_split_inner(metadata_subflow_1)
5931  *       flow_create_split_inner(metadata_subflow_2)
5932  *
5933  * This provides a flexible way to add new levels of flow splitting.
5934  * All successfully created subflows are included in the parent flow's
5935  * dev_flow list.
5936  *
5937  * @param dev
5938  *   Pointer to Ethernet device.
5939  * @param[in] flow
5940  *   Parent flow structure pointer.
5941  * @param[in] attr
5942  *   Flow rule attributes.
5943  * @param[in] items
5944  *   Pattern specification (list terminated by the END pattern item).
5945  * @param[in] actions
5946  *   Associated actions (list terminated by the END action).
5947  * @param[in] flow_split_info
5948  *   Pointer to flow split info structure.
5949  * @param[out] error
5950  *   Perform verbose error reporting if not NULL.
5951  * @return
5952  *   0 on success, negative value otherwise
5953  */
5954 static int
5955 flow_create_split_outer(struct rte_eth_dev *dev,
5956                         struct rte_flow *flow,
5957                         const struct rte_flow_attr *attr,
5958                         const struct rte_flow_item items[],
5959                         const struct rte_flow_action actions[],
5960                         struct mlx5_flow_split_info *flow_split_info,
5961                         struct rte_flow_error *error)
5962 {
5963         int ret;
5964
5965         ret = flow_create_split_sample(dev, flow, attr, items,
5966                                        actions, flow_split_info, error);
5967         MLX5_ASSERT(ret <= 0);
5968         return ret;
5969 }
5970
5971 static struct mlx5_flow_tunnel *
5972 flow_tunnel_from_rule(struct rte_eth_dev *dev,
5973                       const struct rte_flow_attr *attr,
5974                       const struct rte_flow_item items[],
5975                       const struct rte_flow_action actions[])
5976 {
5977         struct mlx5_flow_tunnel *tunnel;
5978
5979 #pragma GCC diagnostic push
5980 #pragma GCC diagnostic ignored "-Wcast-qual"
5981         if (is_flow_tunnel_match_rule(dev, attr, items, actions))
5982                 tunnel = (struct mlx5_flow_tunnel *)items[0].spec;
5983         else if (is_flow_tunnel_steer_rule(dev, attr, items, actions))
5984                 tunnel = (struct mlx5_flow_tunnel *)actions[0].conf;
5985         else
5986                 tunnel = NULL;
5987 #pragma GCC diagnostic pop
5988
5989         return tunnel;
5990 }
5991
5992 /**
5993  * Adjust flow RSS workspace if needed.
5994  *
5995  * @param wks
5996  *   Pointer to thread flow work space.
5997  * @param rss_desc
5998  *   Pointer to RSS descriptor.
5999  * @param[in] nrssq_num
6000  *   New RSS queue number.
6001  *
6002  * @return
6003  *   0 on success, -1 otherwise and rte_errno is set.
6004  */
6005 static int
6006 flow_rss_workspace_adjust(struct mlx5_flow_workspace *wks,
6007                           struct mlx5_flow_rss_desc *rss_desc,
6008                           uint32_t nrssq_num)
6009 {
6010         if (likely(nrssq_num <= wks->rssq_num))
6011                 return 0;
6012         rss_desc->queue = realloc(rss_desc->queue,
6013                           sizeof(*rss_desc->queue) * RTE_ALIGN(nrssq_num, 2));
6014         if (!rss_desc->queue) {
6015                 rte_errno = ENOMEM;
6016                 return -1;
6017         }
6018         wks->rssq_num = RTE_ALIGN(nrssq_num, 2);
6019         return 0;
6020 }
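
/*
 * Example (for illustration): if the workspace currently has room for
 * 4 queues and a new flow requests 5, the queue array is reallocated for
 * RTE_ALIGN(5, 2) = 6 entries and wks->rssq_num becomes 6; a later request
 * for up to 6 queues then needs no reallocation.
 */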
6021
6022 /**
6023  * Create a flow and add it to @p list.
6024  *
6025  * @param dev
6026  *   Pointer to Ethernet device.
6027  * @param list
6028  *   Pointer to a TAILQ flow list. If this parameter is NULL,
6029  *   no list insertion occurs, the flow is just created, and
6030  *   it is the caller's responsibility to track the
6031  *   created flow.
6032  * @param[in] attr
6033  *   Flow rule attributes.
6034  * @param[in] items
6035  *   Pattern specification (list terminated by the END pattern item).
6036  * @param[in] actions
6037  *   Associated actions (list terminated by the END action).
6038  * @param[in] external
6039  *   This flow rule is created by a request external to the PMD.
6040  * @param[out] error
6041  *   Perform verbose error reporting if not NULL.
6042  *
6043  * @return
6044  *   A flow index on success, 0 otherwise and rte_errno is set.
6045  */
6046 static uint32_t
6047 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
6048                  const struct rte_flow_attr *attr,
6049                  const struct rte_flow_item items[],
6050                  const struct rte_flow_action original_actions[],
6051                  bool external, struct rte_flow_error *error)
6052 {
6053         struct mlx5_priv *priv = dev->data->dev_private;
6054         struct rte_flow *flow = NULL;
6055         struct mlx5_flow *dev_flow;
6056         const struct rte_flow_action_rss *rss = NULL;
6057         struct mlx5_translated_action_handle
6058                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6059         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6060         union {
6061                 struct mlx5_flow_expand_rss buf;
6062                 uint8_t buffer[2048];
6063         } expand_buffer;
6064         union {
6065                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6066                 uint8_t buffer[2048];
6067         } actions_rx;
6068         union {
6069                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
6070                 uint8_t buffer[2048];
6071         } actions_hairpin_tx;
6072         union {
6073                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
6074                 uint8_t buffer[2048];
6075         } items_tx;
6076         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
6077         struct mlx5_flow_rss_desc *rss_desc;
6078         const struct rte_flow_action *p_actions_rx;
6079         uint32_t i;
6080         uint32_t idx = 0;
6081         int hairpin_flow;
6082         struct rte_flow_attr attr_tx = { .priority = 0 };
6083         const struct rte_flow_action *actions;
6084         struct rte_flow_action *translated_actions = NULL;
6085         struct mlx5_flow_tunnel *tunnel;
6086         struct tunnel_default_miss_ctx default_miss_ctx = { 0, };
6087         struct mlx5_flow_workspace *wks = mlx5_flow_push_thread_workspace();
6088         struct mlx5_flow_split_info flow_split_info = {
6089                 .external = !!external,
6090                 .skip_scale = 0,
6091                 .flow_idx = 0,
6092                 .prefix_mark = 0,
6093                 .prefix_layers = 0,
6094                 .table_id = 0
6095         };
6096         int ret;
6097
6098         MLX5_ASSERT(wks);
6099         rss_desc = &wks->rss_desc;
6100         ret = flow_action_handles_translate(dev, original_actions,
6101                                             indir_actions,
6102                                             &indir_actions_n,
6103                                             &translated_actions, error);
6104         if (ret < 0) {
6105                 MLX5_ASSERT(translated_actions == NULL);
6106                 return 0;
6107         }
6108         actions = translated_actions ? translated_actions : original_actions;
6109         p_actions_rx = actions;
6110         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6111         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
6112                                 external, hairpin_flow, error);
6113         if (ret < 0)
6114                 goto error_before_hairpin_split;
6115         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
6116         if (!flow) {
6117                 rte_errno = ENOMEM;
6118                 goto error_before_hairpin_split;
6119         }
6120         if (hairpin_flow > 0) {
6121                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
6122                         rte_errno = EINVAL;
6123                         goto error_before_hairpin_split;
6124                 }
6125                 flow_hairpin_split(dev, actions, actions_rx.actions,
6126                                    actions_hairpin_tx.actions, items_tx.items,
6127                                    idx);
6128                 p_actions_rx = actions_rx.actions;
6129         }
6130         flow_split_info.flow_idx = idx;
6131         flow->drv_type = flow_get_drv_type(dev, attr);
6132         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
6133                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
6134         memset(rss_desc, 0, offsetof(struct mlx5_flow_rss_desc, queue));
6135         /* RSS Action only works on NIC RX domain */
6136         if (attr->ingress && !attr->transfer)
6137                 rss = flow_get_rss_action(dev, p_actions_rx);
6138         if (rss) {
6139                 if (flow_rss_workspace_adjust(wks, rss_desc, rss->queue_num))
6140                         return 0;
6141                 /*
6142                  * The following information is required by
6143                  * mlx5_flow_hashfields_adjust() in advance.
6144                  */
6145                 rss_desc->level = rss->level;
6146                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
6147                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
6148         }
6149         flow->dev_handles = 0;
6150         if (rss && rss->types) {
6151                 unsigned int graph_root;
6152
6153                 graph_root = find_graph_root(items, rss->level);
6154                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
6155                                            items, rss->types,
6156                                            mlx5_support_expansion, graph_root);
6157                 MLX5_ASSERT(ret > 0 &&
6158                        (unsigned int)ret < sizeof(expand_buffer.buffer));
6159         } else {
6160                 buf->entries = 1;
6161                 buf->entry[0].pattern = (void *)(uintptr_t)items;
6162         }
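        /*
         * At this point buf holds either the single original pattern or one
         * expanded pattern per RSS expansion entry; each entry is translated
         * into its own (possibly split) device flow in the loop below.
         */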
6163         rss_desc->shared_rss = flow_get_shared_rss_action(dev, indir_actions,
6164                                                       indir_actions_n);
6165         for (i = 0; i < buf->entries; ++i) {
6166                 /* Initialize flow split data. */
6167                 flow_split_info.prefix_layers = 0;
6168                 flow_split_info.prefix_mark = 0;
6169                 flow_split_info.skip_scale = 0;
6170                 /*
6171                  * The splitter may create multiple dev_flows,
6172                  * depending on configuration. In the simplest
6173                  * case it just creates unmodified original flow.
6174                  */
6175                 ret = flow_create_split_outer(dev, flow, attr,
6176                                               buf->entry[i].pattern,
6177                                               p_actions_rx, &flow_split_info,
6178                                               error);
6179                 if (ret < 0)
6180                         goto error;
6181                 if (is_flow_tunnel_steer_rule(dev, attr,
6182                                               buf->entry[i].pattern,
6183                                               p_actions_rx)) {
6184                         ret = flow_tunnel_add_default_miss(dev, flow, attr,
6185                                                            p_actions_rx,
6186                                                            idx,
6187                                                            &default_miss_ctx,
6188                                                            error);
6189                         if (ret < 0) {
6190                                 mlx5_free(default_miss_ctx.queue);
6191                                 goto error;
6192                         }
6193                 }
6194         }
6195         /* Create the tx flow. */
6196         if (hairpin_flow) {
6197                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
6198                 attr_tx.ingress = 0;
6199                 attr_tx.egress = 1;
6200                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
6201                                          actions_hairpin_tx.actions,
6202                                          idx, error);
6203                 if (!dev_flow)
6204                         goto error;
6205                 dev_flow->flow = flow;
6206                 dev_flow->external = 0;
6207                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
6208                               dev_flow->handle, next);
6209                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
6210                                          items_tx.items,
6211                                          actions_hairpin_tx.actions, error);
6212                 if (ret < 0)
6213                         goto error;
6214         }
6215         /*
6216          * Update the metadata register copy table. If extensive
6217          * metadata feature is enabled and registers are supported
6218          * we might create the extra rte_flow for each unique
6219          * MARK/FLAG action ID.
6220          *
6221          * The table is updated for ingress Flows only, because
6222          * the egress Flows belong to the different device and
6223          * copy table should be updated in peer NIC Rx domain.
6224          */
6225         if (attr->ingress &&
6226             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
6227                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
6228                 if (ret)
6229                         goto error;
6230         }
6231         /*
6232          * If the flow is external (from application) OR device is started,
6233          * OR mreg discover, then apply immediately.
6234          */
6235         if (external || dev->data->dev_started ||
6236             (attr->group == MLX5_FLOW_MREG_CP_TABLE_GROUP &&
6237              attr->priority == MLX5_FLOW_LOWEST_PRIO_INDICATOR)) {
6238                 ret = flow_drv_apply(dev, flow, error);
6239                 if (ret < 0)
6240                         goto error;
6241         }
6242         if (list) {
6243                 rte_spinlock_lock(&priv->flow_list_lock);
6244                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
6245                              flow, next);
6246                 rte_spinlock_unlock(&priv->flow_list_lock);
6247         }
6248         flow_rxq_flags_set(dev, flow);
6249         rte_free(translated_actions);
6250         tunnel = flow_tunnel_from_rule(dev, attr, items, actions);
6251         if (tunnel) {
6252                 flow->tunnel = 1;
6253                 flow->tunnel_id = tunnel->tunnel_id;
6254                 __atomic_add_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED);
6255                 mlx5_free(default_miss_ctx.queue);
6256         }
6257         mlx5_flow_pop_thread_workspace();
6258         return idx;
6259 error:
6260         MLX5_ASSERT(flow);
6261         ret = rte_errno; /* Save rte_errno before cleanup. */
6262         flow_mreg_del_copy_action(dev, flow);
6263         flow_drv_destroy(dev, flow);
6264         if (rss_desc->shared_rss)
6265                 __atomic_sub_fetch(&((struct mlx5_shared_action_rss *)
6266                         mlx5_ipool_get
6267                         (priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
6268                         rss_desc->shared_rss))->refcnt, 1, __ATOMIC_RELAXED);
6269         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
6270         rte_errno = ret; /* Restore rte_errno. */
6273         mlx5_flow_pop_thread_workspace();
6274 error_before_hairpin_split:
6275         rte_free(translated_actions);
6276         return 0;
6277 }
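/*
 * Note: flow_list_create() is the single creation path used both for
 * application flows and for PMD control flows. The sequence above is:
 * translate indirect actions, validate, split off the hairpin Tx part if
 * needed, expand the pattern for RSS, create the (possibly split) device
 * flows, update the metadata register copy table and finally apply the
 * rule to hardware (immediately for external flows or once the port is
 * started).
 */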
6278
6279 /**
6280  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
6281  * incoming packets to table 1.
6282  *
6283  * Other flow rules, requested for group n, will be created in
6284  * e-switch table n+1.
6285  * A jump action to e-switch group n is translated to a jump to table n+1.
6286  *
6287  * Used when working in switchdev mode, to utilise advantages of table 1
6288  * and above.
6289  *
6290  * @param dev
6291  *   Pointer to Ethernet device.
6292  *
6293  * @return
6294  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
6295  */
6296 struct rte_flow *
6297 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
6298 {
6299         const struct rte_flow_attr attr = {
6300                 .group = 0,
6301                 .priority = 0,
6302                 .ingress = 1,
6303                 .egress = 0,
6304                 .transfer = 1,
6305         };
6306         const struct rte_flow_item pattern = {
6307                 .type = RTE_FLOW_ITEM_TYPE_END,
6308         };
6309         struct rte_flow_action_jump jump = {
6310                 .group = 1,
6311         };
6312         const struct rte_flow_action actions[] = {
6313                 {
6314                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
6315                         .conf = &jump,
6316                 },
6317                 {
6318                         .type = RTE_FLOW_ACTION_TYPE_END,
6319                 },
6320         };
6321         struct mlx5_priv *priv = dev->data->dev_private;
6322         struct rte_flow_error error;
6323
6324         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
6325                                                    &attr, &pattern,
6326                                                    actions, false, &error);
6327 }
6328
6329 /**
6330  * Validate a flow supported by the NIC.
6331  *
6332  * @see rte_flow_validate()
6333  * @see rte_flow_ops
6334  */
6335 int
6336 mlx5_flow_validate(struct rte_eth_dev *dev,
6337                    const struct rte_flow_attr *attr,
6338                    const struct rte_flow_item items[],
6339                    const struct rte_flow_action original_actions[],
6340                    struct rte_flow_error *error)
6341 {
6342         int hairpin_flow;
6343         struct mlx5_translated_action_handle
6344                 indir_actions[MLX5_MAX_INDIRECT_ACTIONS];
6345         int indir_actions_n = MLX5_MAX_INDIRECT_ACTIONS;
6346         const struct rte_flow_action *actions;
6347         struct rte_flow_action *translated_actions = NULL;
6348         int ret = flow_action_handles_translate(dev, original_actions,
6349                                                 indir_actions,
6350                                                 &indir_actions_n,
6351                                                 &translated_actions, error);
6352
6353         if (ret)
6354                 return ret;
6355         actions = translated_actions ? translated_actions : original_actions;
6356         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
6357         ret = flow_drv_validate(dev, attr, items, actions,
6358                                 true, hairpin_flow, error);
6359         rte_free(translated_actions);
6360         return ret;
6361 }
6362
6363 /**
6364  * Create a flow.
6365  *
6366  * @see rte_flow_create()
6367  * @see rte_flow_ops
6368  */
6369 struct rte_flow *
6370 mlx5_flow_create(struct rte_eth_dev *dev,
6371                  const struct rte_flow_attr *attr,
6372                  const struct rte_flow_item items[],
6373                  const struct rte_flow_action actions[],
6374                  struct rte_flow_error *error)
6375 {
6376         struct mlx5_priv *priv = dev->data->dev_private;
6377
6378         /*
6379          * If the device is not started yet, the application is not allowed
6380          * to create a flow. PMD default flows and traffic control flows
6381          * are not affected.
6382          */
6383         if (unlikely(!dev->data->dev_started)) {
6384                 DRV_LOG(DEBUG, "port %u is not started when "
6385                         "inserting a flow", dev->data->port_id);
6386                 rte_flow_error_set(error, ENODEV,
6387                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6388                                    NULL,
6389                                    "port not started");
6390                 return NULL;
6391         }
6392
6393         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
6394                                   attr, items, actions, true, error);
6395 }
6396
6397 /**
6398  * Destroy a flow in a list.
6399  *
6400  * @param dev
6401  *   Pointer to Ethernet device.
6402  * @param list
6403  *   Pointer to the indexed flow list. If this parameter is NULL,
6404  *   the flow is not removed from any list. Note that as flows are
6405  *   added to the indexed list, the memory the list points to may
6406  *   change as flows are destroyed.
6407  * @param[in] flow_idx
6408  *   Index of flow to destroy.
6409  */
6410 static void
6411 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
6412                   uint32_t flow_idx)
6413 {
6414         struct mlx5_priv *priv = dev->data->dev_private;
6415         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
6416                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
6417
6418         if (!flow)
6419                 return;
6420         /*
6421          * Update RX queue flags only if port is started, otherwise it is
6422          * already clean.
6423          */
6424         if (dev->data->dev_started)
6425                 flow_rxq_flags_trim(dev, flow);
6426         flow_drv_destroy(dev, flow);
6427         if (list) {
6428                 rte_spinlock_lock(&priv->flow_list_lock);
6429                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
6430                              flow_idx, flow, next);
6431                 rte_spinlock_unlock(&priv->flow_list_lock);
6432         }
6433         if (flow->tunnel) {
6434                 struct mlx5_flow_tunnel *tunnel;
6435
6436                 tunnel = mlx5_find_tunnel_id(dev, flow->tunnel_id);
6437                 RTE_VERIFY(tunnel);
6438                 if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
6439                         mlx5_flow_tunnel_free(dev, tunnel);
6440         }
6441         flow_mreg_del_copy_action(dev, flow);
6442         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
6443 }
6444
6445 /**
6446  * Destroy all flows.
6447  *
6448  * @param dev
6449  *   Pointer to Ethernet device.
6450  * @param list
6451  *   Pointer to the Indexed flow list.
6452  * @param active
6453  *   If flushing is called actively.
6454  */
6455 void
6456 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
6457 {
6458         uint32_t num_flushed = 0;
6459
6460         while (*list) {
6461                 flow_list_destroy(dev, list, *list);
6462                 num_flushed++;
6463         }
6464         if (active) {
6465                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
6466                         dev->data->port_id, num_flushed);
6467         }
6468 }
6469
6470 /**
6471  * Stop all default actions for flows.
6472  *
6473  * @param dev
6474  *   Pointer to Ethernet device.
6475  */
6476 void
6477 mlx5_flow_stop_default(struct rte_eth_dev *dev)
6478 {
6479         flow_mreg_del_default_copy_action(dev);
6480         flow_rxq_flags_clear(dev);
6481 }
6482
6483 /**
6484  * Start all default actions for flows.
6485  *
6486  * @param dev
6487  *   Pointer to Ethernet device.
6488  * @return
6489  *   0 on success, a negative errno value otherwise and rte_errno is set.
6490  */
6491 int
6492 mlx5_flow_start_default(struct rte_eth_dev *dev)
6493 {
6494         struct rte_flow_error error;
6495
6496         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
6497         return flow_mreg_add_default_copy_action(dev, &error);
6498 }
6499
6500 /**
6501  * Release thread-specific flow workspace data.
6502  */
6503 void
6504 flow_release_workspace(void *data)
6505 {
6506         struct mlx5_flow_workspace *wks = data;
6507         struct mlx5_flow_workspace *next;
6508
6509         while (wks) {
6510                 next = wks->next;
6511                 free(wks->rss_desc.queue);
6512                 free(wks);
6513                 wks = next;
6514         }
6515 }
6516
6517 /**
6518  * Get thread specific current flow workspace.
6519  *
6520  * @return pointer to thread specific flow workspace data, NULL on error.
6521  */
6522 struct mlx5_flow_workspace*
6523 mlx5_flow_get_thread_workspace(void)
6524 {
6525         struct mlx5_flow_workspace *data;
6526
6527         data = mlx5_flow_os_get_specific_workspace();
6528         MLX5_ASSERT(data && data->inuse);
6529         if (!data || !data->inuse)
6530                 DRV_LOG(ERR, "flow workspace not initialized.");
6531         return data;
6532 }
6533
6534 /**
6535  * Allocate and init new flow workspace.
6536  *
6537  * @return pointer to flow workspace data, NULL on error.
6538  */
6539 static struct mlx5_flow_workspace*
6540 flow_alloc_thread_workspace(void)
6541 {
6542         struct mlx5_flow_workspace *data = calloc(1, sizeof(*data));
6543
6544         if (!data) {
6545                 DRV_LOG(ERR, "Failed to allocate flow workspace "
6546                         "memory.");
6547                 return NULL;
6548         }
6549         data->rss_desc.queue = calloc(1,
6550                         sizeof(uint16_t) * MLX5_RSSQ_DEFAULT_NUM);
6551         if (!data->rss_desc.queue)
6552                 goto err;
6553         data->rssq_num = MLX5_RSSQ_DEFAULT_NUM;
6554         return data;
6555 err:
6556         if (data->rss_desc.queue)
6557                 free(data->rss_desc.queue);
6558         free(data);
6559         return NULL;
6560 }
6561
6562 /**
6563  * Get new thread specific flow workspace.
6564  *
6565  * If the current workspace is in use, create a new one and set it as current.
6566  *
6567  * @return pointer to thread specific flow workspace data, NULL on error.
6568  */
6569 static struct mlx5_flow_workspace*
6570 mlx5_flow_push_thread_workspace(void)
6571 {
6572         struct mlx5_flow_workspace *curr;
6573         struct mlx5_flow_workspace *data;
6574
6575         curr = mlx5_flow_os_get_specific_workspace();
6576         if (!curr) {
6577                 data = flow_alloc_thread_workspace();
6578                 if (!data)
6579                         return NULL;
6580         } else if (!curr->inuse) {
6581                 data = curr;
6582         } else if (curr->next) {
6583                 data = curr->next;
6584         } else {
6585                 data = flow_alloc_thread_workspace();
6586                 if (!data)
6587                         return NULL;
6588                 curr->next = data;
6589                 data->prev = curr;
6590         }
6591         data->inuse = 1;
6592         data->flow_idx = 0;
6593         /* Set as current workspace */
6594         if (mlx5_flow_os_set_specific_workspace(data))
6595                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6596         return data;
6597 }
6598
6599 /**
6600  * Close current thread specific flow workspace.
6601  *
6602  * If a previous workspace is available, set it as current.
6605  */
6606 static void
6607 mlx5_flow_pop_thread_workspace(void)
6608 {
6609         struct mlx5_flow_workspace *data = mlx5_flow_get_thread_workspace();
6610
6611         if (!data)
6612                 return;
6613         if (!data->inuse) {
6614                 DRV_LOG(ERR, "Failed to close unused flow workspace.");
6615                 return;
6616         }
6617         data->inuse = 0;
6618         if (!data->prev)
6619                 return;
6620         if (mlx5_flow_os_set_specific_workspace(data->prev))
6621                 DRV_LOG(ERR, "Failed to set flow workspace to thread.");
6622 }
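/*
 * Illustrative pairing of the workspace helpers above, assuming a flow
 * creation that nests another creation (e.g. a metadata copy flow) on the
 * same thread:
 *
 *   wks = mlx5_flow_push_thread_workspace();    outer flow
 *     ... nested flow_list_create() pushes the next workspace entry ...
 *     mlx5_flow_pop_thread_workspace();         back to the outer entry
 *   mlx5_flow_pop_thread_workspace();
 */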
6623
6624 /**
6625  * Verify the flow list is empty
6626  *
6627  * @param dev
6628  *  Pointer to Ethernet device.
6629  *
6630  * @return the number of flows not released.
6631  */
6632 int
6633 mlx5_flow_verify(struct rte_eth_dev *dev)
6634 {
6635         struct mlx5_priv *priv = dev->data->dev_private;
6636         struct rte_flow *flow;
6637         uint32_t idx;
6638         int ret = 0;
6639
6640         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
6641                       flow, next) {
6642                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
6643                         dev->data->port_id, (void *)flow);
6644                 ++ret;
6645         }
6646         return ret;
6647 }
6648
6649 /**
6650  * Enable default hairpin egress flow.
6651  *
6652  * @param dev
6653  *   Pointer to Ethernet device.
6654  * @param queue
6655  *   The queue index.
6656  *
6657  * @return
6658  *   0 on success, a negative errno value otherwise and rte_errno is set.
6659  */
6660 int
6661 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
6662                             uint32_t queue)
6663 {
6664         struct mlx5_priv *priv = dev->data->dev_private;
6665         const struct rte_flow_attr attr = {
6666                 .egress = 1,
6667                 .priority = 0,
6668         };
6669         struct mlx5_rte_flow_item_tx_queue queue_spec = {
6670                 .queue = queue,
6671         };
6672         struct mlx5_rte_flow_item_tx_queue queue_mask = {
6673                 .queue = UINT32_MAX,
6674         };
6675         struct rte_flow_item items[] = {
6676                 {
6677                         .type = (enum rte_flow_item_type)
6678                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
6679                         .spec = &queue_spec,
6680                         .last = NULL,
6681                         .mask = &queue_mask,
6682                 },
6683                 {
6684                         .type = RTE_FLOW_ITEM_TYPE_END,
6685                 },
6686         };
6687         struct rte_flow_action_jump jump = {
6688                 .group = MLX5_HAIRPIN_TX_TABLE,
6689         };
6690         struct rte_flow_action actions[2];
6691         uint32_t flow_idx;
6692         struct rte_flow_error error;
6693
6694         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
6695         actions[0].conf = &jump;
6696         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
6697         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6698                                 &attr, items, actions, false, &error);
6699         if (!flow_idx) {
6700                 DRV_LOG(DEBUG,
6701                         "Failed to create ctrl flow: rte_errno(%d),"
6702                         " type(%d), message(%s)",
6703                         rte_errno, error.type,
6704                         error.message ? error.message : " (no stated reason)");
6705                 return -rte_errno;
6706         }
6707         return 0;
6708 }
6709
6710 /**
6711  * Enable a control flow configured from the control plane.
6712  *
6713  * @param dev
6714  *   Pointer to Ethernet device.
6715  * @param eth_spec
6716  *   An Ethernet flow spec to apply.
6717  * @param eth_mask
6718  *   An Ethernet flow mask to apply.
6719  * @param vlan_spec
6720  *   A VLAN flow spec to apply.
6721  * @param vlan_mask
6722  *   A VLAN flow mask to apply.
6723  *
6724  * @return
6725  *   0 on success, a negative errno value otherwise and rte_errno is set.
6726  */
6727 int
6728 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
6729                     struct rte_flow_item_eth *eth_spec,
6730                     struct rte_flow_item_eth *eth_mask,
6731                     struct rte_flow_item_vlan *vlan_spec,
6732                     struct rte_flow_item_vlan *vlan_mask)
6733 {
6734         struct mlx5_priv *priv = dev->data->dev_private;
6735         const struct rte_flow_attr attr = {
6736                 .ingress = 1,
6737                 .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
6738         };
6739         struct rte_flow_item items[] = {
6740                 {
6741                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6742                         .spec = eth_spec,
6743                         .last = NULL,
6744                         .mask = eth_mask,
6745                 },
6746                 {
6747                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
6748                                               RTE_FLOW_ITEM_TYPE_END,
6749                         .spec = vlan_spec,
6750                         .last = NULL,
6751                         .mask = vlan_mask,
6752                 },
6753                 {
6754                         .type = RTE_FLOW_ITEM_TYPE_END,
6755                 },
6756         };
6757         uint16_t queue[priv->reta_idx_n];
6758         struct rte_flow_action_rss action_rss = {
6759                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
6760                 .level = 0,
6761                 .types = priv->rss_conf.rss_hf,
6762                 .key_len = priv->rss_conf.rss_key_len,
6763                 .queue_num = priv->reta_idx_n,
6764                 .key = priv->rss_conf.rss_key,
6765                 .queue = queue,
6766         };
6767         struct rte_flow_action actions[] = {
6768                 {
6769                         .type = RTE_FLOW_ACTION_TYPE_RSS,
6770                         .conf = &action_rss,
6771                 },
6772                 {
6773                         .type = RTE_FLOW_ACTION_TYPE_END,
6774                 },
6775         };
6776         uint32_t flow_idx;
6777         struct rte_flow_error error;
6778         unsigned int i;
6779
6780         if (!priv->reta_idx_n || !priv->rxqs_n) {
6781                 return 0;
6782         }
6783         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
6784                 action_rss.types = 0;
6785         for (i = 0; i != priv->reta_idx_n; ++i)
6786                 queue[i] = (*priv->reta_idx)[i];
6787         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6788                                 &attr, items, actions, false, &error);
6789         if (!flow_idx)
6790                 return -rte_errno;
6791         return 0;
6792 }
6793
6794 /**
6795  * Enable a flow control configured from the control plane.
6796  *
6797  * @param dev
6798  *   Pointer to Ethernet device.
6799  * @param eth_spec
6800  *   An Ethernet flow spec to apply.
6801  * @param eth_mask
6802  *   An Ethernet flow mask to apply.
6803  *
6804  * @return
6805  *   0 on success, a negative errno value otherwise and rte_errno is set.
6806  */
6807 int
6808 mlx5_ctrl_flow(struct rte_eth_dev *dev,
6809                struct rte_flow_item_eth *eth_spec,
6810                struct rte_flow_item_eth *eth_mask)
6811 {
6812         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
6813 }
6814
6815 /**
6816  * Create a default miss flow rule matching LACP traffic.
6817  *
6818  * @param dev
6819  *   Pointer to Ethernet device.
6822  *
6823  * @return
6824  *   0 on success, a negative errno value otherwise and rte_errno is set.
6825  */
6826 int
6827 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
6828 {
6829         struct mlx5_priv *priv = dev->data->dev_private;
6830         /*
6831          * The LACP matching uses only the ether type, since matching on a
6832          * multicast destination MAC causes the kernel to deprioritize this flow.
6833          */
6834         static const struct rte_flow_item_eth lacp_spec = {
6835                 .type = RTE_BE16(0x8809),
6836         };
6837         static const struct rte_flow_item_eth lacp_mask = {
6838                 .type = 0xffff,
6839         };
6840         const struct rte_flow_attr attr = {
6841                 .ingress = 1,
6842         };
6843         struct rte_flow_item items[] = {
6844                 {
6845                         .type = RTE_FLOW_ITEM_TYPE_ETH,
6846                         .spec = &lacp_spec,
6847                         .mask = &lacp_mask,
6848                 },
6849                 {
6850                         .type = RTE_FLOW_ITEM_TYPE_END,
6851                 },
6852         };
6853         struct rte_flow_action actions[] = {
6854                 {
6855                         .type = (enum rte_flow_action_type)
6856                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
6857                 },
6858                 {
6859                         .type = RTE_FLOW_ACTION_TYPE_END,
6860                 },
6861         };
6862         struct rte_flow_error error;
6863         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
6864                                 &attr, items, actions, false, &error);
6865
6866         if (!flow_idx)
6867                 return -rte_errno;
6868         return 0;
6869 }
6870
6871 /**
6872  * Destroy a flow.
6873  *
6874  * @see rte_flow_destroy()
6875  * @see rte_flow_ops
6876  */
6877 int
6878 mlx5_flow_destroy(struct rte_eth_dev *dev,
6879                   struct rte_flow *flow,
6880                   struct rte_flow_error *error __rte_unused)
6881 {
6882         struct mlx5_priv *priv = dev->data->dev_private;
6883
6884         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
6885         return 0;
6886 }
6887
6888 /**
6889  * Destroy all flows.
6890  *
6891  * @see rte_flow_flush()
6892  * @see rte_flow_ops
6893  */
6894 int
6895 mlx5_flow_flush(struct rte_eth_dev *dev,
6896                 struct rte_flow_error *error __rte_unused)
6897 {
6898         struct mlx5_priv *priv = dev->data->dev_private;
6899
6900         mlx5_flow_list_flush(dev, &priv->flows, false);
6901         return 0;
6902 }
6903
6904 /**
6905  * Isolated mode.
6906  *
6907  * @see rte_flow_isolate()
6908  * @see rte_flow_ops
6909  */
6910 int
6911 mlx5_flow_isolate(struct rte_eth_dev *dev,
6912                   int enable,
6913                   struct rte_flow_error *error)
6914 {
6915         struct mlx5_priv *priv = dev->data->dev_private;
6916
6917         if (dev->data->dev_started) {
6918                 rte_flow_error_set(error, EBUSY,
6919                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6920                                    NULL,
6921                                    "port must be stopped first");
6922                 return -rte_errno;
6923         }
6924         priv->isolated = !!enable;
6925         if (enable)
6926                 dev->dev_ops = &mlx5_dev_ops_isolate;
6927         else
6928                 dev->dev_ops = &mlx5_dev_ops;
6929
6930         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
6931         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
6932
6933         return 0;
6934 }
6935
6936 /**
6937  * Query a flow.
6938  *
6939  * @see rte_flow_query()
6940  * @see rte_flow_ops
6941  */
6942 static int
6943 flow_drv_query(struct rte_eth_dev *dev,
6944                uint32_t flow_idx,
6945                const struct rte_flow_action *actions,
6946                void *data,
6947                struct rte_flow_error *error)
6948 {
6949         struct mlx5_priv *priv = dev->data->dev_private;
6950         const struct mlx5_flow_driver_ops *fops;
6951         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
6952                                                [MLX5_IPOOL_RTE_FLOW],
6953                                                flow_idx);
6954         enum mlx5_flow_drv_type ftype;
6955
6956         if (!flow) {
6957                 return rte_flow_error_set(error, ENOENT,
6958                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
6959                           NULL,
6960                           "invalid flow handle");
6961         }
6962         ftype = flow->drv_type;
6963         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
6964         fops = flow_get_drv_ops(ftype);
6965
6966         return fops->query(dev, flow, actions, data, error);
6967 }
6968
6969 /**
6970  * Query a flow.
6971  *
6972  * @see rte_flow_query()
6973  * @see rte_flow_ops
6974  */
6975 int
6976 mlx5_flow_query(struct rte_eth_dev *dev,
6977                 struct rte_flow *flow,
6978                 const struct rte_flow_action *actions,
6979                 void *data,
6980                 struct rte_flow_error *error)
6981 {
6982         int ret;
6983
6984         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
6985                              error);
6986         if (ret < 0)
6987                 return ret;
6988         return 0;
6989 }
6990
6991 /**
6992  * Get rte_flow callbacks.
6993  *
6994  * @param dev
6995  *   Pointer to Ethernet device structure.
6996  * @param ops
6997  *   Pointer to operation-specific structure.
6998  *
6999  * @return 0
7000  */
7001 int
7002 mlx5_flow_ops_get(struct rte_eth_dev *dev __rte_unused,
7003                   const struct rte_flow_ops **ops)
7004 {
7005         *ops = &mlx5_flow_ops;
7006         return 0;
7007 }
7008
7009 /**
7010  * Validate meter policy actions.
7011  * Dispatcher for action type specific validation.
7012  *
7013  * @param[in] dev
7014  *   Pointer to the Ethernet device structure.
7015  * @param[in] action
7016  *   The meter policy action object to validate.
7017  * @param[in] attr
7018  *   Attributes of flow to determine steering domain.
7019  * @param[out] is_rss
7020  *   Is RSS or not.
7021  * @param[out] domain_bitmap
7022  *   Domain bitmap.
7023  * @param[out] is_def_policy
7024  *   Is default policy or not.
7025  * @param[out] error
7026  *   Perform verbose error reporting if not NULL. Initialized in case of
7027  *   error only.
7028  *
7029  * @return
7030  *   0 on success, otherwise negative errno value.
7031  */
7032 int
7033 mlx5_flow_validate_mtr_acts(struct rte_eth_dev *dev,
7034                         const struct rte_flow_action *actions[RTE_COLORS],
7035                         struct rte_flow_attr *attr,
7036                         bool *is_rss,
7037                         uint8_t *domain_bitmap,
7038                         bool *is_def_policy,
7039                         struct rte_mtr_error *error)
7040 {
7041         const struct mlx5_flow_driver_ops *fops;
7042
7043         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7044         return fops->validate_mtr_acts(dev, actions, attr,
7045                         is_rss, domain_bitmap, is_def_policy, error);
7046 }
7047
7048 /**
7049  * Destroy meter policy actions.
7050  *
7051  * @param[in] dev
7052  *   Pointer to Ethernet device.
7053  * @param[in] mtr_policy
7054  *   Meter policy struct.
7055  */
7056 void
7057 mlx5_flow_destroy_mtr_acts(struct rte_eth_dev *dev,
7058                       struct mlx5_flow_meter_policy *mtr_policy)
7059 {
7060         const struct mlx5_flow_driver_ops *fops;
7061
7062         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7063         fops->destroy_mtr_acts(dev, mtr_policy);
7064 }
7065
7066 /**
7067  * Create policy action, lock free,
7068  * (mutex should be acquired by caller).
7069  * Dispatcher for action type specific call.
7070  *
7071  * @param[in] dev
7072  *   Pointer to the Ethernet device structure.
7073  * @param[in] mtr_policy
7074  *   Meter policy struct.
7075  * @param[in] action
7076  *   Action specification used to create meter actions.
7077  * @param[out] error
7078  *   Perform verbose error reporting if not NULL. Initialized in case of
7079  *   error only.
7080  *
7081  * @return
7082  *   0 on success, otherwise negative errno value.
7083  */
7084 int
7085 mlx5_flow_create_mtr_acts(struct rte_eth_dev *dev,
7086                       struct mlx5_flow_meter_policy *mtr_policy,
7087                       const struct rte_flow_action *actions[RTE_COLORS],
7088                       struct rte_mtr_error *error)
7089 {
7090         const struct mlx5_flow_driver_ops *fops;
7091
7092         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7093         return fops->create_mtr_acts(dev, mtr_policy, actions, error);
7094 }
7095
7096 /**
7097  * Create policy rules, lock free,
7098  * (mutex should be acquired by caller).
7099  * Dispatcher for action type specific call.
7100  *
7101  * @param[in] dev
7102  *   Pointer to the Ethernet device structure.
7103  * @param[in] mtr_policy
7104  *   Meter policy struct.
7105  *
7106  * @return
7107  *   0 on success, -1 otherwise.
7108  */
7109 int
7110 mlx5_flow_create_policy_rules(struct rte_eth_dev *dev,
7111                              struct mlx5_flow_meter_policy *mtr_policy)
7112 {
7113         const struct mlx5_flow_driver_ops *fops;
7114
7115         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7116         return fops->create_policy_rules(dev, mtr_policy);
7117 }
7118
7119 /**
7120  * Destroy policy rules, lock free,
7121  * (mutex should be acquired by caller).
7122  * Dispatcher for action type specific call.
7123  *
7124  * @param[in] dev
7125  *   Pointer to the Ethernet device structure.
7126  * @param[in] mtr_policy
7127  *   Meter policy struct.
7128  */
7129 void
7130 mlx5_flow_destroy_policy_rules(struct rte_eth_dev *dev,
7131                              struct mlx5_flow_meter_policy *mtr_policy)
7132 {
7133         const struct mlx5_flow_driver_ops *fops;
7134
7135         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7136         fops->destroy_policy_rules(dev, mtr_policy);
7137 }
7138
7139 /**
7140  * Destroy the default policy table set.
7141  *
7142  * @param[in] dev
7143  *   Pointer to Ethernet device.
7144  */
7145 void
7146 mlx5_flow_destroy_def_policy(struct rte_eth_dev *dev)
7147 {
7148         const struct mlx5_flow_driver_ops *fops;
7149
7150         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7151         fops->destroy_def_policy(dev);
7152 }
7153
7154 /**
7155  * Create the default policy table set.
7156  *
7157  * @param[in] dev
7158  *   Pointer to Ethernet device.
7159  *
7160  * @return
7161  *   0 on success, -1 otherwise.
7162  */
7163 int
7164 mlx5_flow_create_def_policy(struct rte_eth_dev *dev)
7165 {
7166         const struct mlx5_flow_driver_ops *fops;
7167
7168         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7169         return fops->create_def_policy(dev);
7170 }
7171
7172 /**
7173  * Create the needed meter and suffix tables.
7174  *
7175  * @param[in] dev
7176  *   Pointer to Ethernet device.
7177  *
7178  * @return
7179  *   0 on success, -1 otherwise.
7180  */
7181 int
7182 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
7183                         struct mlx5_flow_meter_info *fm,
7184                         uint32_t mtr_idx,
7185                         uint8_t domain_bitmap)
7186 {
7187         const struct mlx5_flow_driver_ops *fops;
7188
7189         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7190         return fops->create_mtr_tbls(dev, fm, mtr_idx, domain_bitmap);
7191 }
7192
7193 /**
7194  * Destroy the meter table set.
7195  *
7196  * @param[in] dev
7197  *   Pointer to Ethernet device.
7198  * @param[in] fm
7199  *   Pointer to the flow meter.
7200  */
7201 void
7202 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
7203                            struct mlx5_flow_meter_info *fm)
7204 {
7205         const struct mlx5_flow_driver_ops *fops;
7206
7207         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7208         fops->destroy_mtr_tbls(dev, fm);
7209 }
7210
7211 /**
7212  * Destroy the global meter drop table.
7213  *
7214  * @param[in] dev
7215  *   Pointer to Ethernet device.
7216  */
7217 void
7218 mlx5_flow_destroy_mtr_drop_tbls(struct rte_eth_dev *dev)
7219 {
7220         const struct mlx5_flow_driver_ops *fops;
7221
7222         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7223         fops->destroy_mtr_drop_tbls(dev);
7224 }
7225
7226 /**
7227  * Allocate the needed aso flow meter id.
7228  *
7229  * @param[in] dev
7230  *   Pointer to Ethernet device.
7231  *
7232  * @return
7233  *   Index to the ASO flow meter on success, 0 otherwise.
7234  */
7235 uint32_t
7236 mlx5_flow_mtr_alloc(struct rte_eth_dev *dev)
7237 {
7238         const struct mlx5_flow_driver_ops *fops;
7239
7240         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7241         return fops->create_meter(dev);
7242 }
7243
7244 /**
7245  * Free the aso flow meter id.
7246  *
7247  * @param[in] dev
7248  *   Pointer to Ethernet device.
7249  * @param[in] mtr_idx
7250  *   Index to the ASO flow meter to be freed.
7254  */
7255 void
7256 mlx5_flow_mtr_free(struct rte_eth_dev *dev, uint32_t mtr_idx)
7257 {
7258         const struct mlx5_flow_driver_ops *fops;
7259
7260         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7261         fops->free_meter(dev, mtr_idx);
7262 }
7263
7264 /**
7265  * Allocate a counter.
7266  *
7267  * @param[in] dev
7268  *   Pointer to Ethernet device structure.
7269  *
7270  * @return
7271  *   Index to the allocated counter on success, 0 otherwise.
7272  */
7273 uint32_t
7274 mlx5_counter_alloc(struct rte_eth_dev *dev)
7275 {
7276         const struct mlx5_flow_driver_ops *fops;
7277         struct rte_flow_attr attr = { .transfer = 0 };
7278
7279         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7280                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7281                 return fops->counter_alloc(dev);
7282         }
7283         DRV_LOG(ERR,
7284                 "port %u counter allocate is not supported.",
7285                  dev->data->port_id);
7286         return 0;
7287 }
7288
7289 /**
7290  * Free a counter.
7291  *
7292  * @param[in] dev
7293  *   Pointer to Ethernet device structure.
7294  * @param[in] cnt
7295  *   Index to the counter to be freed.
7296  */
7297 void
7298 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
7299 {
7300         const struct mlx5_flow_driver_ops *fops;
7301         struct rte_flow_attr attr = { .transfer = 0 };
7302
7303         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7304                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7305                 fops->counter_free(dev, cnt);
7306                 return;
7307         }
7308         DRV_LOG(ERR,
7309                 "port %u counter free is not supported.",
7310                  dev->data->port_id);
7311 }
7312
7313 /**
7314  * Query counter statistics.
7315  *
7316  * @param[in] dev
7317  *   Pointer to Ethernet device structure.
7318  * @param[in] cnt
7319  *   Index to counter to query.
7320  * @param[in] clear
7321  *   Set to clear counter statistics.
7322  * @param[out] pkts
7323  *   The counter hits packets number to save.
7324  * @param[out] bytes
7325  *   The counter hits bytes number to save.
7326  *
7327  * @return
7328  *   0 on success, a negative errno value otherwise.
7329  */
7330 int
7331 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
7332                    bool clear, uint64_t *pkts, uint64_t *bytes)
7333 {
7334         const struct mlx5_flow_driver_ops *fops;
7335         struct rte_flow_attr attr = { .transfer = 0 };
7336
7337         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7338                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7339                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
7340         }
7341         DRV_LOG(ERR,
7342                 "port %u counter query is not supported.",
7343                  dev->data->port_id);
7344         return -ENOTSUP;
7345 }
7346
7347 /**
7348  * Allocate new memory for the counter values, wrapped by all the needed
7349  * management structures.
7350  *
7351  * @param[in] sh
7352  *   Pointer to mlx5_dev_ctx_shared object.
7353  *
7354  * @return
7355  *   0 on success, a negative errno value otherwise.
7356  */
7357 static int
7358 mlx5_flow_create_counter_stat_mem_mng(struct mlx5_dev_ctx_shared *sh)
7359 {
7360         struct mlx5_devx_mkey_attr mkey_attr;
7361         struct mlx5_counter_stats_mem_mng *mem_mng;
7362         volatile struct flow_counter_stats *raw_data;
7363         int raws_n = MLX5_CNT_CONTAINER_RESIZE + MLX5_MAX_PENDING_QUERIES;
7364         int size = (sizeof(struct flow_counter_stats) *
7365                         MLX5_COUNTERS_PER_POOL +
7366                         sizeof(struct mlx5_counter_stats_raw)) * raws_n +
7367                         sizeof(struct mlx5_counter_stats_mem_mng);
7368         size_t pgsize = rte_mem_page_size();
7369         uint8_t *mem;
7370         int i;
7371
7372         if (pgsize == (size_t)-1) {
7373                 DRV_LOG(ERR, "Failed to get mem page size");
7374                 rte_errno = ENOMEM;
7375                 return -ENOMEM;
7376         }
7377         mem = mlx5_malloc(MLX5_MEM_ZERO, size, pgsize, SOCKET_ID_ANY);
7378         if (!mem) {
7379                 rte_errno = ENOMEM;
7380                 return -ENOMEM;
7381         }
7382         mem_mng = (struct mlx5_counter_stats_mem_mng *)(mem + size) - 1;
7383         size = sizeof(*raw_data) * MLX5_COUNTERS_PER_POOL * raws_n;
7384         mem_mng->umem = mlx5_os_umem_reg(sh->ctx, mem, size,
7385                                                  IBV_ACCESS_LOCAL_WRITE);
7386         if (!mem_mng->umem) {
7387                 rte_errno = errno;
7388                 mlx5_free(mem);
7389                 return -rte_errno;
7390         }
7391         memset(&mkey_attr, 0, sizeof(mkey_attr));
7392         mkey_attr.addr = (uintptr_t)mem;
7393         mkey_attr.size = size;
7394         mkey_attr.umem_id = mlx5_os_get_umem_id(mem_mng->umem);
7395         mkey_attr.pd = sh->pdn;
7396         mkey_attr.relaxed_ordering_write = sh->cmng.relaxed_ordering_write;
7397         mkey_attr.relaxed_ordering_read = sh->cmng.relaxed_ordering_read;
7398         mem_mng->dm = mlx5_devx_cmd_mkey_create(sh->ctx, &mkey_attr);
7399         if (!mem_mng->dm) {
7400                 mlx5_os_umem_dereg(mem_mng->umem);
7401                 rte_errno = errno;
7402                 mlx5_free(mem);
7403                 return -rte_errno;
7404         }
7405         mem_mng->raws = (struct mlx5_counter_stats_raw *)(mem + size);
7406         raw_data = (volatile struct flow_counter_stats *)mem;
7407         for (i = 0; i < raws_n; ++i) {
7408                 mem_mng->raws[i].mem_mng = mem_mng;
7409                 mem_mng->raws[i].data = raw_data + i * MLX5_COUNTERS_PER_POOL;
7410         }
7411         for (i = 0; i < MLX5_MAX_PENDING_QUERIES; ++i)
7412                 LIST_INSERT_HEAD(&sh->cmng.free_stat_raws,
7413                                  mem_mng->raws + MLX5_CNT_CONTAINER_RESIZE + i,
7414                                  next);
7415         LIST_INSERT_HEAD(&sh->cmng.mem_mngs, mem_mng, next);
7416         sh->cmng.mem_mng = mem_mng;
7417         return 0;
7418 }
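/*
 * Rough layout of the memory allocated above (per the size formulas in the
 * function, not literal byte counts):
 *
 *   [ raw counter data: MLX5_COUNTERS_PER_POOL * raws_n entries       ]
 *   [ raws[]: one mlx5_counter_stats_raw descriptor per raw            ]
 *   [ mlx5_counter_stats_mem_mng placed at the very end of the buffer  ]
 *
 * Only the raw counter data area is registered as UMEM and mkey for the
 * DevX asynchronous counter queries.
 */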
7419
7420 /**
7421  * Set the statistic memory to the new counter pool.
7422  *
7423  * @param[in] sh
7424  *   Pointer to mlx5_dev_ctx_shared object.
7425  * @param[in] pool
7426  *   Pointer to the pool to set the statistic memory.
7427  *
7428  * @return
7429  *   0 on success, a negative errno value otherwise.
7430  */
7431 static int
7432 mlx5_flow_set_counter_stat_mem(struct mlx5_dev_ctx_shared *sh,
7433                                struct mlx5_flow_counter_pool *pool)
7434 {
7435         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7436         /* Resize statistic memory once used out. */
7437         if (!(pool->index % MLX5_CNT_CONTAINER_RESIZE) &&
7438             mlx5_flow_create_counter_stat_mem_mng(sh)) {
7439                 DRV_LOG(ERR, "Cannot resize counter stat mem.");
7440                 return -1;
7441         }
7442         rte_spinlock_lock(&pool->sl);
7443         pool->raw = cmng->mem_mng->raws + pool->index %
7444                     MLX5_CNT_CONTAINER_RESIZE;
7445         rte_spinlock_unlock(&pool->sl);
7446         pool->raw_hw = NULL;
7447         return 0;
7448 }
7449
7450 #define MLX5_POOL_QUERY_FREQ_US 1000000
7451
7452 /**
7453  * Set the periodic procedure for triggering asynchronous batch queries for all
7454  * the counter pools.
7455  *
7456  * @param[in] sh
7457  *   Pointer to mlx5_dev_ctx_shared object.
7458  */
7459 void
7460 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
7461 {
7462         uint32_t pools_n, us;
7463
7464         pools_n = __atomic_load_n(&sh->cmng.n_valid, __ATOMIC_RELAXED);
7465         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
7466         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
7467         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
7468                 sh->cmng.query_thread_on = 0;
7469                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
7470         } else {
7471                 sh->cmng.query_thread_on = 1;
7472         }
7473 }
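/*
 * Illustrative arithmetic: with MLX5_POOL_QUERY_FREQ_US = 1000000 and 4
 * valid pools, the alarm is re-armed every 250000 us and each invocation
 * queries one pool, so every pool is queried roughly once per second.
 */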
7474
7475 /**
7476  * The periodic procedure for triggering asynchronous batch queries for all the
7477  * counter pools. This function is probably called by the host thread.
7478  *
7479  * @param[in] arg
7480  *   The parameter for the alarm process.
7481  */
7482 void
7483 mlx5_flow_query_alarm(void *arg)
7484 {
7485         struct mlx5_dev_ctx_shared *sh = arg;
7486         int ret;
7487         uint16_t pool_index = sh->cmng.pool_index;
7488         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7489         struct mlx5_flow_counter_pool *pool;
7490         uint16_t n_valid;
7491
7492         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
7493                 goto set_alarm;
7494         rte_spinlock_lock(&cmng->pool_update_sl);
7495         pool = cmng->pools[pool_index];
7496         n_valid = cmng->n_valid;
7497         rte_spinlock_unlock(&cmng->pool_update_sl);
7498         /* Set the statistic memory to the newly created pool. */
7499         if ((!pool->raw && mlx5_flow_set_counter_stat_mem(sh, pool)))
7500                 goto set_alarm;
7501         if (pool->raw_hw)
7502                 /* There is a pool query in progress. */
7503                 goto set_alarm;
7504         pool->raw_hw =
7505                 LIST_FIRST(&sh->cmng.free_stat_raws);
7506         if (!pool->raw_hw)
7507                 /* No free counter statistics raw memory. */
7508                 goto set_alarm;
7509         /*
7510          * Identify the counters released between query trigger and query
7511          * handle more efficiently. The counter released in this gap period
7512          * should wait for a new round of query as the new arrived packets
7513          * will not be taken into account.
7514          */
7515         pool->query_gen++;
7516         ret = mlx5_devx_cmd_flow_counter_query(pool->min_dcs, 0,
7517                                                MLX5_COUNTERS_PER_POOL,
7518                                                NULL, NULL,
7519                                                pool->raw_hw->mem_mng->dm->id,
7520                                                (void *)(uintptr_t)
7521                                                pool->raw_hw->data,
7522                                                sh->devx_comp,
7523                                                (uint64_t)(uintptr_t)pool);
7524         if (ret) {
7525                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
7526                         " %d", pool->min_dcs->id);
7527                 pool->raw_hw = NULL;
7528                 goto set_alarm;
7529         }
7530         LIST_REMOVE(pool->raw_hw, next);
7531         sh->cmng.pending_queries++;
7532         pool_index++;
7533         if (pool_index >= n_valid)
7534                 pool_index = 0;
7535 set_alarm:
7536         sh->cmng.pool_index = pool_index;
7537         mlx5_set_query_alarm(sh);
7538 }
7539
7540 /**
7541  * Check for newly aged flows in the counter pool and raise the age event.
7542  *
7543  * @param[in] sh
7544  *   Pointer to mlx5_dev_ctx_shared object.
7545  * @param[in] pool
7546  *   Pointer to Current counter pool.
7547  */
7548 static void
7549 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
7550                    struct mlx5_flow_counter_pool *pool)
7551 {
7552         struct mlx5_priv *priv;
7553         struct mlx5_flow_counter *cnt;
7554         struct mlx5_age_info *age_info;
7555         struct mlx5_age_param *age_param;
7556         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
7557         struct mlx5_counter_stats_raw *prev = pool->raw;
7558         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
7559         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
7560         uint16_t expected = AGE_CANDIDATE;
7561         uint32_t i;
7562
7563         pool->time_of_last_age_check = curr_time;
7564         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
7565                 cnt = MLX5_POOL_GET_CNT(pool, i);
7566                 age_param = MLX5_CNT_TO_AGE(cnt);
7567                 if (__atomic_load_n(&age_param->state,
7568                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
7569                         continue;
7570                 if (cur->data[i].hits != prev->data[i].hits) {
7571                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
7572                                          __ATOMIC_RELAXED);
7573                         continue;
7574                 }
7575                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
7576                                        time_delta,
7577                                        __ATOMIC_RELAXED) <= age_param->timeout)
7578                         continue;
7579                 /**
7580                  * Hold the lock first; otherwise, if the release
7581                  * happens between setting the AGE_TMOUT state and
7582                  * the tailq operation, the release procedure may
7583                  * delete a non-existent tailq node.
7584                  */
7585                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
7586                 age_info = GET_PORT_AGE_INFO(priv);
7587                 rte_spinlock_lock(&age_info->aged_sl);
7588                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
7589                                                 AGE_TMOUT, false,
7590                                                 __ATOMIC_RELAXED,
7591                                                 __ATOMIC_RELAXED)) {
7592                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
7593                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
7594                 }
7595                 rte_spinlock_unlock(&age_info->aged_sl);
7596         }
7597         mlx5_age_event_prepare(sh);
7598 }
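/*
 * Aging arithmetic, for illustration: with a 10 second timeout and this
 * pool visited about once per second, sec_since_last_hit grows by
 * time_delta on every check in which the hit count did not change; once
 * the accumulated value exceeds the timeout, the counter is queued on the
 * aged list and MLX5_AGE_EVENT_NEW is signalled.
 */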
7599
7600 /**
7601  * Handler for the HW response with ready values from an asynchronous batch
7602  * query. This function is probably called by the host thread.
7603  *
7604  * @param[in] sh
7605  *   The pointer to the shared device context.
7606  * @param[in] async_id
7607  *   The Devx async ID.
7608  * @param[in] status
7609  *   The status of the completion.
7610  */
7611 void
7612 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
7613                                   uint64_t async_id, int status)
7614 {
7615         struct mlx5_flow_counter_pool *pool =
7616                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
7617         struct mlx5_counter_stats_raw *raw_to_free;
7618         uint8_t query_gen = pool->query_gen ^ 1;
7619         struct mlx5_flow_counter_mng *cmng = &sh->cmng;
7620         enum mlx5_counter_type cnt_type =
7621                 pool->is_aged ? MLX5_COUNTER_TYPE_AGE :
7622                                 MLX5_COUNTER_TYPE_ORIGIN;
7623
7624         if (unlikely(status)) {
7625                 raw_to_free = pool->raw_hw;
7626         } else {
7627                 raw_to_free = pool->raw;
7628                 if (pool->is_aged)
7629                         mlx5_flow_aging_check(sh, pool);
7630                 rte_spinlock_lock(&pool->sl);
7631                 pool->raw = pool->raw_hw;
7632                 rte_spinlock_unlock(&pool->sl);
7633                 /* Be sure the new raw counters data is updated in memory. */
7634                 rte_io_wmb();
7635                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
7636                         rte_spinlock_lock(&cmng->csl[cnt_type]);
7637                         TAILQ_CONCAT(&cmng->counters[cnt_type],
7638                                      &pool->counters[query_gen], next);
7639                         rte_spinlock_unlock(&cmng->csl[cnt_type]);
7640                 }
7641         }
7642         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
7643         pool->raw_hw = NULL;
7644         sh->cmng.pending_queries--;
7645 }
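/*
 * Each pool keeps two raw statistic buffers: pool->raw holds the last
 * completed query results and pool->raw_hw the query in flight. On a
 * successful completion they are swapped under pool->sl and the counters
 * released during the previous query generation become reusable again.
 */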
7646
7647 static int
7648 flow_group_to_table(uint32_t port_id, uint32_t group, uint32_t *table,
7649                     const struct flow_grp_info *grp_info,
7650                     struct rte_flow_error *error)
7651 {
7652         if (grp_info->transfer && grp_info->external &&
7653             grp_info->fdb_def_rule) {
7654                 if (group == UINT32_MAX)
7655                         return rte_flow_error_set
7656                                                 (error, EINVAL,
7657                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
7658                                                  NULL,
7659                                                  "group index not supported");
7660                 *table = group + 1;
7661         } else {
7662                 *table = group;
7663         }
7664         DRV_LOG(DEBUG, "port %u group=%#x table=%#x", port_id, group, *table);
7665         return 0;
7666 }
7667
7668 /**
7669  * Translate the rte_flow group index to HW table value.
7670  *
7671  * If tunnel offload is disabled, all group ids are converted to flow
7672  * table ids using the standard method.
7673  * If tunnel offload is enabled, group id can be converted using the
7674  * standard or tunnel conversion method. Group conversion method
7675  * selection depends on flags in `grp_info` parameter:
7676  * - Internal (grp_info.external == 0) groups conversion uses the
7677  *   standard method.
7678  * - Group ids in JUMP action converted with the tunnel conversion.
7679  * - Group id in rule attribute conversion depends on a rule type and
7680  *   group id value:
7681  *   ** non zero group attributes converted with the tunnel method
7682  *   ** zero group attribute in non-tunnel rule is converted using the
7683  *      standard method - there's only one root table
7684  *   ** zero group attribute in steer tunnel rule is converted with the
7685  *      standard method - single root table
7686  *   ** zero group attribute in match tunnel rule is a special OvS
7687  *      case: that value is used for portability reasons. That group
7688  *      id is converted with the tunnel conversion method.
7689  *
7690  * @param[in] dev
7691  *   Port device
7692  * @param[in] tunnel
7693  *   PMD tunnel offload object
7694  * @param[in] group
7695  *   rte_flow group index value.
7696  * @param[out] table
7697  *   HW table value.
7698  * @param[in] grp_info
7699  *   flags used for conversion
7700  * @param[out] error
7701  *   Pointer to error structure.
7702  *
7703  * @return
7704  *   0 on success, a negative errno value otherwise and rte_errno is set.
7705  */
7706 int
7707 mlx5_flow_group_to_table(struct rte_eth_dev *dev,
7708                          const struct mlx5_flow_tunnel *tunnel,
7709                          uint32_t group, uint32_t *table,
7710                          const struct flow_grp_info *grp_info,
7711                          struct rte_flow_error *error)
7712 {
7713         int ret;
7714         bool standard_translation;
7715
7716         if (!grp_info->skip_scale && grp_info->external &&
7717             group < MLX5_MAX_TABLES_EXTERNAL)
7718                 group *= MLX5_FLOW_TABLE_FACTOR;
7719         if (is_tunnel_offload_active(dev)) {
7720                 standard_translation = !grp_info->external ||
7721                                         grp_info->std_tbl_fix;
7722         } else {
7723                 standard_translation = true;
7724         }
7725         DRV_LOG(DEBUG,
7726                 "port %u group=%u transfer=%d external=%d fdb_def_rule=%d translate=%s",
7727                 dev->data->port_id, group, grp_info->transfer,
7728                 grp_info->external, grp_info->fdb_def_rule,
7729                 standard_translation ? "STANDARD" : "TUNNEL");
7730         if (standard_translation)
7731                 ret = flow_group_to_table(dev->data->port_id, group, table,
7732                                           grp_info, error);
7733         else
7734                 ret = tunnel_flow_group_to_flow_table(dev, tunnel, group,
7735                                                       table, error);
7736
7737         return ret;
7738 }
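/*
 * Illustrative sketch (hypothetical values, "dev" and "error" assumed to be
 * in scope; not part of the driver): with tunnel offload inactive, an
 * external group below MLX5_MAX_TABLES_EXTERNAL is first scaled by
 * MLX5_FLOW_TABLE_FACTOR and then translated with the standard method:
 *
 *     uint32_t table;
 *     struct flow_grp_info info = {
 *             .external = 1, .transfer = 0, .fdb_def_rule = 0,
 *             .skip_scale = 0, .std_tbl_fix = 1,
 *     };
 *
 *     mlx5_flow_group_to_table(dev, NULL, 2, &table, &info, &error);
 *     // table == 2 * MLX5_FLOW_TABLE_FACTOR
 */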
7739
7740 /**
7741  * Discover availability of metadata reg_c's.
7742  *
7743  * Iteratively use test flows to check availability.
7744  *
7745  * @param[in] dev
7746  *   Pointer to the Ethernet device structure.
7747  *
7748  * @return
7749  *   0 on success, a negative errno value otherwise and rte_errno is set.
7750  */
7751 int
7752 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
7753 {
7754         struct mlx5_priv *priv = dev->data->dev_private;
7755         struct mlx5_dev_config *config = &priv->config;
7756         enum modify_reg idx;
7757         int n = 0;
7758
7759         /* reg_c[0] and reg_c[1] are reserved. */
7760         config->flow_mreg_c[n++] = REG_C_0;
7761         config->flow_mreg_c[n++] = REG_C_1;
7762         /* Discover availability of other reg_c's. */
7763         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
7764                 struct rte_flow_attr attr = {
7765                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
7766                         .priority = MLX5_FLOW_LOWEST_PRIO_INDICATOR,
7767                         .ingress = 1,
7768                 };
7769                 struct rte_flow_item items[] = {
7770                         [0] = {
7771                                 .type = RTE_FLOW_ITEM_TYPE_END,
7772                         },
7773                 };
7774                 struct rte_flow_action actions[] = {
7775                         [0] = {
7776                                 .type = (enum rte_flow_action_type)
7777                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
7778                                 .conf = &(struct mlx5_flow_action_copy_mreg){
7779                                         .src = REG_C_1,
7780                                         .dst = idx,
7781                                 },
7782                         },
7783                         [1] = {
7784                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
7785                                 .conf = &(struct rte_flow_action_jump){
7786                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
7787                                 },
7788                         },
7789                         [2] = {
7790                                 .type = RTE_FLOW_ACTION_TYPE_END,
7791                         },
7792                 };
7793                 uint32_t flow_idx;
7794                 struct rte_flow *flow;
7795                 struct rte_flow_error error;
7796
7797                 if (!config->dv_flow_en)
7798                         break;
7799                 /* Create internal flow, validation skips copy action. */
7800                 flow_idx = flow_list_create(dev, NULL, &attr, items,
7801                                             actions, false, &error);
7802                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
7803                                       flow_idx);
7804                 if (!flow)
7805                         continue;
7806                 config->flow_mreg_c[n++] = idx;
7807                 flow_list_destroy(dev, NULL, flow_idx);
7808         }
7809         for (; n < MLX5_MREG_C_NUM; ++n)
7810                 config->flow_mreg_c[n] = REG_NON;
7811         return 0;
7812 }
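/*
 * Illustrative result (hypothetical, not part of the driver): on a device
 * where the copy flow above can only be created for reg_c[2]..reg_c[4],
 * the probe leaves:
 *
 *     config->flow_mreg_c = { REG_C_0, REG_C_1, REG_C_2, REG_C_3, REG_C_4,
 *                             REG_NON, REG_NON, ... };
 */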
7813
7814 /**
7815  * Dump flow raw HW data to a file.
7816  *
7817  * @param[in] dev
7818  *    The pointer to Ethernet device.
7819  * @param[in] file
7820  *   A pointer to a file for output.
7821  * @param[out] error
7822  *   Perform verbose error reporting if not NULL. PMDs initialize this
7823  *   structure in case of error only.
7824  * @return
7825  *   0 on success, a negative value otherwise.
7826  */
7827 int
7828 mlx5_flow_dev_dump(struct rte_eth_dev *dev, struct rte_flow *flow_idx,
7829                    FILE *file,
7830                    struct rte_flow_error *error __rte_unused)
7831 {
7832         struct mlx5_priv *priv = dev->data->dev_private;
7833         struct mlx5_dev_ctx_shared *sh = priv->sh;
7834         uint32_t handle_idx;
7835         int ret;
7836         struct mlx5_flow_handle *dh;
7837         struct rte_flow *flow;
7838
7839         if (!priv->config.dv_flow_en) {
7840                 if (fputs("device dv flow disabled\n", file) <= 0)
7841                         return -errno;
7842                 return -ENOTSUP;
7843         }
7844
7845         /* dump all */
7846         if (!flow_idx)
7847                 return mlx5_devx_cmd_flow_dump(sh->fdb_domain,
7848                                         sh->rx_domain,
7849                                         sh->tx_domain, file);
7850         /* dump one */
7851         flow = mlx5_ipool_get(priv->sh->ipool
7852                         [MLX5_IPOOL_RTE_FLOW], (uintptr_t)(void *)flow_idx);
7853         if (!flow)
7854                 return -ENOENT;
7855
7856         handle_idx = flow->dev_handles;
7857         while (handle_idx) {
7858                 dh = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW],
7859                                 handle_idx);
7860                 if (!dh)
7861                         return -ENOENT;
7862                 if (dh->drv_flow) {
7863                         ret = mlx5_devx_cmd_flow_single_dump(dh->drv_flow,
7864                                         file);
7865                         if (ret)
7866                                 return -ENOENT;
7867                 }
7868                 handle_idx = dh->next.next;
7869         }
7870         return 0;
7871 }
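/*
 * Illustrative usage sketch (assumes the rte_flow_dev_dump() ethdev API that
 * ends up in this callback; not part of the driver): dump all rules of a
 * port, or a single rule, to a file:
 *
 *     FILE *f = fopen("/tmp/mlx5_flows.txt", "w");
 *     struct rte_flow_error error;
 *
 *     rte_flow_dev_dump(port_id, NULL, f, &error);   // all rules
 *     rte_flow_dev_dump(port_id, flow, f, &error);   // one rule handle
 *     fclose(f);
 */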
7872
7873 /**
7874  * Get aged-out flows.
7875  *
7876  * @param[in] dev
7877  *   Pointer to the Ethernet device structure.
7878  * @param[in] contexts
7879  *   The address of an array of pointers to the aged-out flow contexts.
7880  * @param[in] nb_contexts
7881  *   The length of the contexts array.
7882  * @param[out] error
7883  *   Perform verbose error reporting if not NULL. Initialized in case of
7884  *   error only.
7885  *
7886  * @return
7887  *   the number of contexts retrieved on success, a negative errno value
7888  *   otherwise. If nb_contexts is 0, return the total number of aged-out
7889  *   contexts. If nb_contexts is not 0, return the number of aged-out flows
7890  *   reported in the contexts array.
7891  */
7892 int
7893 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
7894                         uint32_t nb_contexts, struct rte_flow_error *error)
7895 {
7896         const struct mlx5_flow_driver_ops *fops;
7897         struct rte_flow_attr attr = { .transfer = 0 };
7898
7899         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7900                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7901                 return fops->get_aged_flows(dev, contexts, nb_contexts,
7902                                                     error);
7903         }
7904         DRV_LOG(ERR,
7905                 "port %u get aged flows is not supported.",
7906                  dev->data->port_id);
7907         return -ENOTSUP;
7908 }
7909
7910 /* Wrapper for driver action_validate op callback */
7911 static int
7912 flow_drv_action_validate(struct rte_eth_dev *dev,
7913                          const struct rte_flow_indir_action_conf *conf,
7914                          const struct rte_flow_action *action,
7915                          const struct mlx5_flow_driver_ops *fops,
7916                          struct rte_flow_error *error)
7917 {
7918         static const char err_msg[] = "indirect action validation unsupported";
7919
7920         if (!fops->action_validate) {
7921                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7922                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7923                                    NULL, err_msg);
7924                 return -rte_errno;
7925         }
7926         return fops->action_validate(dev, conf, action, error);
7927 }
7928
7929 /**
7930  * Destroys the indirect action by handle.
7931  *
7932  * @param dev
7933  *   Pointer to Ethernet device structure.
7934  * @param[in] handle
7935  *   Handle for the indirect action object to be destroyed.
7936  * @param[out] error
7937  *   Perform verbose error reporting if not NULL. PMDs initialize this
7938  *   structure in case of error only.
7939  *
7940  * @return
7941  *   0 on success, a negative errno value otherwise and rte_errno is set.
7942  *
7943  * @note: wrapper for driver action_destroy op callback.
7944  */
7945 static int
7946 mlx5_action_handle_destroy(struct rte_eth_dev *dev,
7947                            struct rte_flow_action_handle *handle,
7948                            struct rte_flow_error *error)
7949 {
7950         static const char err_msg[] = "indirect action destruction unsupported";
7951         struct rte_flow_attr attr = { .transfer = 0 };
7952         const struct mlx5_flow_driver_ops *fops =
7953                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
7954
7955         if (!fops->action_destroy) {
7956                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7957                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7958                                    NULL, err_msg);
7959                 return -rte_errno;
7960         }
7961         return fops->action_destroy(dev, handle, error);
7962 }
7963
7964 /* Wrapper for driver action_update op callback */
7965 static int
7966 flow_drv_action_update(struct rte_eth_dev *dev,
7967                        struct rte_flow_action_handle *handle,
7968                        const void *update,
7969                        const struct mlx5_flow_driver_ops *fops,
7970                        struct rte_flow_error *error)
7971 {
7972         static const char err_msg[] = "indirect action update unsupported";
7973
7974         if (!fops->action_update) {
7975                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7976                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7977                                    NULL, err_msg);
7978                 return -rte_errno;
7979         }
7980         return fops->action_update(dev, handle, update, error);
7981 }
7982
7983 /* Wrapper for driver action_query op callback */
7984 static int
7985 flow_drv_action_query(struct rte_eth_dev *dev,
7986                       const struct rte_flow_action_handle *handle,
7987                       void *data,
7988                       const struct mlx5_flow_driver_ops *fops,
7989                       struct rte_flow_error *error)
7990 {
7991         static const char err_msg[] = "indirect action query unsupported";
7992
7993         if (!fops->action_query) {
7994                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
7995                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
7996                                    NULL, err_msg);
7997                 return -rte_errno;
7998         }
7999         return fops->action_query(dev, handle, data, error);
8000 }
8001
8002 /**
8003  * Create indirect action for reuse in multiple flow rules.
8004  *
8005  * @param dev
8006  *   Pointer to Ethernet device structure.
8007  * @param conf
8008  *   Pointer to indirect action object configuration.
8009  * @param[in] action
8010  *   Action configuration for indirect action object creation.
8011  * @param[out] error
8012  *   Perform verbose error reporting if not NULL. PMDs initialize this
8013  *   structure in case of error only.
8014  * @return
8015  *   A valid handle in case of success, NULL otherwise and rte_errno is set.
8016  */
8017 static struct rte_flow_action_handle *
8018 mlx5_action_handle_create(struct rte_eth_dev *dev,
8019                           const struct rte_flow_indir_action_conf *conf,
8020                           const struct rte_flow_action *action,
8021                           struct rte_flow_error *error)
8022 {
8023         static const char err_msg[] = "indirect action creation unsupported";
8024         struct rte_flow_attr attr = { .transfer = 0 };
8025         const struct mlx5_flow_driver_ops *fops =
8026                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8027
8028         if (flow_drv_action_validate(dev, conf, action, fops, error))
8029                 return NULL;
8030         if (!fops->action_create) {
8031                 DRV_LOG(ERR, "port %u %s.", dev->data->port_id, err_msg);
8032                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
8033                                    NULL, err_msg);
8034                 return NULL;
8035         }
8036         return fops->action_create(dev, conf, action, error);
8037 }
8038
8039 /**
8040  * Updates in place the indirect action configuration pointed to by
8041  * *handle* with the configuration provided as the *update* argument.
8042  * The update of the indirect action configuration affects all flow rules
8043  * reusing the action via the handle.
8044  *
8045  * @param dev
8046  *   Pointer to Ethernet device structure.
8047  * @param[in] handle
8048  *   Handle for the indirect action to be updated.
8049  * @param[in] update
8050  *   Action specification used to modify the action pointed to by handle.
8051  *   *update* can be of the same type as the action pointed to by the
8052  *   *handle* argument, or some other structure such as a wrapper,
8053  *   depending on the indirect action type.
8054  * @param[out] error
8055  *   Perform verbose error reporting if not NULL. PMDs initialize this
8056  *   structure in case of error only.
8057  *
8058  * @return
8059  *   0 on success, a negative errno value otherwise and rte_errno is set.
8060  */
8061 static int
8062 mlx5_action_handle_update(struct rte_eth_dev *dev,
8063                 struct rte_flow_action_handle *handle,
8064                 const void *update,
8065                 struct rte_flow_error *error)
8066 {
8067         struct rte_flow_attr attr = { .transfer = 0 };
8068         const struct mlx5_flow_driver_ops *fops =
8069                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8070         int ret;
8071
8072         ret = flow_drv_action_validate(dev, NULL,
8073                         (const struct rte_flow_action *)update, fops, error);
8074         if (ret)
8075                 return ret;
8076         return flow_drv_action_update(dev, handle, update, fops,
8077                                       error);
8078 }
8079
8080 /**
8081  * Query the indirect action by handle.
8082  *
8083  * This function allows retrieving action-specific data such as counters.
8084  * Data is gathered by a special action which may be present/referenced
8085  * in more than one flow rule definition.
8086  *
8087  * @see RTE_FLOW_ACTION_TYPE_COUNT
8088  *
8089  * @param dev
8090  *   Pointer to Ethernet device structure.
8091  * @param[in] handle
8092  *   Handle for the indirect action to query.
8093  * @param[in, out] data
8094  *   Pointer to storage for the associated query data type.
8095  * @param[out] error
8096  *   Perform verbose error reporting if not NULL. PMDs initialize this
8097  *   structure in case of error only.
8098  *
8099  * @return
8100  *   0 on success, a negative errno value otherwise and rte_errno is set.
8101  */
8102 static int
8103 mlx5_action_handle_query(struct rte_eth_dev *dev,
8104                          const struct rte_flow_action_handle *handle,
8105                          void *data,
8106                          struct rte_flow_error *error)
8107 {
8108         struct rte_flow_attr attr = { .transfer = 0 };
8109         const struct mlx5_flow_driver_ops *fops =
8110                         flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8111
8112         return flow_drv_action_query(dev, handle, data, fops, error);
8113 }
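/*
 * Illustrative usage sketch (assumes the generic rte_flow indirect action API
 * that dispatches to the callbacks above; not part of the driver): query the
 * counter behind an indirect COUNT action handle:
 *
 *     struct rte_flow_query_count counters = { .reset = 0 };
 *     struct rte_flow_error error;
 *
 *     if (rte_flow_action_handle_query(port_id, handle, &counters,
 *                                      &error) == 0)
 *             printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                    counters.hits, counters.bytes);
 */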
8114
8115 /**
8116  * Destroy all indirect actions (shared RSS).
8117  *
8118  * @param dev
8119  *   Pointer to Ethernet device.
8120  *
8121  * @return
8122  *   0 on success, a negative errno value otherwise and rte_errno is set.
8123  */
8124 int
8125 mlx5_action_handle_flush(struct rte_eth_dev *dev)
8126 {
8127         struct rte_flow_error error;
8128         struct mlx5_priv *priv = dev->data->dev_private;
8129         struct mlx5_shared_action_rss *shared_rss;
8130         int ret = 0;
8131         uint32_t idx;
8132
8133         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RSS_SHARED_ACTIONS],
8134                       priv->rss_shared_actions, idx, shared_rss, next) {
8135                 ret |= mlx5_action_handle_destroy(dev,
8136                        (struct rte_flow_action_handle *)(uintptr_t)idx, &error);
8137         }
8138         return ret;
8139 }
8140
8141 #ifndef HAVE_MLX5DV_DR
8142 #define MLX5_DOMAIN_SYNC_FLOW ((1 << 0) | (1 << 1))
8143 #else
8144 #define MLX5_DOMAIN_SYNC_FLOW \
8145         (MLX5DV_DR_DOMAIN_SYNC_FLAGS_SW | MLX5DV_DR_DOMAIN_SYNC_FLAGS_HW)
8146 #endif
8147
8148 int rte_pmd_mlx5_sync_flow(uint16_t port_id, uint32_t domains)
8149 {
8150         struct rte_eth_dev *dev = &rte_eth_devices[port_id];
8151         const struct mlx5_flow_driver_ops *fops;
8152         int ret;
8153         struct rte_flow_attr attr = { .transfer = 0 };
8154
8155         fops = flow_get_drv_ops(flow_get_drv_type(dev, &attr));
8156         ret = fops->sync_domain(dev, domains, MLX5_DOMAIN_SYNC_FLOW);
8157         if (ret > 0)
8158                 ret = -ret;
8159         return ret;
8160 }
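/*
 * Illustrative usage sketch (not part of the driver): "domains" is a bitmask
 * selecting the steering domains to synchronize; the exact bit definitions
 * are exported by rte_pmd_mlx5.h. A hypothetical call syncing two domains:
 *
 *     uint32_t domains = (1 << 0) | (1 << 2); // e.g. NIC Rx and FDB bits
 *     int ret = rte_pmd_mlx5_sync_flow(port_id, domains);
 *
 *     if (ret < 0)
 *             ; // synchronization failed, ret is a negative errno value
 */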
8161
8162 /**
8163  * Tunnel offload functionality is defined for the DV environment only.
8164  */
8165 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
8166 __extension__
8167 union tunnel_offload_mark {
8168         uint32_t val;
8169         struct {
8170                 uint32_t app_reserve:8;
8171                 uint32_t table_id:15;
8172                 uint32_t transfer:1;
8173                 uint32_t _unused_:8;
8174         };
8175 };
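/*
 * Illustrative sketch (hypothetical values, not part of the driver): the mark
 * set by a tunnel offload miss rule packs the flow table id and the transfer
 * flag, and is later recovered from mbuf->hash.fdir.hi by
 * tunnel_mark_decode():
 *
 *     union tunnel_offload_mark mark = { .val = 0 };
 *
 *     mark.table_id = 5;      // tunnel flow table id
 *     mark.transfer = 1;      // FDB (transfer) rule
 *     // mark.val is the value programmed into the MARK action of the
 *     // tunnel default miss rule.
 */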
8176
8177 static bool
8178 mlx5_access_tunnel_offload_db
8179         (struct rte_eth_dev *dev,
8180          bool (*match)(struct rte_eth_dev *,
8181                        struct mlx5_flow_tunnel *, const void *),
8182          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8183          void (*miss)(struct rte_eth_dev *, void *),
8184          void *ctx, bool lock_op);
8185
8186 static int
8187 flow_tunnel_add_default_miss(struct rte_eth_dev *dev,
8188                              struct rte_flow *flow,
8189                              const struct rte_flow_attr *attr,
8190                              const struct rte_flow_action *app_actions,
8191                              uint32_t flow_idx,
8192                              struct tunnel_default_miss_ctx *ctx,
8193                              struct rte_flow_error *error)
8194 {
8195         struct mlx5_priv *priv = dev->data->dev_private;
8196         struct mlx5_flow *dev_flow;
8197         struct rte_flow_attr miss_attr = *attr;
8198         const struct mlx5_flow_tunnel *tunnel = app_actions[0].conf;
8199         const struct rte_flow_item miss_items[2] = {
8200                 {
8201                         .type = RTE_FLOW_ITEM_TYPE_ETH,
8202                         .spec = NULL,
8203                         .last = NULL,
8204                         .mask = NULL
8205                 },
8206                 {
8207                         .type = RTE_FLOW_ITEM_TYPE_END,
8208                         .spec = NULL,
8209                         .last = NULL,
8210                         .mask = NULL
8211                 }
8212         };
8213         union tunnel_offload_mark mark_id;
8214         struct rte_flow_action_mark miss_mark;
8215         struct rte_flow_action miss_actions[3] = {
8216                 [0] = { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &miss_mark },
8217                 [2] = { .type = RTE_FLOW_ACTION_TYPE_END,  .conf = NULL }
8218         };
8219         const struct rte_flow_action_jump *jump_data;
8220         uint32_t i, flow_table = 0; /* prevent compilation warning */
8221         struct flow_grp_info grp_info = {
8222                 .external = 1,
8223                 .transfer = attr->transfer,
8224                 .fdb_def_rule = !!priv->fdb_def_rule,
8225                 .std_tbl_fix = 0,
8226         };
8227         int ret;
8228
8229         if (!attr->transfer) {
8230                 uint32_t q_size;
8231
8232                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_RSS;
8233                 q_size = priv->reta_idx_n * sizeof(ctx->queue[0]);
8234                 ctx->queue = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, q_size,
8235                                          0, SOCKET_ID_ANY);
8236                 if (!ctx->queue)
8237                         return rte_flow_error_set
8238                                 (error, ENOMEM,
8239                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8240                                 NULL, "invalid default miss RSS");
8241                 ctx->action_rss.func = RTE_ETH_HASH_FUNCTION_DEFAULT;
8242                 ctx->action_rss.level = 0;
8243                 ctx->action_rss.types = priv->rss_conf.rss_hf;
8244                 ctx->action_rss.key_len = priv->rss_conf.rss_key_len;
8245                 ctx->action_rss.queue_num = priv->reta_idx_n;
8246                 ctx->action_rss.key = priv->rss_conf.rss_key;
8247                 ctx->action_rss.queue = ctx->queue;
8248                 if (!priv->reta_idx_n || !priv->rxqs_n)
8249                         return rte_flow_error_set
8250                                 (error, EINVAL,
8251                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8252                                 NULL, "invalid port configuration");
8253                 if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
8254                         ctx->action_rss.types = 0;
8255                 for (i = 0; i != priv->reta_idx_n; ++i)
8256                         ctx->queue[i] = (*priv->reta_idx)[i];
8257         } else {
8258                 miss_actions[1].type = RTE_FLOW_ACTION_TYPE_JUMP;
8259                 ctx->miss_jump.group = MLX5_TNL_MISS_FDB_JUMP_GRP;
8260         }
8261         miss_actions[1].conf = (typeof(miss_actions[1].conf))ctx->raw;
8262         for (; app_actions->type != RTE_FLOW_ACTION_TYPE_JUMP; app_actions++);
8263         jump_data = app_actions->conf;
8264         miss_attr.priority = MLX5_TNL_MISS_RULE_PRIORITY;
8265         miss_attr.group = jump_data->group;
8266         ret = mlx5_flow_group_to_table(dev, tunnel, jump_data->group,
8267                                        &flow_table, &grp_info, error);
8268         if (ret)
8269                 return rte_flow_error_set(error, EINVAL,
8270                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
8271                                           NULL, "invalid tunnel id");
8272         mark_id.app_reserve = 0;
8273         mark_id.table_id = tunnel_flow_tbl_to_id(flow_table);
8274         mark_id.transfer = !!attr->transfer;
8275         mark_id._unused_ = 0;
8276         miss_mark.id = mark_id.val;
8277         dev_flow = flow_drv_prepare(dev, flow, &miss_attr,
8278                                     miss_items, miss_actions, flow_idx, error);
8279         if (!dev_flow)
8280                 return -rte_errno;
8281         dev_flow->flow = flow;
8282         dev_flow->external = true;
8283         dev_flow->tunnel = tunnel;
8284         /* Subflow object was created, we must include it in the list. */
8285         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
8286                       dev_flow->handle, next);
8287         DRV_LOG(DEBUG,
8288                 "port %u tunnel type=%d id=%u miss rule priority=%u group=%u",
8289                 dev->data->port_id, tunnel->app_tunnel.type,
8290                 tunnel->tunnel_id, miss_attr.priority, miss_attr.group);
8291         ret = flow_drv_translate(dev, dev_flow, &miss_attr, miss_items,
8292                                   miss_actions, error);
8293         if (!ret)
8294                 ret = flow_mreg_update_copy_table(dev, flow, miss_actions,
8295                                                   error);
8296
8297         return ret;
8298 }
8299
8300 static const struct mlx5_flow_tbl_data_entry  *
8301 tunnel_mark_decode(struct rte_eth_dev *dev, uint32_t mark)
8302 {
8303         struct mlx5_priv *priv = dev->data->dev_private;
8304         struct mlx5_dev_ctx_shared *sh = priv->sh;
8305         struct mlx5_hlist_entry *he;
8306         union tunnel_offload_mark mbits = { .val = mark };
8307         union mlx5_flow_tbl_key table_key = {
8308                 {
8309                         .level = tunnel_id_to_flow_tbl(mbits.table_id),
8310                         .id = 0,
8311                         .reserved = 0,
8312                         .dummy = 0,
8313                         .is_fdb = !!mbits.transfer,
8314                         .is_egress = 0,
8315                 }
8316         };
8317         he = mlx5_hlist_lookup(sh->flow_tbls, table_key.v64, NULL);
8318         return he ?
8319                container_of(he, struct mlx5_flow_tbl_data_entry, entry) : NULL;
8320 }
8321
8322 static void
8323 mlx5_flow_tunnel_grp2tbl_remove_cb(struct mlx5_hlist *list,
8324                                    struct mlx5_hlist_entry *entry)
8325 {
8326         struct mlx5_dev_ctx_shared *sh = list->ctx;
8327         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8328
8329         mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8330                         tunnel_flow_tbl_to_id(tte->flow_table));
8331         mlx5_free(tte);
8332 }
8333
8334 static int
8335 mlx5_flow_tunnel_grp2tbl_match_cb(struct mlx5_hlist *list __rte_unused,
8336                                   struct mlx5_hlist_entry *entry,
8337                                   uint64_t key, void *cb_ctx __rte_unused)
8338 {
8339         union tunnel_tbl_key tbl = {
8340                 .val = key,
8341         };
8342         struct tunnel_tbl_entry *tte = container_of(entry, typeof(*tte), hash);
8343
8344         return tbl.tunnel_id != tte->tunnel_id || tbl.group != tte->group;
8345 }
8346
8347 static struct mlx5_hlist_entry *
8348 mlx5_flow_tunnel_grp2tbl_create_cb(struct mlx5_hlist *list, uint64_t key,
8349                                    void *ctx __rte_unused)
8350 {
8351         struct mlx5_dev_ctx_shared *sh = list->ctx;
8352         struct tunnel_tbl_entry *tte;
8353         union tunnel_tbl_key tbl = {
8354                 .val = key,
8355         };
8356
8357         tte = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO,
8358                           sizeof(*tte), 0,
8359                           SOCKET_ID_ANY);
8360         if (!tte)
8361                 goto err;
8362         mlx5_ipool_malloc(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8363                           &tte->flow_table);
8364         if (tte->flow_table >= MLX5_MAX_TABLES) {
8365                 DRV_LOG(ERR, "Tunnel TBL ID %d exceeds max limit.",
8366                         tte->flow_table);
8367                 mlx5_ipool_free(sh->ipool[MLX5_IPOOL_TNL_TBL_ID],
8368                                 tte->flow_table);
8369                 goto err;
8370         } else if (!tte->flow_table) {
8371                 goto err;
8372         }
8373         tte->flow_table = tunnel_id_to_flow_tbl(tte->flow_table);
8374         tte->tunnel_id = tbl.tunnel_id;
8375         tte->group = tbl.group;
8376         return &tte->hash;
8377 err:
8378         if (tte)
8379                 mlx5_free(tte);
8380         return NULL;
8381 }
8382
8383 static uint32_t
8384 tunnel_flow_group_to_flow_table(struct rte_eth_dev *dev,
8385                                 const struct mlx5_flow_tunnel *tunnel,
8386                                 uint32_t group, uint32_t *table,
8387                                 struct rte_flow_error *error)
8388 {
8389         struct mlx5_hlist_entry *he;
8390         struct tunnel_tbl_entry *tte;
8391         union tunnel_tbl_key key = {
8392                 .tunnel_id = tunnel ? tunnel->tunnel_id : 0,
8393                 .group = group
8394         };
8395         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
8396         struct mlx5_hlist *group_hash;
8397
8398         group_hash = tunnel ? tunnel->groups : thub->groups;
8399         he = mlx5_hlist_register(group_hash, key.val, NULL);
8400         if (!he)
8401                 return rte_flow_error_set(error, EINVAL,
8402                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
8403                                           NULL,
8404                                           "tunnel group index not supported");
8405         tte = container_of(he, typeof(*tte), hash);
8406         *table = tte->flow_table;
8407         DRV_LOG(DEBUG, "port %u tunnel %u group=%#x table=%#x",
8408                 dev->data->port_id, key.tunnel_id, group, *table);
8409         return 0;
8410 }
8411
8412 static void
8413 mlx5_flow_tunnel_free(struct rte_eth_dev *dev,
8414                       struct mlx5_flow_tunnel *tunnel)
8415 {
8416         struct mlx5_priv *priv = dev->data->dev_private;
8417         struct mlx5_indexed_pool *ipool;
8418
8419         DRV_LOG(DEBUG, "port %u release pmd tunnel id=0x%x",
8420                 dev->data->port_id, tunnel->tunnel_id);
8421         LIST_REMOVE(tunnel, chain);
8422         mlx5_hlist_destroy(tunnel->groups);
8423         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
8424         mlx5_ipool_free(ipool, tunnel->tunnel_id);
8425 }
8426
8427 static bool
8428 mlx5_access_tunnel_offload_db
8429         (struct rte_eth_dev *dev,
8430          bool (*match)(struct rte_eth_dev *,
8431                        struct mlx5_flow_tunnel *, const void *),
8432          void (*hit)(struct rte_eth_dev *, struct mlx5_flow_tunnel *, void *),
8433          void (*miss)(struct rte_eth_dev *, void *),
8434          void *ctx, bool lock_op)
8435 {
8436         bool verdict = false;
8437         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
8438         struct mlx5_flow_tunnel *tunnel;
8439
8440         rte_spinlock_lock(&thub->sl);
8441         LIST_FOREACH(tunnel, &thub->tunnels, chain) {
8442                 verdict = match(dev, tunnel, (const void *)ctx);
8443                 if (verdict)
8444                         break;
8445         }
8446         if (!lock_op)
8447                 rte_spinlock_unlock(&thub->sl);
8448         if (verdict && hit)
8449                 hit(dev, tunnel, ctx);
8450         if (!verdict && miss)
8451                 miss(dev, ctx);
8452         if (lock_op)
8453                 rte_spinlock_unlock(&thub->sl);
8454
8455         return verdict;
8456 }
8457
8458 struct tunnel_db_find_tunnel_id_ctx {
8459         uint32_t tunnel_id;
8460         struct mlx5_flow_tunnel *tunnel;
8461 };
8462
8463 static bool
8464 find_tunnel_id_match(struct rte_eth_dev *dev,
8465                      struct mlx5_flow_tunnel *tunnel, const void *x)
8466 {
8467         const struct tunnel_db_find_tunnel_id_ctx *ctx = x;
8468
8469         RTE_SET_USED(dev);
8470         return tunnel->tunnel_id == ctx->tunnel_id;
8471 }
8472
8473 static void
8474 find_tunnel_id_hit(struct rte_eth_dev *dev,
8475                    struct mlx5_flow_tunnel *tunnel, void *x)
8476 {
8477         struct tunnel_db_find_tunnel_id_ctx *ctx = x;
8478         RTE_SET_USED(dev);
8479         ctx->tunnel = tunnel;
8480 }
8481
8482 static struct mlx5_flow_tunnel *
8483 mlx5_find_tunnel_id(struct rte_eth_dev *dev, uint32_t id)
8484 {
8485         struct tunnel_db_find_tunnel_id_ctx ctx = {
8486                 .tunnel_id = id,
8487         };
8488
8489         mlx5_access_tunnel_offload_db(dev, find_tunnel_id_match,
8490                                       find_tunnel_id_hit, NULL, &ctx, true);
8491
8492         return ctx.tunnel;
8493 }
8494
8495 static struct mlx5_flow_tunnel *
8496 mlx5_flow_tunnel_allocate(struct rte_eth_dev *dev,
8497                           const struct rte_flow_tunnel *app_tunnel)
8498 {
8499         struct mlx5_priv *priv = dev->data->dev_private;
8500         struct mlx5_indexed_pool *ipool;
8501         struct mlx5_flow_tunnel *tunnel;
8502         uint32_t id;
8503
8504         ipool = priv->sh->ipool[MLX5_IPOOL_TUNNEL_ID];
8505         tunnel = mlx5_ipool_zmalloc(ipool, &id);
8506         if (!tunnel)
8507                 return NULL;
8508         if (id >= MLX5_MAX_TUNNELS) {
8509                 mlx5_ipool_free(ipool, id);
8510                 DRV_LOG(ERR, "Tunnel ID %d exceeds max limit.", id);
8511                 return NULL;
8512         }
8513         tunnel->groups = mlx5_hlist_create("tunnel groups", 1024, 0, 0,
8514                                            mlx5_flow_tunnel_grp2tbl_create_cb,
8515                                            mlx5_flow_tunnel_grp2tbl_match_cb,
8516                                            mlx5_flow_tunnel_grp2tbl_remove_cb);
8517         if (!tunnel->groups) {
8518                 mlx5_ipool_free(ipool, id);
8519                 return NULL;
8520         }
8521         tunnel->groups->ctx = priv->sh;
8522         /* initiate new PMD tunnel */
8523         memcpy(&tunnel->app_tunnel, app_tunnel, sizeof(*app_tunnel));
8524         tunnel->tunnel_id = id;
8525         tunnel->action.type = (typeof(tunnel->action.type))
8526                               MLX5_RTE_FLOW_ACTION_TYPE_TUNNEL_SET;
8527         tunnel->action.conf = tunnel;
8528         tunnel->item.type = (typeof(tunnel->item.type))
8529                             MLX5_RTE_FLOW_ITEM_TYPE_TUNNEL;
8530         tunnel->item.spec = tunnel;
8531         tunnel->item.last = NULL;
8532         tunnel->item.mask = NULL;
8533
8534         DRV_LOG(DEBUG, "port %u new pmd tunnel id=0x%x",
8535                 dev->data->port_id, tunnel->tunnel_id);
8536
8537         return tunnel;
8538 }
8539
8540 struct tunnel_db_get_tunnel_ctx {
8541         const struct rte_flow_tunnel *app_tunnel;
8542         struct mlx5_flow_tunnel *tunnel;
8543 };
8544
8545 static bool get_tunnel_match(struct rte_eth_dev *dev,
8546                              struct mlx5_flow_tunnel *tunnel, const void *x)
8547 {
8548         const struct tunnel_db_get_tunnel_ctx *ctx = x;
8549
8550         RTE_SET_USED(dev);
8551         return !memcmp(ctx->app_tunnel, &tunnel->app_tunnel,
8552                        sizeof(*ctx->app_tunnel));
8553 }
8554
8555 static void get_tunnel_hit(struct rte_eth_dev *dev,
8556                            struct mlx5_flow_tunnel *tunnel, void *x)
8557 {
8558         /* called under tunnel spinlock protection */
8559         struct tunnel_db_get_tunnel_ctx *ctx = x;
8560
8561         RTE_SET_USED(dev);
8562         tunnel->refctn++;
8563         ctx->tunnel = tunnel;
8564 }
8565
8566 static void get_tunnel_miss(struct rte_eth_dev *dev, void *x)
8567 {
8568         /* called under tunnel spinlock protection */
8569         struct mlx5_flow_tunnel_hub *thub = mlx5_tunnel_hub(dev);
8570         struct tunnel_db_get_tunnel_ctx *ctx = x;
8571
8572         rte_spinlock_unlock(&thub->sl);
8573         ctx->tunnel = mlx5_flow_tunnel_allocate(dev, ctx->app_tunnel);
8574         rte_spinlock_lock(&thub->sl);
8575         if (ctx->tunnel) {
8576                 ctx->tunnel->refctn = 1;
8577                 LIST_INSERT_HEAD(&thub->tunnels, ctx->tunnel, chain);
8578         }
8579 }
8580
8581
8582 static int
8583 mlx5_get_flow_tunnel(struct rte_eth_dev *dev,
8584                      const struct rte_flow_tunnel *app_tunnel,
8585                      struct mlx5_flow_tunnel **tunnel)
8586 {
8587         struct tunnel_db_get_tunnel_ctx ctx = {
8588                 .app_tunnel = app_tunnel,
8589         };
8590
8591         mlx5_access_tunnel_offload_db(dev, get_tunnel_match, get_tunnel_hit,
8592                                       get_tunnel_miss, &ctx, true);
8593         *tunnel = ctx.tunnel;
8594         return ctx.tunnel ? 0 : -ENOMEM;
8595 }
8596
8597 void mlx5_release_tunnel_hub(struct mlx5_dev_ctx_shared *sh, uint16_t port_id)
8598 {
8599         struct mlx5_flow_tunnel_hub *thub = sh->tunnel_hub;
8600
8601         if (!thub)
8602                 return;
8603         if (!LIST_EMPTY(&thub->tunnels))
8604                 DRV_LOG(WARNING, "port %u tunnels present", port_id);
8605         mlx5_hlist_destroy(thub->groups);
8606         mlx5_free(thub);
8607 }
8608
8609 int mlx5_alloc_tunnel_hub(struct mlx5_dev_ctx_shared *sh)
8610 {
8611         int err;
8612         struct mlx5_flow_tunnel_hub *thub;
8613
8614         thub = mlx5_malloc(MLX5_MEM_SYS | MLX5_MEM_ZERO, sizeof(*thub),
8615                            0, SOCKET_ID_ANY);
8616         if (!thub)
8617                 return -ENOMEM;
8618         LIST_INIT(&thub->tunnels);
8619         rte_spinlock_init(&thub->sl);
8620         thub->groups = mlx5_hlist_create("flow groups",
8621                                          rte_align32pow2(MLX5_MAX_TABLES), 0,
8622                                          0, mlx5_flow_tunnel_grp2tbl_create_cb,
8623                                          mlx5_flow_tunnel_grp2tbl_match_cb,
8624                                          mlx5_flow_tunnel_grp2tbl_remove_cb);
8625         if (!thub->groups) {
8626                 err = -rte_errno;
8627                 goto err;
8628         }
8629         thub->groups->ctx = sh;
8630         sh->tunnel_hub = thub;
8631
8632         return 0;
8633
8634 err:
8635         if (thub->groups)
8636                 mlx5_hlist_destroy(thub->groups);
8637         if (thub)
8638                 mlx5_free(thub);
8639         return err;
8640 }
8641
8642 static inline bool
8643 mlx5_flow_tunnel_validate(struct rte_eth_dev *dev,
8644                           struct rte_flow_tunnel *tunnel,
8645                           const char **err_msg)
8646 {
8647         *err_msg = NULL;
8648         if (!is_tunnel_offload_active(dev)) {
8649                 *err_msg = "tunnel offload was not activated";
8650                 goto out;
8651         } else if (!tunnel) {
8652                 *err_msg = "no application tunnel";
8653                 goto out;
8654         }
8655
8656         switch (tunnel->type) {
8657         default:
8658                 *err_msg = "unsupported tunnel type";
8659                 goto out;
8660         case RTE_FLOW_ITEM_TYPE_VXLAN:
8661                 break;
8662         }
8663
8664 out:
8665         return !*err_msg;
8666 }
8667
8668 static int
8669 mlx5_flow_tunnel_decap_set(struct rte_eth_dev *dev,
8670                     struct rte_flow_tunnel *app_tunnel,
8671                     struct rte_flow_action **actions,
8672                     uint32_t *num_of_actions,
8673                     struct rte_flow_error *error)
8674 {
8675         int ret;
8676         struct mlx5_flow_tunnel *tunnel;
8677         const char *err_msg = NULL;
8678         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8679
8680         if (!verdict)
8681                 return rte_flow_error_set(error, EINVAL,
8682                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8683                                           err_msg);
8684         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8685         if (ret < 0) {
8686                 return rte_flow_error_set(error, ret,
8687                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
8688                                           "failed to initialize pmd tunnel");
8689         }
8690         *actions = &tunnel->action;
8691         *num_of_actions = 1;
8692         return 0;
8693 }
8694
8695 static int
8696 mlx5_flow_tunnel_match(struct rte_eth_dev *dev,
8697                        struct rte_flow_tunnel *app_tunnel,
8698                        struct rte_flow_item **items,
8699                        uint32_t *num_of_items,
8700                        struct rte_flow_error *error)
8701 {
8702         int ret;
8703         struct mlx5_flow_tunnel *tunnel;
8704         const char *err_msg = NULL;
8705         bool verdict = mlx5_flow_tunnel_validate(dev, app_tunnel, &err_msg);
8706
8707         if (!verdict)
8708                 return rte_flow_error_set(error, EINVAL,
8709                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8710                                           err_msg);
8711         ret = mlx5_get_flow_tunnel(dev, app_tunnel, &tunnel);
8712         if (ret < 0) {
8713                 return rte_flow_error_set(error, ret,
8714                                           RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8715                                           "failed to initialize pmd tunnel");
8716         }
8717         *items = &tunnel->item;
8718         *num_of_items = 1;
8719         return 0;
8720 }
8721
8722 struct tunnel_db_element_release_ctx {
8723         struct rte_flow_item *items;
8724         struct rte_flow_action *actions;
8725         uint32_t num_elements;
8726         struct rte_flow_error *error;
8727         int ret;
8728 };
8729
8730 static bool
8731 tunnel_element_release_match(struct rte_eth_dev *dev,
8732                              struct mlx5_flow_tunnel *tunnel, const void *x)
8733 {
8734         const struct tunnel_db_element_release_ctx *ctx = x;
8735
8736         RTE_SET_USED(dev);
8737         if (ctx->num_elements != 1)
8738                 return false;
8739         else if (ctx->items)
8740                 return ctx->items == &tunnel->item;
8741         else if (ctx->actions)
8742                 return ctx->actions == &tunnel->action;
8743
8744         return false;
8745 }
8746
8747 static void
8748 tunnel_element_release_hit(struct rte_eth_dev *dev,
8749                            struct mlx5_flow_tunnel *tunnel, void *x)
8750 {
8751         struct tunnel_db_element_release_ctx *ctx = x;
8752         ctx->ret = 0;
8753         if (!__atomic_sub_fetch(&tunnel->refctn, 1, __ATOMIC_RELAXED))
8754                 mlx5_flow_tunnel_free(dev, tunnel);
8755 }
8756
8757 static void
8758 tunnel_element_release_miss(struct rte_eth_dev *dev, void *x)
8759 {
8760         struct tunnel_db_element_release_ctx *ctx = x;
8761         RTE_SET_USED(dev);
8762         ctx->ret = rte_flow_error_set(ctx->error, EINVAL,
8763                                       RTE_FLOW_ERROR_TYPE_HANDLE, NULL,
8764                                       "invalid argument");
8765 }
8766
8767 static int
8768 mlx5_flow_tunnel_item_release(struct rte_eth_dev *dev,
8769                        struct rte_flow_item *pmd_items,
8770                        uint32_t num_items, struct rte_flow_error *err)
8771 {
8772         struct tunnel_db_element_release_ctx ctx = {
8773                 .items = pmd_items,
8774                 .actions = NULL,
8775                 .num_elements = num_items,
8776                 .error = err,
8777         };
8778
8779         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8780                                       tunnel_element_release_hit,
8781                                       tunnel_element_release_miss, &ctx, false);
8782
8783         return ctx.ret;
8784 }
8785
8786 static int
8787 mlx5_flow_tunnel_action_release(struct rte_eth_dev *dev,
8788                          struct rte_flow_action *pmd_actions,
8789                          uint32_t num_actions, struct rte_flow_error *err)
8790 {
8791         struct tunnel_db_element_release_ctx ctx = {
8792                 .items = NULL,
8793                 .actions = pmd_actions,
8794                 .num_elements = num_actions,
8795                 .error = err,
8796         };
8797
8798         mlx5_access_tunnel_offload_db(dev, tunnel_element_release_match,
8799                                       tunnel_element_release_hit,
8800                                       tunnel_element_release_miss, &ctx, false);
8801
8802         return ctx.ret;
8803 }
8804
8805 static int
8806 mlx5_flow_tunnel_get_restore_info(struct rte_eth_dev *dev,
8807                                   struct rte_mbuf *m,
8808                                   struct rte_flow_restore_info *info,
8809                                   struct rte_flow_error *err)
8810 {
8811         uint64_t ol_flags = m->ol_flags;
8812         const struct mlx5_flow_tbl_data_entry *tble;
8813         const uint64_t mask = PKT_RX_FDIR | PKT_RX_FDIR_ID;
8814
8815         if (!is_tunnel_offload_active(dev)) {
8816                 info->flags = 0;
8817                 return 0;
8818         }
8819
8820         if ((ol_flags & mask) != mask)
8821                 goto err;
8822         tble = tunnel_mark_decode(dev, m->hash.fdir.hi);
8823         if (!tble) {
8824                 DRV_LOG(DEBUG, "port %u invalid miss tunnel mark %#x",
8825                         dev->data->port_id, m->hash.fdir.hi);
8826                 goto err;
8827         }
8828         MLX5_ASSERT(tble->tunnel);
8829         memcpy(&info->tunnel, &tble->tunnel->app_tunnel, sizeof(info->tunnel));
8830         info->group_id = tble->group_id;
8831         info->flags = RTE_FLOW_RESTORE_INFO_TUNNEL |
8832                       RTE_FLOW_RESTORE_INFO_GROUP_ID |
8833                       RTE_FLOW_RESTORE_INFO_ENCAPSULATED;
8834
8835         return 0;
8836
8837 err:
8838         return rte_flow_error_set(err, EINVAL,
8839                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8840                                   "failed to get restore info");
8841 }
8842
8843 #else /* HAVE_IBV_FLOW_DV_SUPPORT */
8844 static int
8845 mlx5_flow_tunnel_decap_set(__rte_unused struct rte_eth_dev *dev,
8846                            __rte_unused struct rte_flow_tunnel *app_tunnel,
8847                            __rte_unused struct rte_flow_action **actions,
8848                            __rte_unused uint32_t *num_of_actions,
8849                            __rte_unused struct rte_flow_error *error)
8850 {
8851         return -ENOTSUP;
8852 }
8853
8854 static int
8855 mlx5_flow_tunnel_match(__rte_unused struct rte_eth_dev *dev,
8856                        __rte_unused struct rte_flow_tunnel *app_tunnel,
8857                        __rte_unused struct rte_flow_item **items,
8858                        __rte_unused uint32_t *num_of_items,
8859                        __rte_unused struct rte_flow_error *error)
8860 {
8861         return -ENOTSUP;
8862 }
8863
8864 static int
8865 mlx5_flow_tunnel_item_release(__rte_unused struct rte_eth_dev *dev,
8866                               __rte_unused struct rte_flow_item *pmd_items,
8867                               __rte_unused uint32_t num_items,
8868                               __rte_unused struct rte_flow_error *err)
8869 {
8870         return -ENOTSUP;
8871 }
8872
8873 static int
8874 mlx5_flow_tunnel_action_release(__rte_unused struct rte_eth_dev *dev,
8875                                 __rte_unused struct rte_flow_action *pmd_action,
8876                                 __rte_unused uint32_t num_actions,
8877                                 __rte_unused struct rte_flow_error *err)
8878 {
8879         return -ENOTSUP;
8880 }
8881
8882 static int
8883 mlx5_flow_tunnel_get_restore_info(__rte_unused struct rte_eth_dev *dev,
8884                                   __rte_unused struct rte_mbuf *m,
8885                                   __rte_unused struct rte_flow_restore_info *i,
8886                                   __rte_unused struct rte_flow_error *err)
8887 {
8888         return -ENOTSUP;
8889 }
8890
8891 static int
8892 flow_tunnel_add_default_miss(__rte_unused struct rte_eth_dev *dev,
8893                              __rte_unused struct rte_flow *flow,
8894                              __rte_unused const struct rte_flow_attr *attr,
8895                              __rte_unused const struct rte_flow_action *actions,
8896                              __rte_unused uint32_t flow_idx,
8897                              __rte_unused struct tunnel_default_miss_ctx *ctx,
8898                              __rte_unused struct rte_flow_error *error)
8899 {
8900         return -ENOTSUP;
8901 }
8902
8903 static struct mlx5_flow_tunnel *
8904 mlx5_find_tunnel_id(__rte_unused struct rte_eth_dev *dev,
8905                     __rte_unused uint32_t id)
8906 {
8907         return NULL;
8908 }
8909
8910 static void
8911 mlx5_flow_tunnel_free(__rte_unused struct rte_eth_dev *dev,
8912                       __rte_unused struct mlx5_flow_tunnel *tunnel)
8913 {
8914 }
8915
8916 static uint32_t
8917 tunnel_flow_group_to_flow_table(__rte_unused struct rte_eth_dev *dev,
8918                                 __rte_unused const struct mlx5_flow_tunnel *t,
8919                                 __rte_unused uint32_t group,
8920                                 __rte_unused uint32_t *table,
8921                                 struct rte_flow_error *error)
8922 {
8923         return rte_flow_error_set(error, ENOTSUP,
8924                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
8925                                   "tunnel offload requires DV support");
8926 }
8927
8928 void
8929 mlx5_release_tunnel_hub(__rte_unused struct mlx5_dev_ctx_shared *sh,
8930                         __rte_unused  uint16_t port_id)
8931 {
8932 }
8933 #endif /* HAVE_IBV_FLOW_DV_SUPPORT */