net/mlx5: support match on IPv4 fragment packets
drivers/net/mlx5/mlx5_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
        [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
        [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
        [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
        [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};
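
/*
 * A driver is picked from this table by flow type at run time; e.g. an
 * illustrative lookup only (the ops structure itself is declared in
 * mlx5_flow.h):
 *
 *      const struct mlx5_flow_driver_ops *fops =
 *              flow_drv_ops[MLX5_FLOW_TYPE_VERBS];
 */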

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
        (const int []){ \
                __VA_ARGS__, 0, \
        }
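
/*
 * For instance, MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
 * MLX5_EXPANSION_IPV6) expands to the zero-terminated compound literal
 * (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }.
 */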

/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
        const int *const next;
        /**<
         * List of next node indexes. A zero value is interpreted as
         * a terminator.
         */
        const enum rte_flow_item_type type;
        /**< Pattern item type of current node. */
        uint64_t rss_types;
        /**<
         * RSS types bit-field associated with this node
         * (see ETH_RSS_* definitions).
         */
};

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
        uint32_t entries;
        /**< Number of entries in @p entry[]. */
        struct {
                struct rte_flow_item *pattern; /**< Expanded pattern array. */
                uint32_t priority; /**< Priority offset for each expansion. */
        } entry[];
};
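
/*
 * The buffer filled in by mlx5_flow_expand_rss() lays out the entry[]
 * descriptors first, immediately followed by the expanded rte_flow_item
 * arrays that each entry's pattern pointer refers to (entry[0].pattern
 * is set to the address right past the descriptor array).
 */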
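/**
 * Get the item type that should follow @p item, deduced from its spec.
 *
 * For example, an ETH item whose fully masked type field selects IPv4
 * implies a following IPV4 item. RTE_FLOW_ITEM_TYPE_END is returned when
 * the spec selects no expandable protocol, and RTE_FLOW_ITEM_TYPE_VOID
 * when the next type cannot be deduced (missing spec or unhandled item).
 *
 * @param[in] item
 *   Pattern item to examine.
 *
 * @return
 *   The deduced next item type.
 */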
static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
        enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
        uint16_t ether_type = 0;
        uint16_t ether_type_m;
        uint8_t ip_next_proto = 0;
        uint8_t ip_next_proto_m;

        if (item == NULL || item->spec == NULL)
                return ret;
        switch (item->type) {
        case RTE_FLOW_ITEM_TYPE_ETH:
                if (item->mask)
                        ether_type_m = ((const struct rte_flow_item_eth *)
                                                (item->mask))->type;
                else
                        ether_type_m = rte_flow_item_eth_mask.type;
                if (ether_type_m != RTE_BE16(0xFFFF))
                        break;
                ether_type = ((const struct rte_flow_item_eth *)
                                (item->spec))->type;
                if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
                        ret = RTE_FLOW_ITEM_TYPE_VLAN;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_VLAN:
                if (item->mask)
                        ether_type_m = ((const struct rte_flow_item_vlan *)
                                                (item->mask))->inner_type;
                else
                        ether_type_m = rte_flow_item_vlan_mask.inner_type;
                if (ether_type_m != RTE_BE16(0xFFFF))
                        break;
                ether_type = ((const struct rte_flow_item_vlan *)
                                (item->spec))->inner_type;
                if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
                        ret = RTE_FLOW_ITEM_TYPE_VLAN;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_IPV4:
                if (item->mask)
                        ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
                                        (item->mask))->hdr.next_proto_id;
                else
                        ip_next_proto_m =
                                rte_flow_item_ipv4_mask.hdr.next_proto_id;
                if (ip_next_proto_m != 0xFF)
                        break;
                ip_next_proto = ((const struct rte_flow_item_ipv4 *)
                                (item->spec))->hdr.next_proto_id;
                if (ip_next_proto == IPPROTO_UDP)
                        ret = RTE_FLOW_ITEM_TYPE_UDP;
                else if (ip_next_proto == IPPROTO_TCP)
                        ret = RTE_FLOW_ITEM_TYPE_TCP;
                else if (ip_next_proto == IPPROTO_IP)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (ip_next_proto == IPPROTO_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_IPV6:
                if (item->mask)
                        ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
                                                (item->mask))->hdr.proto;
                else
                        ip_next_proto_m =
                                rte_flow_item_ipv6_mask.hdr.proto;
                if (ip_next_proto_m != 0xFF)
                        break;
                ip_next_proto = ((const struct rte_flow_item_ipv6 *)
                                (item->spec))->hdr.proto;
                if (ip_next_proto == IPPROTO_UDP)
                        ret = RTE_FLOW_ITEM_TYPE_UDP;
                else if (ip_next_proto == IPPROTO_TCP)
                        ret = RTE_FLOW_ITEM_TYPE_TCP;
                else if (ip_next_proto == IPPROTO_IP)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (ip_next_proto == IPPROTO_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        default:
                ret = RTE_FLOW_ITEM_TYPE_VOID;
                break;
        }
        return ret;
}

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success, a negative errno value otherwise and rte_errno is
 *   set; the following errors are defined:
 *
 *   -E2BIG: graph depth of @p graph is too deep.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
                     const struct rte_flow_item *pattern, uint64_t types,
                     const struct mlx5_flow_expand_node graph[],
                     int graph_root_index)
{
        const int elt_n = 8;
        const struct rte_flow_item *item;
        const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
        const int *next_node;
        const int *stack[elt_n];
        int stack_pos = 0;
        struct rte_flow_item flow_items[elt_n];
        unsigned int i;
        size_t lsize;
        size_t user_pattern_size = 0;
        void *addr = NULL;
        const struct mlx5_flow_expand_node *next = NULL;
        struct rte_flow_item missed_item;
        int missed = 0;
        int elt = 0;
        const struct rte_flow_item *last_item = NULL;

        memset(&missed_item, 0, sizeof(missed_item));
        lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
                elt_n * sizeof(buf->entry[0]);
        if (lsize <= size) {
                buf->entry[0].priority = 0;
                buf->entry[0].pattern = (void *)&buf->entry[elt_n];
                buf->entries = 0;
                addr = buf->entry[0].pattern;
        }
        for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
                if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
                        last_item = item;
                for (i = 0; node->next && node->next[i]; ++i) {
                        next = &graph[node->next[i]];
                        if (next->type == item->type)
                                break;
                }
                if (next)
                        node = next;
                user_pattern_size += sizeof(*item);
        }
        user_pattern_size += sizeof(*item); /* Handle END item. */
        lsize += user_pattern_size;
        /* Copy the user pattern in the first entry of the buffer. */
        if (lsize <= size) {
                rte_memcpy(addr, pattern, user_pattern_size);
                addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                buf->entries = 1;
        }
        /* Start expanding. */
        memset(flow_items, 0, sizeof(flow_items));
        user_pattern_size -= sizeof(*item);
        /*
         * If the last valid item has its spec set, the pattern needs to be
         * completed with the deduced next item so that it can be used for
         * expansion.
         */
        missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
        if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
                /* Item type END indicates expansion is not required. */
                return lsize;
        }
        if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
                next = NULL;
                missed = 1;
                for (i = 0; node->next && node->next[i]; ++i) {
                        next = &graph[node->next[i]];
                        if (next->type == missed_item.type) {
                                flow_items[0].type = missed_item.type;
                                flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
                                break;
                        }
                        next = NULL;
                }
        }
        if (next && missed) {
                elt = 2; /* missed item + item end. */
                node = next;
                lsize += elt * sizeof(*item) + user_pattern_size;
                if ((node->rss_types & types) && lsize <= size) {
                        buf->entry[buf->entries].priority = 1;
                        buf->entry[buf->entries].pattern = addr;
                        buf->entries++;
                        rte_memcpy(addr, buf->entry[0].pattern,
                                   user_pattern_size);
                        addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                        rte_memcpy(addr, flow_items, elt * sizeof(*item));
                        addr = (void *)(((uintptr_t)addr) +
                                        elt * sizeof(*item));
                }
        }
        memset(flow_items, 0, sizeof(flow_items));
        next_node = node->next;
        stack[stack_pos] = next_node;
        node = next_node ? &graph[*next_node] : NULL;
        while (node) {
                flow_items[stack_pos].type = node->type;
                if (node->rss_types & types) {
                        /*
                         * Compute the number of items to copy from the
                         * expansion and copy them.
                         * When stack_pos is 0, there is one element in the
                         * stack, plus the additional END item.
                         */
                        elt = stack_pos + 2;
                        flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
                        lsize += elt * sizeof(*item) + user_pattern_size;
                        if (lsize <= size) {
                                size_t n = elt * sizeof(*item);

                                buf->entry[buf->entries].priority =
                                        stack_pos + 1 + missed;
                                buf->entry[buf->entries].pattern = addr;
                                buf->entries++;
                                rte_memcpy(addr, buf->entry[0].pattern,
                                           user_pattern_size);
                                addr = (void *)(((uintptr_t)addr) +
                                                user_pattern_size);
                                rte_memcpy(addr, &missed_item,
                                           missed * sizeof(*item));
                                addr = (void *)(((uintptr_t)addr) +
                                        missed * sizeof(*item));
                                rte_memcpy(addr, flow_items, n);
                                addr = (void *)(((uintptr_t)addr) + n);
                        }
                }
                /* Go deeper. */
                if (node->next) {
                        next_node = node->next;
                        if (stack_pos++ == elt_n) {
                                rte_errno = E2BIG;
                                return -rte_errno;
                        }
                        stack[stack_pos] = next_node;
                } else if (*(next_node + 1)) {
                        /* Follow up with the next possibility. */
                        ++next_node;
                } else {
                        /* Move to the next path. */
                        if (stack_pos)
                                next_node = stack[--stack_pos];
                        next_node++;
                        stack[stack_pos] = next_node;
                }
                node = *next_node ? &graph[*next_node] : NULL;
        }
        /* No expanded flows but we have a missed item; create one rule for it. */
        if (buf->entries == 1 && missed != 0) {
                elt = 2;
                lsize += elt * sizeof(*item) + user_pattern_size;
                if (lsize <= size) {
                        buf->entry[buf->entries].priority = 1;
                        buf->entry[buf->entries].pattern = addr;
                        buf->entries++;
                        flow_items[0].type = missed_item.type;
                        flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
                        rte_memcpy(addr, buf->entry[0].pattern,
                                   user_pattern_size);
                        addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                        rte_memcpy(addr, flow_items, elt * sizeof(*item));
                        addr = (void *)(((uintptr_t)addr) +
                                        elt * sizeof(*item));
                }
        }
        return lsize;
}
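
/*
 * Typical usage sketch (hypothetical caller; the buffer size and the
 * "pattern"/"rss_types" variables are only examples). The function
 * reports the required size even when the buffer is too small, so the
 * return value can be checked against the capacity:
 *
 *      uint8_t raw[2048];
 *      struct mlx5_flow_expand_rss *buf = (void *)raw;
 *      int ret = mlx5_flow_expand_rss(buf, sizeof(raw), pattern,
 *                                     rss_types, mlx5_support_expansion,
 *                                     MLX5_EXPANSION_ROOT);
 *      if (ret < 0 || ret > (int)sizeof(raw))
 *              goto error;
 */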

enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
        MLX5_EXPANSION_ROOT_OUTER,
        MLX5_EXPANSION_ROOT_ETH_VLAN,
        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_ETH,
        MLX5_EXPANSION_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_VLAN,
        MLX5_EXPANSION_OUTER_IPV4,
        MLX5_EXPANSION_OUTER_IPV4_UDP,
        MLX5_EXPANSION_OUTER_IPV4_TCP,
        MLX5_EXPANSION_OUTER_IPV6,
        MLX5_EXPANSION_OUTER_IPV6_UDP,
        MLX5_EXPANSION_OUTER_IPV6_TCP,
        MLX5_EXPANSION_VXLAN,
        MLX5_EXPANSION_VXLAN_GPE,
        MLX5_EXPANSION_GRE,
        MLX5_EXPANSION_MPLS,
        MLX5_EXPANSION_ETH,
        MLX5_EXPANSION_ETH_VLAN,
        MLX5_EXPANSION_VLAN,
        MLX5_EXPANSION_IPV4,
        MLX5_EXPANSION_IPV4_UDP,
        MLX5_EXPANSION_IPV4_TCP,
        MLX5_EXPANSION_IPV6,
        MLX5_EXPANSION_IPV6_UDP,
        MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
        [MLX5_EXPANSION_ROOT] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
                                                  MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                                                (MLX5_EXPANSION_OUTER_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_OUTER_ETH] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6,
                                                  MLX5_EXPANSION_MPLS),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_OUTER_IPV4] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV4_UDP,
                         MLX5_EXPANSION_OUTER_IPV4_TCP,
                         MLX5_EXPANSION_GRE,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                  MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_OUTER_IPV6] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV6_UDP,
                         MLX5_EXPANSION_OUTER_IPV6_TCP,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                  MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
        [MLX5_EXPANSION_VXLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        },
        [MLX5_EXPANSION_VXLAN_GPE] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
        },
        [MLX5_EXPANSION_GRE] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
                .type = RTE_FLOW_ITEM_TYPE_GRE,
        },
        [MLX5_EXPANSION_MPLS] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
        },
        [MLX5_EXPANSION_ETH] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_IPV4] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
                                                  MLX5_EXPANSION_IPV4_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_IPV4_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_IPV6] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
                                                  MLX5_EXPANSION_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_IPV6_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
};
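
/*
 * Illustrative walk of the graph above: expanding the pattern
 * ETH / IPV4 / END from MLX5_EXPANSION_ROOT with types
 * ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV4_TCP produces, in
 * addition to the original user pattern, the entries
 * ETH / IPV4 / UDP / END and ETH / IPV4 / TCP / END with a non-zero
 * priority offset.
 */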

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
        .dev_dump = mlx5_flow_dev_dump,
        .get_aged_flows = mlx5_flow_get_aged_flows,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action actions[2];
        struct rte_flow_action_queue queue;
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
        uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
        uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GENEVE,
                .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GRE,
                .ptype = RTE_PTYPE_TUNNEL_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_NVGRE,
                .ptype = RTE_PTYPE_TUNNEL_NVGRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPIP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GTP,
                .ptype = RTE_PTYPE_TUNNEL_GTPU,
        },
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of any failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
                     enum mlx5_feature_name feature,
                     uint32_t id,
                     struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg start_reg;
        bool skip_mtr_reg = false;

        switch (feature) {
        case MLX5_HAIRPIN_RX:
                return REG_B;
        case MLX5_HAIRPIN_TX:
                return REG_A;
        case MLX5_METADATA_RX:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_B;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_METADATA_TX:
                return REG_A;
        case MLX5_METADATA_FDB:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NON;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_FLOW_MARK:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NON;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_1;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_0;
                }
                break;
        case MLX5_MTR_SFX:
                /*
                 * If meter color and flow match share one register, flow match
                 * should use the meter color register for match.
                 */
                if (priv->mtr_reg_share)
                        return priv->mtr_color_reg;
                else
                        return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                               REG_C_3;
        case MLX5_MTR_COLOR:
                MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
                return priv->mtr_color_reg;
        case MLX5_COPY_MARK:
                /*
                 * The metadata COPY_MARK register is used in the meter suffix
                 * sub-flow when a meter is present. It's safe to share the
                 * same register.
                 */
                return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
        case MLX5_APP_TAG:
                /*
                 * If a meter is enabled, it engages a register for color
                 * match and flow match. If meter color match does not use
                 * REG_C_2, the REG_C_x used by meter color match must be
                 * skipped.
                 * If no meter is enabled, all available registers can be used.
                 */
                start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                            (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
                skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
                if (id > (REG_C_7 - start_reg))
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "invalid tag id");
                if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                /*
                 * This case means the meter is using a REG_C_x greater than 2.
                 * Take care not to conflict with the meter color REG_C_x.
                 * If the available index REG_C_y >= REG_C_x, skip the
                 * color register.
                 */
                if (skip_mtr_reg && config->flow_mreg_c
                    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
                        if (id >= (REG_C_7 - start_reg))
                                return rte_flow_error_set(error, EINVAL,
                                                       RTE_FLOW_ERROR_TYPE_ITEM,
                                                        NULL, "invalid tag id");
                        if (config->flow_mreg_c
                            [id + 1 + start_reg - REG_C_0] != REG_NON)
                                return config->flow_mreg_c
                                               [id + 1 + start_reg - REG_C_0];
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                }
                return config->flow_mreg_c[id + start_reg - REG_C_0];
        }
        MLX5_ASSERT(false);
        return rte_flow_error_set(error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                  NULL, "invalid feature name");
}
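
/*
 * Example of the resulting mapping (follows directly from the switch
 * above) when dv_xmeta_en == MLX5_XMETA_MODE_META16:
 *
 *      MLX5_METADATA_RX -> REG_C_0
 *      MLX5_METADATA_TX -> REG_A
 *      MLX5_FLOW_MARK   -> REG_C_1
 *
 * MLX5_XMETA_MODE_META32 swaps REG_C_0 and REG_C_1 for the metadata
 * and mark features.
 */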

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;

        /*
         * Having an available reg_c can be regarded inclusively as supporting
         * extensive flow metadata registers, which could mean:
         * - metadata register copy action by modify header.
         * - 16 modify header actions are supported.
         * - reg_c's are preserved across different domains (FDB and NIC) on
         *   packet loopback by flow lookup miss.
         */
        return config->flow_mreg_c[2] != REG_NON;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering the fields supported by the NIC, to compare with the
 *   user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
                          bool range_accepted,
                          struct rte_flow_error *error)
{
        unsigned int i;

        MLX5_ASSERT(nic_mask);
        for (i = 0; i < size; ++i)
                if ((nic_mask[i] | mask[i]) != nic_mask[i])
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "mask enables non supported"
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "mask/last without a spec is not"
                                          " supported");
        if (item->spec && item->last && !range_accepted) {
                uint8_t spec[size];
                uint8_t last[size];
                unsigned int i;
                int ret;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
                        last[i] = ((const uint8_t *)item->last)[i] & mask[i];
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "range is not valid");
        }
        return 0;
}
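
/*
 * Example of the spec/last range check above (illustrative byte values):
 * with mask 0x0f, spec 0x12 and last 0x22 both mask to 0x02, so no real
 * range remains and the item is accepted; spec 0x12 with last 0x13 would
 * be rejected as "range is not valid" unless range_accepted is true.
 */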

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] rss_desc
 *   Pointer to the RSS descriptor of the mlx5_flow.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
                            int tunnel __rte_unused, uint64_t layer_types,
                            uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        int rss_request_inner = rss_desc->level >= 2;

        /* Check RSS hash level for tunnel. */
        if (tunnel && rss_request_inner)
                hash_fields |= IBV_RX_HASH_INNER;
        else if (tunnel || rss_request_inner)
                return 0;
#endif
        /* Check if requested layer matches RSS hash fields. */
        if (!(rss_desc->types & layer_types))
                return 0;
        return hash_fields;
}
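
/*
 * Example: for an inner layer of a tunneled packet (tunnel == 1) with
 * rss_desc->level >= 2, IBV_RX_HASH_INNER is OR-ed into the returned
 * hash fields; with level < 2 the same tunneled layer yields 0 and is
 * not hashed.
 */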

/**
 * Look up and set the ptype in the Rx data part. Only a single ptype can be
 * used; if several tunnel rules are used on this queue, the tunnel ptype
 * will be cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
        unsigned int i;
        uint32_t tunnel_ptype = 0;

        /* Look up for the ptype to use. */
        for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
                if (!rxq_ctrl->flow_tunnels_n[i])
                        continue;
                if (!tunnel_ptype) {
                        tunnel_ptype = tunnels_info[i].ptype;
                } else {
                        tunnel_ptype = 0;
                        break;
                }
        }
        rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the
 * device flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
                       struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct mlx5_hrxq *hrxq;
        unsigned int i;

        if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
                return;
        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
        if (!hrxq)
                return;
        for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
                int idx = hrxq->ind_table->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                /*
                 * To support metadata register copy on Tx loopback,
                 * this must be always enabled (metadata may arrive
                 * from another port - not from local flows only).
                 */
                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n++;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Increase the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]++;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint32_t handle_idx;
        struct mlx5_flow_handle *dev_handle;

        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       handle_idx, dev_handle, next)
                flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
                        struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct mlx5_hrxq *hrxq;
        unsigned int i;

        if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
                return;
        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
        if (!hrxq)
                return;
        MLX5_ASSERT(dev->data->dev_started);
        for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
                int idx = hrxq->ind_table->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->flow_mark_n--;
                        rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Decrease the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]--;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint32_t handle_idx;
        struct mlx5_flow_handle *dev_handle;

        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       handle_idx, dev_handle, next)
                flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                unsigned int j;

                if (!(*priv->rxqs)[i])
                        continue;
                rxq_ctrl = container_of((*priv->rxqs)[i],
                                        struct mlx5_rxq_ctrl, rxq);
                rxq_ctrl->flow_mark_n = 0;
                rxq_ctrl->rxq.mark = 0;
                for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
                        rxq_ctrl->flow_tunnels_n[j] = 0;
                rxq_ctrl->rxq.tunnel = 0;
        }
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for all Rx queues.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *data;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
                if (!(*priv->rxqs)[i])
                        continue;
                data = (*priv->rxqs)[i];
                if (!rte_flow_dynf_metadata_avail()) {
                        data->dynf_meta = 0;
                        data->flow_meta_mask = 0;
                        data->flow_meta_offset = -1;
                } else {
                        data->dynf_meta = 1;
                        data->flow_meta_mask = rte_flow_dynf_metadata_mask;
                        data->flow_meta_offset = rte_flow_dynf_metadata_offs;
                }
        }
}
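
/*
 * Datapath sketch of consuming these fields (hypothetical Rx-side
 * snippet; "pkt" and "rxq" are assumed variables):
 *
 *      if (rxq->dynf_meta)
 *              meta = *RTE_MBUF_DYNFIELD(pkt, rxq->flow_meta_offset,
 *                                        uint32_t *) & rxq->flow_meta_mask;
 */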

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
                      enum rte_flow_action_type action)
{
        if (actions == NULL)
                return NULL;
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
                if (actions->type == action)
                        return actions;
        return NULL;
}
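
/*
 * Usage example (hypothetical): find a QUEUE action and read its index.
 *
 *      const struct rte_flow_action *qa =
 *              mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_QUEUE);
 *      if (qa != NULL)
 *              queue_idx =
 *                  ((const struct rte_flow_action_queue *)qa->conf)->index;
 */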

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of the flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        if (action_flags & MLX5_FLOW_ACTION_MARK)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't mark and flag in same flow");
        if (action_flags & MLX5_FLOW_ACTION_FLAG)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 flag"
                                          " actions in same flow");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "flag action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of the flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
                               uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        const struct rte_flow_action_mark *mark = action->conf;

        if (!mark)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "configuration cannot be null");
        if (mark->id >= MLX5_FLOW_MARK_MAX)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &mark->id,
                                          "mark id must be in 0 <= id < "
                                          RTE_STR(MLX5_FLOW_MARK_MAX));
        if (action_flags & MLX5_FLOW_ACTION_FLAG)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't flag and mark in same flow");
        if (action_flags & MLX5_FLOW_ACTION_MARK)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 mark actions in same"
                                          " flow");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "mark action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of the flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "drop action not supported for "
                                          "egress");
        return 0;
}
1269
1270 /*
1271  * Validate the queue action.
1272  *
1273  * @param[in] action
1274  *   Pointer to the queue action.
1275  * @param[in] action_flags
1276  *   Bit-fields that holds the actions detected until now.
1277  * @param[in] dev
1278  *   Pointer to the Ethernet device structure.
1279  * @param[in] attr
1280  *   Attributes of flow that includes this action.
1281  * @param[out] error
1282  *   Pointer to error structure.
1283  *
1284  * @return
1285  *   0 on success, a negative errno value otherwise and rte_errno is set.
1286  */
1287 int
1288 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1289                                 uint64_t action_flags,
1290                                 struct rte_eth_dev *dev,
1291                                 const struct rte_flow_attr *attr,
1292                                 struct rte_flow_error *error)
1293 {
1294         struct mlx5_priv *priv = dev->data->dev_private;
1295         const struct rte_flow_action_queue *queue = action->conf;
1296
1297         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1298                 return rte_flow_error_set(error, EINVAL,
1299                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1300                                           "can't have 2 fate actions in"
1301                                           " same flow");
1302         if (!priv->rxqs_n)
1303                 return rte_flow_error_set(error, EINVAL,
1304                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1305                                           NULL, "No Rx queues configured");
1306         if (queue->index >= priv->rxqs_n)
1307                 return rte_flow_error_set(error, EINVAL,
1308                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1309                                           &queue->index,
1310                                           "queue index out of range");
1311         if (!(*priv->rxqs)[queue->index])
1312                 return rte_flow_error_set(error, EINVAL,
1313                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1314                                           &queue->index,
1315                                           "queue is not configured");
1316         if (attr->egress)
1317                 return rte_flow_error_set(error, ENOTSUP,
1318                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1319                                           "queue action not supported for "
1320                                           "egress");
1321         return 0;
1322 }
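
/*
 * Editor's note: an illustrative queue fate action that passes the checks
 * above, assuming Rx queue 0 was configured with rte_eth_rx_queue_setup()
 * and the flow attribute is ingress. Hypothetical example, compiled out.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_action_queue mlx5_doc_queue_conf = {
	.index = 0, /* Must be below priv->rxqs_n and actually set up. */
};
static const struct rte_flow_action mlx5_doc_queue_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &mlx5_doc_queue_conf },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */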
1323
1324 /**
1325  * Validate the RSS action.
1326  *
1327  * @param[in] action
1328  *   Pointer to the RSS action.
1329  * @param[in] action_flags
1330  *   Bit-fields that hold the actions detected until now.
1331  * @param[in] dev
1332  *   Pointer to the Ethernet device structure.
1333  * @param[in] attr
1334  *   Attributes of flow that includes this action.
1335  * @param[in] item_flags
1336  *   Items that were detected.
1337  * @param[out] error
1338  *   Pointer to error structure.
1339  *
1340  * @return
1341  *   0 on success, a negative errno value otherwise and rte_errno is set.
1342  */
1343 int
1344 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1345                               uint64_t action_flags,
1346                               struct rte_eth_dev *dev,
1347                               const struct rte_flow_attr *attr,
1348                               uint64_t item_flags,
1349                               struct rte_flow_error *error)
1350 {
1351         struct mlx5_priv *priv = dev->data->dev_private;
1352         const struct rte_flow_action_rss *rss = action->conf;
1353         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1354         unsigned int i;
1355
1356         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1357                 return rte_flow_error_set(error, EINVAL,
1358                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1359                                           "can't have 2 fate actions"
1360                                           " in same flow");
1361         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1362             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1363                 return rte_flow_error_set(error, ENOTSUP,
1364                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1365                                           &rss->func,
1366                                           "RSS hash function not supported");
1367 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1368         if (rss->level > 2)
1369 #else
1370         if (rss->level > 1)
1371 #endif
1372                 return rte_flow_error_set(error, ENOTSUP,
1373                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1374                                           &rss->level,
1375                                           "tunnel RSS is not supported");
1376         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1377         if (rss->key_len == 0 && rss->key != NULL)
1378                 return rte_flow_error_set(error, ENOTSUP,
1379                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1380                                           &rss->key_len,
1381                                           "RSS hash key length 0");
1382         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1383                 return rte_flow_error_set(error, ENOTSUP,
1384                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1385                                           &rss->key_len,
1386                                           "RSS hash key too small");
1387         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1388                 return rte_flow_error_set(error, ENOTSUP,
1389                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1390                                           &rss->key_len,
1391                                           "RSS hash key too large");
1392         if (rss->queue_num > priv->config.ind_table_max_size)
1393                 return rte_flow_error_set(error, ENOTSUP,
1394                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1395                                           &rss->queue_num,
1396                                           "number of queues too large");
1397         if (rss->types & MLX5_RSS_HF_MASK)
1398                 return rte_flow_error_set(error, ENOTSUP,
1399                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1400                                           &rss->types,
1401                                           "some RSS protocols are not"
1402                                           " supported");
1403         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1404             !(rss->types & ETH_RSS_IP))
1405                 return rte_flow_error_set(error, EINVAL,
1406                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1407                                           "L3 partial RSS requested but L3 RSS"
1408                                           " type not specified");
1409         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1410             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1411                 return rte_flow_error_set(error, EINVAL,
1412                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1413                                           "L4 partial RSS requested but L4 RSS"
1414                                           " type not specified");
1415         if (!priv->rxqs_n)
1416                 return rte_flow_error_set(error, EINVAL,
1417                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1418                                           NULL, "No Rx queues configured");
1419         if (!rss->queue_num)
1420                 return rte_flow_error_set(error, EINVAL,
1421                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1422                                           NULL, "No queues configured");
1423         for (i = 0; i != rss->queue_num; ++i) {
1424                 if (rss->queue[i] >= priv->rxqs_n)
1425                         return rte_flow_error_set
1426                                 (error, EINVAL,
1427                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1428                                  &rss->queue[i], "queue index out of range");
1429                 if (!(*priv->rxqs)[rss->queue[i]])
1430                         return rte_flow_error_set
1431                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1432                                  &rss->queue[i], "queue is not configured");
1433         }
1434         if (attr->egress)
1435                 return rte_flow_error_set(error, ENOTSUP,
1436                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1437                                           "rss action not supported for "
1438                                           "egress");
1439         if (rss->level > 1 && !tunnel)
1440                 return rte_flow_error_set(error, EINVAL,
1441                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1442                                           "inner RSS is not supported for "
1443                                           "non-tunnel flows");
1444         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1445             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1446                 return rte_flow_error_set(error, EINVAL,
1447                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1448                                           "RSS on eCPRI is not supported yet");
1449         }
1450         return 0;
1451 }
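
/*
 * Editor's note: a sketch of an RSS action configuration satisfying the
 * checks above: Toeplitz hashing on outer headers, a full-length key of
 * MLX5_RSS_HASH_KEY_LEN bytes and two configured Rx queues. All values
 * are illustrative assumptions, and the block is compiled out.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const uint8_t mlx5_doc_rss_key[MLX5_RSS_HASH_KEY_LEN] = {
	0x6d, 0x5a, /* A real key must be fully populated. */
};
static const uint16_t mlx5_doc_rss_queues[] = { 0, 1 };
static const struct rte_flow_action_rss mlx5_doc_rss_conf = {
	.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
	.level = 1,                        /* Outer headers only. */
	.types = ETH_RSS_IP | ETH_RSS_UDP, /* L3 and matching L4 types. */
	.key_len = MLX5_RSS_HASH_KEY_LEN,
	.key = mlx5_doc_rss_key,
	.queue_num = RTE_DIM(mlx5_doc_rss_queues),
	.queue = mlx5_doc_rss_queues,
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */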
1452
1453 /**
1454  * Validate the default miss action.
1455  *
1456  * @param[in] action_flags
1457  *   Bit-fields that hold the actions detected until now.
      * @param[in] attr
      *   Attributes of flow that includes this action.
1458  * @param[out] error
1459  *   Pointer to error structure.
1460  *
1461  * @return
1462  *   0 on success, a negative errno value otherwise and rte_errno is set.
1463  */
1464 int
1465 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1466                                 const struct rte_flow_attr *attr,
1467                                 struct rte_flow_error *error)
1468 {
1469         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1470                 return rte_flow_error_set(error, EINVAL,
1471                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1472                                           "can't have 2 fate actions in"
1473                                           " same flow");
1474         if (attr->egress)
1475                 return rte_flow_error_set(error, ENOTSUP,
1476                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1477                                           "default miss action not supported "
1478                                           "for egress");
1479         if (attr->group)
1480                 return rte_flow_error_set(error, ENOTSUP,
1481                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1482                                           "only group 0 is supported");
1483         if (attr->transfer)
1484                 return rte_flow_error_set(error, ENOTSUP,
1485                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1486                                           NULL, "transfer is not supported");
1487         return 0;
1488 }
1489
1490 /**
1491  * Validate the count action.
1492  *
1493  * @param[in] dev
1494  *   Pointer to the Ethernet device structure.
1495  * @param[in] attr
1496  *   Attributes of flow that includes this action.
1497  * @param[out] error
1498  *   Pointer to error structure.
1499  *
1500  * @return
1501  *   0 on success, a negative errno value otherwise and rte_errno is set.
1502  */
1503 int
1504 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1505                                 const struct rte_flow_attr *attr,
1506                                 struct rte_flow_error *error)
1507 {
1508         if (attr->egress)
1509                 return rte_flow_error_set(error, ENOTSUP,
1510                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1511                                           "count action not supported for "
1512                                           "egress");
1513         return 0;
1514 }
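
/*
 * Editor's note: the count action is not a fate action, so it is usually
 * combined with one; a hypothetical, compiled-out sketch pairing it with
 * a queue action (queue 0 is assumed to exist).
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_action_queue mlx5_doc_cnt_queue = { .index = 0 };
static const struct rte_flow_action mlx5_doc_count_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &mlx5_doc_cnt_queue },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */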
1515
1516 /**
1517  * Verify that the @p attributes will be correctly understood by the NIC
1518  * and are supported by this device.
1519  *
1520  * @param[in] dev
1521  *   Pointer to the Ethernet device structure.
1522  * @param[in] attributes
1523  *   Pointer to flow attributes
1524  * @param[out] error
1525  *   Pointer to error structure.
1526  *
1527  * @return
1528  *   0 on success, a negative errno value otherwise and rte_errno is set.
1529  */
1530 int
1531 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1532                               const struct rte_flow_attr *attributes,
1533                               struct rte_flow_error *error)
1534 {
1535         struct mlx5_priv *priv = dev->data->dev_private;
1536         uint32_t priority_max = priv->config.flow_prio - 1;
1537
1538         if (attributes->group)
1539                 return rte_flow_error_set(error, ENOTSUP,
1540                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1541                                           NULL, "groups are not supported");
1542         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1543             attributes->priority >= priority_max)
1544                 return rte_flow_error_set(error, ENOTSUP,
1545                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1546                                           NULL, "priority out of range");
1547         if (attributes->egress)
1548                 return rte_flow_error_set(error, ENOTSUP,
1549                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1550                                           "egress is not supported");
1551         if (attributes->transfer && !priv->config.dv_esw_en)
1552                 return rte_flow_error_set(error, ENOTSUP,
1553                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1554                                           NULL, "transfer is not supported");
1555         if (!attributes->ingress)
1556                 return rte_flow_error_set(error, EINVAL,
1557                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1558                                           NULL,
1559                                           "ingress attribute is mandatory");
1560         return 0;
1561 }
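
/*
 * Editor's note: a sketch of flow attributes this validator accepts:
 * ingress only, group 0, default priority. Egress is rejected outright
 * and transfer additionally requires priv->config.dv_esw_en. Compiled
 * out by the hypothetical guard.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_attr mlx5_doc_attr = {
	.group = 0,    /* Non-zero groups are rejected here. */
	.priority = 0, /* Must stay below priv->config.flow_prio. */
	.ingress = 1,  /* Mandatory for this path. */
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */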
1562
1563 /**
1564  * Validate ICMP6 item.
1565  *
1566  * @param[in] item
1567  *   Item specification.
1568  * @param[in] item_flags
1569  *   Bit-fields that hold the items detected until now.
      * @param[in] target_protocol
      *   The next protocol in the previous item.
1570  * @param[out] error
1571  *   Pointer to error structure.
1572  *
1573  * @return
1574  *   0 on success, a negative errno value otherwise and rte_errno is set.
1575  */
1576 int
1577 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1578                                uint64_t item_flags,
1579                                uint8_t target_protocol,
1580                                struct rte_flow_error *error)
1581 {
1582         const struct rte_flow_item_icmp6 *mask = item->mask;
1583         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1584         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1585                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1586         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1587                                       MLX5_FLOW_LAYER_OUTER_L4;
1588         int ret;
1589
1590         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1591                 return rte_flow_error_set(error, EINVAL,
1592                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1593                                           "protocol filtering not compatible"
1594                                           " with ICMP6 layer");
1595         if (!(item_flags & l3m))
1596                 return rte_flow_error_set(error, EINVAL,
1597                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1598                                           "IPv6 is mandatory to filter on"
1599                                           " ICMP6");
1600         if (item_flags & l4m)
1601                 return rte_flow_error_set(error, EINVAL,
1602                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1603                                           "multiple L4 layers not supported");
1604         if (!mask)
1605                 mask = &rte_flow_item_icmp6_mask;
1606         ret = mlx5_flow_item_acceptable
1607                 (item, (const uint8_t *)mask,
1608                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1609                  sizeof(struct rte_flow_item_icmp6),
1610                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1611         if (ret < 0)
1612                 return ret;
1613         return 0;
1614 }
1615
1616 /**
1617  * Validate ICMP item.
1618  *
1619  * @param[in] item
1620  *   Item specification.
1621  * @param[in] item_flags
1622  *   Bit-fields that hold the items detected until now.
      * @param[in] target_protocol
      *   The next protocol in the previous item.
1623  * @param[out] error
1624  *   Pointer to error structure.
1625  *
1626  * @return
1627  *   0 on success, a negative errno value otherwise and rte_errno is set.
1628  */
1629 int
1630 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1631                              uint64_t item_flags,
1632                              uint8_t target_protocol,
1633                              struct rte_flow_error *error)
1634 {
1635         const struct rte_flow_item_icmp *mask = item->mask;
1636         const struct rte_flow_item_icmp nic_mask = {
1637                 .hdr.icmp_type = 0xff,
1638                 .hdr.icmp_code = 0xff,
1639                 .hdr.icmp_ident = RTE_BE16(0xffff),
1640                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1641         };
1642         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1643         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1644                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1645         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1646                                       MLX5_FLOW_LAYER_OUTER_L4;
1647         int ret;
1648
1649         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1650                 return rte_flow_error_set(error, EINVAL,
1651                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1652                                           "protocol filtering not compatible"
1653                                           " with ICMP layer");
1654         if (!(item_flags & l3m))
1655                 return rte_flow_error_set(error, EINVAL,
1656                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1657                                           "IPv4 is mandatory to filter"
1658                                           " on ICMP");
1659         if (item_flags & l4m)
1660                 return rte_flow_error_set(error, EINVAL,
1661                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1662                                           "multiple L4 layers not supported");
1663         if (!mask)
1664                 mask = &nic_mask;
1665         ret = mlx5_flow_item_acceptable
1666                 (item, (const uint8_t *)mask,
1667                  (const uint8_t *)&nic_mask,
1668                  sizeof(struct rte_flow_item_icmp),
1669                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1670         if (ret < 0)
1671                 return ret;
1672         return 0;
1673 }
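
/*
 * Editor's note: an illustrative, compiled-out pattern the ICMP validator
 * accepts: the outer IPv4 layer is mandatory and, when the IPv4 protocol
 * field is masked, it must be IPPROTO_ICMP.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_icmp mlx5_doc_icmp_spec = {
	.hdr.icmp_type = 8, /* Echo request, an example value. */
};
static const struct rte_flow_item mlx5_doc_icmp_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_ICMP, .spec = &mlx5_doc_icmp_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */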
1674
1675 /**
1676  * Validate Ethernet item.
1677  *
1678  * @param[in] item
1679  *   Item specification.
1680  * @param[in] item_flags
1681  *   Bit-fields that hold the items detected until now.
1682  * @param[out] error
1683  *   Pointer to error structure.
1684  *
1685  * @return
1686  *   0 on success, a negative errno value otherwise and rte_errno is set.
1687  */
1688 int
1689 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1690                             uint64_t item_flags,
1691                             struct rte_flow_error *error)
1692 {
1693         const struct rte_flow_item_eth *mask = item->mask;
1694         const struct rte_flow_item_eth nic_mask = {
1695                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1696                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1697                 .type = RTE_BE16(0xffff),
1698         };
1699         int ret;
1700         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1701         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1702                                        MLX5_FLOW_LAYER_OUTER_L2;
1703
1704         if (item_flags & ethm)
1705                 return rte_flow_error_set(error, ENOTSUP,
1706                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1707                                           "multiple L2 layers not supported");
1708         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1709             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1710                 return rte_flow_error_set(error, EINVAL,
1711                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1712                                           "L2 layer should not follow "
1713                                           "L3 layers");
1714         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1715             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1716                 return rte_flow_error_set(error, EINVAL,
1717                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1718                                           "L2 layer should not follow VLAN");
1719         if (!mask)
1720                 mask = &rte_flow_item_eth_mask;
1721         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1722                                         (const uint8_t *)&nic_mask,
1723                                         sizeof(struct rte_flow_item_eth),
1724                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1725         return ret;
1726 }
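
/*
 * Editor's note: a hypothetical L2 item matching one destination MAC with
 * a fully-masked address and EtherType, which the nic_mask above allows.
 * The MAC address is an arbitrary example; the block is compiled out.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_eth mlx5_doc_eth_spec = {
	.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
	.type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
};
static const struct rte_flow_item_eth mlx5_doc_eth_mask = {
	.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	.type = RTE_BE16(0xffff),
};
static const struct rte_flow_item mlx5_doc_eth_item = {
	.type = RTE_FLOW_ITEM_TYPE_ETH,
	.spec = &mlx5_doc_eth_spec,
	.mask = &mlx5_doc_eth_mask,
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */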
1727
1728 /**
1729  * Validate VLAN item.
1730  *
1731  * @param[in] item
1732  *   Item specification.
1733  * @param[in] item_flags
1734  *   Bit-fields that hold the items detected until now.
1735  * @param[in] dev
1736  *   Ethernet device flow is being created on.
1737  * @param[out] error
1738  *   Pointer to error structure.
1739  *
1740  * @return
1741  *   0 on success, a negative errno value otherwise and rte_errno is set.
1742  */
1743 int
1744 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1745                              uint64_t item_flags,
1746                              struct rte_eth_dev *dev,
1747                              struct rte_flow_error *error)
1748 {
1749         const struct rte_flow_item_vlan *spec = item->spec;
1750         const struct rte_flow_item_vlan *mask = item->mask;
1751         const struct rte_flow_item_vlan nic_mask = {
1752                 .tci = RTE_BE16(UINT16_MAX),
1753                 .inner_type = RTE_BE16(UINT16_MAX),
1754         };
1755         uint16_t vlan_tag = 0;
1756         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1757         int ret;
1758         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1759                                         MLX5_FLOW_LAYER_INNER_L4) :
1760                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1761                                         MLX5_FLOW_LAYER_OUTER_L4);
1762         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1763                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1764
1765         if (item_flags & vlanm)
1766                 return rte_flow_error_set(error, EINVAL,
1767                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1768                                           "multiple VLAN layers not supported");
1769         else if ((item_flags & l34m) != 0)
1770                 return rte_flow_error_set(error, EINVAL,
1771                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1772                                           "VLAN cannot follow L3/L4 layer");
1773         if (!mask)
1774                 mask = &rte_flow_item_vlan_mask;
1775         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1776                                         (const uint8_t *)&nic_mask,
1777                                         sizeof(struct rte_flow_item_vlan),
1778                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1779         if (ret)
1780                 return ret;
1781         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1782                 struct mlx5_priv *priv = dev->data->dev_private;
1783
1784                 if (priv->vmwa_context) {
1785                         /*
1786                          * A non-NULL context means we run in a VM with SR-IOV
1787                          * enabled; a VLAN interface must be created so the
1788                          * hypervisor sets up the E-Switch vport context
1789                          * correctly. We avoid creating multiple VLAN
1790                          * interfaces, so a VLAN tag mask is not supported.
1791                          */
1792                         return rte_flow_error_set(error, EINVAL,
1793                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1794                                                   item,
1795                                                   "VLAN tag mask is not"
1796                                                   " supported in virtual"
1797                                                   " environment");
1798                 }
1799         }
1800         if (spec) {
1801                 vlan_tag = spec->tci;
1802                 vlan_tag &= mask->tci;
1803         }
1804         /*
1805          * From the Verbs perspective an empty VLAN is equivalent
1806          * to a packet without a VLAN layer.
1807          */
1808         if (!vlan_tag)
1809                 return rte_flow_error_set(error, EINVAL,
1810                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1811                                           item->spec,
1812                                           "VLAN cannot be empty");
1813         return 0;
1814 }
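
/*
 * Editor's note: an illustrative VLAN item with a non-empty TCI, as the
 * check above requires; VID 100 and the VID-only mask are assumptions.
 * Compiled out by the hypothetical guard.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_vlan mlx5_doc_vlan_spec = {
	.tci = RTE_BE16(100),    /* Non-zero, so the VLAN is not "empty". */
};
static const struct rte_flow_item_vlan mlx5_doc_vlan_mask = {
	.tci = RTE_BE16(0x0fff), /* VID bits only, safe with vmwa_context. */
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */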
1815
1816 /**
1817  * Validate IPV4 item.
1818  *
1819  * @param[in] item
1820  *   Item specification.
1821  * @param[in] item_flags
1822  *   Bit-fields that hold the items detected until now.
1823  * @param[in] last_item
1824  *   Previous validated item in the pattern items.
1825  * @param[in] ether_type
1826  *   Type in the Ethernet layer header (including dot1q).
1827  * @param[in] acc_mask
1828  *   Acceptable mask, if NULL the default internal mask
1829  *   will be used to check whether item fields are supported.
1830  * @param[in] range_accepted
1831  *   True if range of values is accepted for specific fields, false otherwise.
1832  * @param[out] error
1833  *   Pointer to error structure.
1834  *
1835  * @return
1836  *   0 on success, a negative errno value otherwise and rte_errno is set.
1837  */
1838 int
1839 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1840                              uint64_t item_flags,
1841                              uint64_t last_item,
1842                              uint16_t ether_type,
1843                              const struct rte_flow_item_ipv4 *acc_mask,
1844                              bool range_accepted,
1845                              struct rte_flow_error *error)
1846 {
1847         const struct rte_flow_item_ipv4 *mask = item->mask;
1848         const struct rte_flow_item_ipv4 *spec = item->spec;
1849         const struct rte_flow_item_ipv4 nic_mask = {
1850                 .hdr = {
1851                         .src_addr = RTE_BE32(0xffffffff),
1852                         .dst_addr = RTE_BE32(0xffffffff),
1853                         .type_of_service = 0xff,
1854                         .next_proto_id = 0xff,
1855                 },
1856         };
1857         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1858         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1859                                       MLX5_FLOW_LAYER_OUTER_L3;
1860         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1861                                       MLX5_FLOW_LAYER_OUTER_L4;
1862         int ret;
1863         uint8_t next_proto = 0xFF;
1864         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1865                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1866                                   MLX5_FLOW_LAYER_INNER_VLAN);
1867
1868         if ((last_item & l2_vlan) && ether_type &&
1869             ether_type != RTE_ETHER_TYPE_IPV4)
1870                 return rte_flow_error_set(error, EINVAL,
1871                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1872                                           "IPv4 cannot follow L2/VLAN layer "
1873                                           "which ether type is not IPv4");
1874         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1875                 if (mask && spec)
1876                         next_proto = mask->hdr.next_proto_id &
1877                                      spec->hdr.next_proto_id;
1878                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1879                         return rte_flow_error_set(error, EINVAL,
1880                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1881                                                   item,
1882                                                   "multiple tunnel "
1883                                                   "not supported");
1884         }
1885         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1886                 return rte_flow_error_set(error, EINVAL,
1887                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1888                                           "wrong tunnel type - IPv6 specified "
1889                                           "but IPv4 item provided");
1890         if (item_flags & l3m)
1891                 return rte_flow_error_set(error, ENOTSUP,
1892                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1893                                           "multiple L3 layers not supported");
1894         else if (item_flags & l4m)
1895                 return rte_flow_error_set(error, EINVAL,
1896                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1897                                           "L3 cannot follow an L4 layer.");
1898         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1899                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1900                 return rte_flow_error_set(error, EINVAL,
1901                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1902                                           "L3 cannot follow an NVGRE layer.");
1903         if (!mask)
1904                 mask = &rte_flow_item_ipv4_mask;
1905         else if (mask->hdr.next_proto_id != 0 &&
1906                  mask->hdr.next_proto_id != 0xff)
1907                 return rte_flow_error_set(error, EINVAL,
1908                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1909                                           "partial mask is not supported"
1910                                           " for protocol");
1911         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1912                                         acc_mask ? (const uint8_t *)acc_mask
1913                                                  : (const uint8_t *)&nic_mask,
1914                                         sizeof(struct rte_flow_item_ipv4),
1915                                         range_accepted, error);
1916         if (ret < 0)
1917                 return ret;
1918         return 0;
1919 }
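
/*
 * Editor's note: a compiled-out sketch of an IPv4 item accepted under the
 * default nic_mask above (addresses, TOS, next protocol). The address and
 * protocol values are examples; the protocol mask must be 0 or 0xff.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_ipv4 mlx5_doc_ipv4_spec = {
	.hdr = {
		.src_addr = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
		.next_proto_id = IPPROTO_UDP,
	},
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */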
1920
1921 /**
1922  * Validate IPV6 item.
1923  *
1924  * @param[in] item
1925  *   Item specification.
1926  * @param[in] item_flags
1927  *   Bit-fields that hold the items detected until now.
1928  * @param[in] last_item
1929  *   Previous validated item in the pattern items.
1930  * @param[in] ether_type
1931  *   Type in the Ethernet layer header (including dot1q).
1932  * @param[in] acc_mask
1933  *   Acceptable mask, if NULL the default internal mask
1934  *   will be used to check whether item fields are supported.
1935  * @param[out] error
1936  *   Pointer to error structure.
1937  *
1938  * @return
1939  *   0 on success, a negative errno value otherwise and rte_errno is set.
1940  */
1941 int
1942 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1943                              uint64_t item_flags,
1944                              uint64_t last_item,
1945                              uint16_t ether_type,
1946                              const struct rte_flow_item_ipv6 *acc_mask,
1947                              struct rte_flow_error *error)
1948 {
1949         const struct rte_flow_item_ipv6 *mask = item->mask;
1950         const struct rte_flow_item_ipv6 *spec = item->spec;
1951         const struct rte_flow_item_ipv6 nic_mask = {
1952                 .hdr = {
1953                         .src_addr =
1954                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1955                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1956                         .dst_addr =
1957                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1958                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1959                         .vtc_flow = RTE_BE32(0xffffffff),
1960                         .proto = 0xff,
1961                 },
1962         };
1963         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1964         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1965                                       MLX5_FLOW_LAYER_OUTER_L3;
1966         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1967                                       MLX5_FLOW_LAYER_OUTER_L4;
1968         int ret;
1969         uint8_t next_proto = 0xFF;
1970         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1971                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1972                                   MLX5_FLOW_LAYER_INNER_VLAN);
1973
1974         if ((last_item & l2_vlan) && ether_type &&
1975             ether_type != RTE_ETHER_TYPE_IPV6)
1976                 return rte_flow_error_set(error, EINVAL,
1977                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1978                                           "IPv6 cannot follow L2/VLAN layer "
1979                                           "which ether type is not IPv6");
1980         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1981                 if (mask && spec)
1982                         next_proto = mask->hdr.proto & spec->hdr.proto;
1983                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1984                         return rte_flow_error_set(error, EINVAL,
1985                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1986                                                   item,
1987                                                   "multiple tunnel "
1988                                                   "not supported");
1989         }
1990         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1991                 return rte_flow_error_set(error, EINVAL,
1992                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1993                                           "wrong tunnel type - IPv4 specified "
1994                                           "but IPv6 item provided");
1995         if (item_flags & l3m)
1996                 return rte_flow_error_set(error, ENOTSUP,
1997                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1998                                           "multiple L3 layers not supported");
1999         else if (item_flags & l4m)
2000                 return rte_flow_error_set(error, EINVAL,
2001                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2002                                           "L3 cannot follow an L4 layer.");
2003         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2004                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2005                 return rte_flow_error_set(error, EINVAL,
2006                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2007                                           "L3 cannot follow an NVGRE layer.");
2008         if (!mask)
2009                 mask = &rte_flow_item_ipv6_mask;
2010         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2011                                         acc_mask ? (const uint8_t *)acc_mask
2012                                                  : (const uint8_t *)&nic_mask,
2013                                         sizeof(struct rte_flow_item_ipv6),
2014                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2015         if (ret < 0)
2016                 return ret;
2017         return 0;
2018 }
2019
2020 /**
2021  * Validate UDP item.
2022  *
2023  * @param[in] item
2024  *   Item specification.
2025  * @param[in] item_flags
2026  *   Bit-fields that hold the items detected until now.
2027  * @param[in] target_protocol
2028  *   The next protocol in the previous item.
2031  * @param[out] error
2032  *   Pointer to error structure.
2033  *
2034  * @return
2035  *   0 on success, a negative errno value otherwise and rte_errno is set.
2036  */
2037 int
2038 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2039                             uint64_t item_flags,
2040                             uint8_t target_protocol,
2041                             struct rte_flow_error *error)
2042 {
2043         const struct rte_flow_item_udp *mask = item->mask;
2044         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2045         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2046                                       MLX5_FLOW_LAYER_OUTER_L3;
2047         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2048                                       MLX5_FLOW_LAYER_OUTER_L4;
2049         int ret;
2050
2051         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2052                 return rte_flow_error_set(error, EINVAL,
2053                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2054                                           "protocol filtering not compatible"
2055                                           " with UDP layer");
2056         if (!(item_flags & l3m))
2057                 return rte_flow_error_set(error, EINVAL,
2058                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2059                                           "L3 is mandatory to filter on L4");
2060         if (item_flags & l4m)
2061                 return rte_flow_error_set(error, EINVAL,
2062                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2063                                           "multiple L4 layers not supported");
2064         if (!mask)
2065                 mask = &rte_flow_item_udp_mask;
2066         ret = mlx5_flow_item_acceptable
2067                 (item, (const uint8_t *)mask,
2068                  (const uint8_t *)&rte_flow_item_udp_mask,
2069                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2070                  error);
2071         if (ret < 0)
2072                 return ret;
2073         return 0;
2074 }
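
/*
 * Editor's note: a hypothetical, compiled-out pattern showing the L3
 * dependency enforced above: UDP must follow an L3 item and be the only
 * L4 item. Port 4789 (the VXLAN port) is an example value.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_udp mlx5_doc_udp_spec = {
	.hdr.dst_port = RTE_BE16(4789),
};
static const struct rte_flow_item mlx5_doc_udp_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &mlx5_doc_udp_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */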
2075
2076 /**
2077  * Validate TCP item.
2078  *
2079  * @param[in] item
2080  *   Item specification.
2081  * @param[in] item_flags
2082  *   Bit-fields that hold the items detected until now.
2083  * @param[in] target_protocol
2084  *   The next protocol in the previous item.
      * @param[in] flow_mask
      *   mlx5 flow-specific (DV, Verbs, etc.) supported header fields mask.
2085  * @param[out] error
2086  *   Pointer to error structure.
2087  *
2088  * @return
2089  *   0 on success, a negative errno value otherwise and rte_errno is set.
2090  */
2091 int
2092 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2093                             uint64_t item_flags,
2094                             uint8_t target_protocol,
2095                             const struct rte_flow_item_tcp *flow_mask,
2096                             struct rte_flow_error *error)
2097 {
2098         const struct rte_flow_item_tcp *mask = item->mask;
2099         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2100         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2101                                       MLX5_FLOW_LAYER_OUTER_L3;
2102         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2103                                       MLX5_FLOW_LAYER_OUTER_L4;
2104         int ret;
2105
2106         MLX5_ASSERT(flow_mask);
2107         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2108                 return rte_flow_error_set(error, EINVAL,
2109                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2110                                           "protocol filtering not compatible"
2111                                           " with TCP layer");
2112         if (!(item_flags & l3m))
2113                 return rte_flow_error_set(error, EINVAL,
2114                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2115                                           "L3 is mandatory to filter on L4");
2116         if (item_flags & l4m)
2117                 return rte_flow_error_set(error, EINVAL,
2118                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2119                                           "multiple L4 layers not supported");
2120         if (!mask)
2121                 mask = &rte_flow_item_tcp_mask;
2122         ret = mlx5_flow_item_acceptable
2123                 (item, (const uint8_t *)mask,
2124                  (const uint8_t *)flow_mask,
2125                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2126                  error);
2127         if (ret < 0)
2128                 return ret;
2129         return 0;
2130 }
2131
2132 /**
2133  * Validate VXLAN item.
2134  *
2135  * @param[in] item
2136  *   Item specification.
2137  * @param[in] item_flags
2138  *   Bit-fields that hold the items detected until now.
2141  * @param[out] error
2142  *   Pointer to error structure.
2143  *
2144  * @return
2145  *   0 on success, a negative errno value otherwise and rte_errno is set.
2146  */
2147 int
2148 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2149                               uint64_t item_flags,
2150                               struct rte_flow_error *error)
2151 {
2153         const struct rte_flow_item_vxlan *mask = item->mask;
2154         int ret;
2159
2161         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2162                 return rte_flow_error_set(error, ENOTSUP,
2163                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2164                                           "multiple tunnel layers not"
2165                                           " supported");
2166         /*
2167          * Verify only UDPv4 is present as defined in
2168          * https://tools.ietf.org/html/rfc7348
2169          */
2170         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2171                 return rte_flow_error_set(error, EINVAL,
2172                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2173                                           "no outer UDP layer found");
2174         if (!mask)
2175                 mask = &rte_flow_item_vxlan_mask;
2176         ret = mlx5_flow_item_acceptable
2177                 (item, (const uint8_t *)mask,
2178                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2179                  sizeof(struct rte_flow_item_vxlan),
2180                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2181         if (ret < 0)
2182                 return ret;
2187         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2188                 return rte_flow_error_set(error, ENOTSUP,
2189                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2190                                           "VXLAN tunnel must be fully defined");
2191         return 0;
2192 }
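
/*
 * Editor's note: a fully defined outer stack ending in VXLAN, as the last
 * check above demands. The VNI value 42 is an arbitrary assumption and
 * the block is compiled out.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_vxlan mlx5_doc_vxlan_spec = {
	.vni = { 0, 0, 42 },
};
static const struct rte_flow_item mlx5_doc_vxlan_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_UDP },
	{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &mlx5_doc_vxlan_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */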
2193
2194 /**
2195  * Validate VXLAN_GPE item.
2196  *
2197  * @param[in] item
2198  *   Item specification.
2199  * @param[in] item_flags
2200  *   Bit-fields that hold the items detected until now.
2201  * @param[in] dev
2202  *   Pointer to the Ethernet device structure.
2205  * @param[out] error
2206  *   Pointer to error structure.
2207  *
2208  * @return
2209  *   0 on success, a negative errno value otherwise and rte_errno is set.
2210  */
2211 int
2212 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2213                                   uint64_t item_flags,
2214                                   struct rte_eth_dev *dev,
2215                                   struct rte_flow_error *error)
2216 {
2217         struct mlx5_priv *priv = dev->data->dev_private;
2218         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2219         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2220         int ret;
2225
2226         if (!priv->config.l3_vxlan_en)
2227                 return rte_flow_error_set(error, ENOTSUP,
2228                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2229                                           "L3 VXLAN is not enabled by device"
2230                                           " parameter and/or not configured in"
2231                                           " firmware");
2232         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2233                 return rte_flow_error_set(error, ENOTSUP,
2234                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2235                                           "multiple tunnel layers not"
2236                                           " supported");
2237         /*
2238          * Verify an outer UDP layer is present as required by the
2239          * VXLAN-GPE specification (draft-ietf-nvo3-vxlan-gpe).
2240          */
2241         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2242                 return rte_flow_error_set(error, EINVAL,
2243                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2244                                           "no outer UDP layer found");
2245         if (!mask)
2246                 mask = &rte_flow_item_vxlan_gpe_mask;
2247         ret = mlx5_flow_item_acceptable
2248                 (item, (const uint8_t *)mask,
2249                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2250                  sizeof(struct rte_flow_item_vxlan_gpe),
2251                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2252         if (ret < 0)
2253                 return ret;
2254         if (spec) {
2255                 if (spec->protocol)
2256                         return rte_flow_error_set(error, ENOTSUP,
2257                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2258                                                   item,
2259                                                   "VxLAN-GPE protocol"
2260                                                   " not supported");
2263         }
2264         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2265                 return rte_flow_error_set(error, ENOTSUP,
2266                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2267                                           "VXLAN-GPE tunnel must be fully"
2268                                           " defined");
2269         return 0;
2270 }

2271 /**
2272  * Validate GRE Key item.
2273  *
2274  * @param[in] item
2275  *   Item specification.
2276  * @param[in] item_flags
2277  *   Bit flags to mark detected items.
2278  * @param[in] gre_item
2279  *   Pointer to gre_item
2280  * @param[out] error
2281  *   Pointer to error structure.
2282  *
2283  * @return
2284  *   0 on success, a negative errno value otherwise and rte_errno is set.
2285  */
2286 int
2287 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2288                                 uint64_t item_flags,
2289                                 const struct rte_flow_item *gre_item,
2290                                 struct rte_flow_error *error)
2291 {
2292         const rte_be32_t *mask = item->mask;
2293         int ret = 0;
2294         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2295         const struct rte_flow_item_gre *gre_spec;
2296         const struct rte_flow_item_gre *gre_mask;
2297
2298         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2299                 return rte_flow_error_set(error, ENOTSUP,
2300                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2301                                           "multiple GRE key items not supported");
2302         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2303                 return rte_flow_error_set(error, ENOTSUP,
2304                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2305                                           "No preceding GRE header");
2306         if (item_flags & MLX5_FLOW_LAYER_INNER)
2307                 return rte_flow_error_set(error, ENOTSUP,
2308                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2309                                           "GRE key must directly follow the GRE header");
2310         gre_mask = gre_item->mask;
2311         if (!gre_mask)
2312                 gre_mask = &rte_flow_item_gre_mask;
2313         gre_spec = gre_item->spec;
2314         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2315                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2316                 return rte_flow_error_set(error, EINVAL,
2317                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2318                                           "Key bit must be on");
2319
2320         if (!mask)
2321                 mask = &gre_key_default_mask;
2322         ret = mlx5_flow_item_acceptable
2323                 (item, (const uint8_t *)mask,
2324                  (const uint8_t *)&gre_key_default_mask,
2325                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2326         return ret;
2327 }
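
/*
 * Editor's note: a compiled-out sketch of the ordering this validator
 * enforces: a GRE item with the K bit set, immediately followed by the
 * GRE key. The key value is an arbitrary example.
 */
#ifdef MLX5_FLOW_DOC_EXAMPLES
static const struct rte_flow_item_gre mlx5_doc_gre_spec = {
	.c_rsvd0_ver = RTE_BE16(0x2000), /* K bit on. */
};
static const rte_be32_t mlx5_doc_gre_key = RTE_BE32(0xdeadbeef);
static const struct rte_flow_item mlx5_doc_gre_key_pattern[] = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &mlx5_doc_gre_spec },
	{ .type = RTE_FLOW_ITEM_TYPE_GRE_KEY, .spec = &mlx5_doc_gre_key },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_DOC_EXAMPLES */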
2328
2329 /**
2330  * Validate GRE item.
2331  *
2332  * @param[in] item
2333  *   Item specification.
2334  * @param[in] item_flags
2335  *   Bit flags to mark detected items.
2336  * @param[in] target_protocol
2337  *   The next protocol in the previous item.
2338  * @param[out] error
2339  *   Pointer to error structure.
2340  *
2341  * @return
2342  *   0 on success, a negative errno value otherwise and rte_errno is set.
2343  */
2344 int
2345 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2346                             uint64_t item_flags,
2347                             uint8_t target_protocol,
2348                             struct rte_flow_error *error)
2349 {
2350         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2351         const struct rte_flow_item_gre *mask = item->mask;
2352         int ret;
2353         const struct rte_flow_item_gre nic_mask = {
2354                 .c_rsvd0_ver = RTE_BE16(0xB000),
2355                 .protocol = RTE_BE16(UINT16_MAX),
2356         };
2357
2358         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2359                 return rte_flow_error_set(error, EINVAL,
2360                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2361                                           "protocol filtering not compatible"
2362                                           " with this GRE layer");
2363         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2364                 return rte_flow_error_set(error, ENOTSUP,
2365                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2366                                           "multiple tunnel layers not"
2367                                           " supported");
2368         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2369                 return rte_flow_error_set(error, ENOTSUP,
2370                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2371                                           "L3 Layer is missing");
2372         if (!mask)
2373                 mask = &rte_flow_item_gre_mask;
2374         ret = mlx5_flow_item_acceptable
2375                 (item, (const uint8_t *)mask,
2376                  (const uint8_t *)&nic_mask,
2377                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2378                  error);
2379         if (ret < 0)
2380                 return ret;
2381 #ifndef HAVE_MLX5DV_DR
2382 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2383         if (spec && (spec->protocol & mask->protocol))
2384                 return rte_flow_error_set(error, ENOTSUP,
2385                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2386                                           "without MPLS support the"
2387                                           " specification cannot be used for"
2388                                           " filtering");
2389 #endif
2390 #endif
2391         return 0;
2392 }
2393
2394 /**
2395  * Validate Geneve item.
2396  *
2397  * @param[in] item
2398  *   Item specification.
2399  * @param[in] item_flags
2400  *   Bit-fields that hold the items detected until now.
2401  * @param[in] dev
2402  *   Pointer to the Ethernet device structure.
2403  * @param[out] error
2404  *   Pointer to error structure.
2405  *
2406  * @return
2407  *   0 on success, a negative errno value otherwise and rte_errno is set.
2408  */
2410 int
2411 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2412                                uint64_t item_flags,
2413                                struct rte_eth_dev *dev,
2414                                struct rte_flow_error *error)
2415 {
2416         struct mlx5_priv *priv = dev->data->dev_private;
2417         const struct rte_flow_item_geneve *spec = item->spec;
2418         const struct rte_flow_item_geneve *mask = item->mask;
2419         int ret;
2420         uint16_t gbhdr;
2421         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2422                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2423         const struct rte_flow_item_geneve nic_mask = {
2424                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2425                 .vni = "\xff\xff\xff",
2426                 .protocol = RTE_BE16(UINT16_MAX),
2427         };
2428
2429         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2430                 return rte_flow_error_set(error, ENOTSUP,
2431                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2432                                           "Geneve is not enabled by device"
2433                                           " parameter and/or not configured in"
2434                                           " firmware");
2435         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2436                 return rte_flow_error_set(error, ENOTSUP,
2437                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2438                                           "multiple tunnel layers not"
2439                                           " supported");
2440         /*
2441          * Verify an outer UDP layer is present as defined in
2442          * RFC 8926 (Geneve).
2443          */
2444         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2445                 return rte_flow_error_set(error, EINVAL,
2446                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2447                                           "no outer UDP layer found");
2448         if (!mask)
2449                 mask = &rte_flow_item_geneve_mask;
2450         ret = mlx5_flow_item_acceptable
2451                                   (item, (const uint8_t *)mask,
2452                                    (const uint8_t *)&nic_mask,
2453                                    sizeof(struct rte_flow_item_geneve),
2454                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2455         if (ret)
2456                 return ret;
2457         if (spec) {
2458                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2459                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2460                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2461                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2462                         return rte_flow_error_set(error, ENOTSUP,
2463                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2464                                                   item,
2465                                                   "Geneve protocol unsupported"
2466                                                   " fields are being used");
2467                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2468                         return rte_flow_error_set
2469                                         (error, ENOTSUP,
2470                                          RTE_FLOW_ERROR_TYPE_ITEM,
2471                                          item,
2472                                          "Unsupported Geneve options length");
2473         }
2474         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2475                 return rte_flow_error_set
2476                                     (error, ENOTSUP,
2477                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2478                                      "Geneve tunnel must be fully defined");
2479         return 0;
2480 }
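/*
 * Usage sketch, illustrative only and not part of the driver: a minimal
 * rte_flow rule that reaches the Geneve validation above. Port id 0 and
 * the queue index are placeholders.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item_geneve geneve_spec = { .vni = "\x00\x00\x2a" };
 *	struct rte_flow_item_geneve geneve_mask = { .vni = "\xff\xff\xff" };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GENEVE,
 *		  .spec = &geneve_spec, .mask = &geneve_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	int rc = rte_flow_validate(0, &attr, pattern, actions, &err);
 *
 * Omitting the UDP item makes the validation fail with
 * "no outer UDP layer found".
 */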
2481
2482 /**
2483  * Validate MPLS item.
2484  *
2485  * @param[in] dev
2486  *   Pointer to the rte_eth_dev structure.
2487  * @param[in] item
2488  *   Item specification.
2489  * @param[in] item_flags
2490  *   Bit-fields that hold the items detected until now.
2491  * @param[in] prev_layer
2492  *   The protocol layer indicated by the previous item.
2493  * @param[out] error
2494  *   Pointer to error structure.
2495  *
2496  * @return
2497  *   0 on success, a negative errno value otherwise and rte_errno is set.
2498  */
2499 int
2500 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2501                              const struct rte_flow_item *item __rte_unused,
2502                              uint64_t item_flags __rte_unused,
2503                              uint64_t prev_layer __rte_unused,
2504                              struct rte_flow_error *error)
2505 {
2506 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2507         const struct rte_flow_item_mpls *mask = item->mask;
2508         struct mlx5_priv *priv = dev->data->dev_private;
2509         int ret;
2510
2511         if (!priv->config.mpls_en)
2512                 return rte_flow_error_set(error, ENOTSUP,
2513                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2514                                           "MPLS not supported or"
2515                                           " disabled in firmware"
2516                                           " configuration.");
2517         /* MPLS over IP, UDP, or GRE is allowed. */
2518         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2519                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2520                             MLX5_FLOW_LAYER_GRE)))
2521                 return rte_flow_error_set(error, EINVAL,
2522                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2523                                           "protocol filtering not compatible"
2524                                           " with MPLS layer");
2525         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2526         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2527             !(item_flags & MLX5_FLOW_LAYER_GRE))
2528                 return rte_flow_error_set(error, ENOTSUP,
2529                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2530                                           "multiple tunnel layers not"
2531                                           " supported");
2532         if (!mask)
2533                 mask = &rte_flow_item_mpls_mask;
2534         ret = mlx5_flow_item_acceptable
2535                 (item, (const uint8_t *)mask,
2536                  (const uint8_t *)&rte_flow_item_mpls_mask,
2537                  sizeof(struct rte_flow_item_mpls),
2538                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2539         if (ret < 0)
2540                 return ret;
2541         return 0;
2542 #else
2543         return rte_flow_error_set(error, ENOTSUP,
2544                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2545                                   "MPLS is not supported by Verbs, please"
2546                                   " update.");
2547 #endif
2548 }
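/*
 * Usage sketch, illustrative only: MPLS is only accepted after an outer
 * L3, outer UDP or GRE layer, so a conforming pattern (assuming the
 * mpls_en configuration flag is set) could look like:
 *
 *	struct rte_flow_item_mpls mpls_spec = {
 *		.label_tc_s = "\x00\x00\x21",
 *	};
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_GRE },
 *		{ .type = RTE_FLOW_ITEM_TYPE_MPLS, .spec = &mpls_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *
 * Placing the MPLS item directly after ETH is rejected with
 * "protocol filtering not compatible with MPLS layer".
 */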
2549
2550 /**
2551  * Validate NVGRE item.
2552  *
2553  * @param[in] item
2554  *   Item specification.
2555  * @param[in] item_flags
2556  *   Bit flags to mark detected items.
2557  * @param[in] target_protocol
2558  *   The next protocol in the previous item.
2559  * @param[out] error
2560  *   Pointer to error structure.
2561  *
2562  * @return
2563  *   0 on success, a negative errno value otherwise and rte_errno is set.
2564  */
2565 int
2566 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2567                               uint64_t item_flags,
2568                               uint8_t target_protocol,
2569                               struct rte_flow_error *error)
2570 {
2571         const struct rte_flow_item_nvgre *mask = item->mask;
2572         int ret;
2573
2574         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2575                 return rte_flow_error_set(error, EINVAL,
2576                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2577                                           "protocol filtering not compatible"
2578                                           " with this GRE layer");
2579         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2580                 return rte_flow_error_set(error, ENOTSUP,
2581                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2582                                           "multiple tunnel layers not"
2583                                           " supported");
2584         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2585                 return rte_flow_error_set(error, ENOTSUP,
2586                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2587                                           "L3 Layer is missing");
2588         if (!mask)
2589                 mask = &rte_flow_item_nvgre_mask;
2590         ret = mlx5_flow_item_acceptable
2591                 (item, (const uint8_t *)mask,
2592                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2593                  sizeof(struct rte_flow_item_nvgre),
2594                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2595         if (ret < 0)
2596                 return ret;
2597         return 0;
2598 }
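/*
 * Usage sketch, illustrative only: NVGRE requires the outer L3 layer
 * checked above, e.g.:
 *
 *	struct rte_flow_item_nvgre nvgre_spec = { .tni = "\x00\x00\x2a" };
 *	struct rte_flow_item_nvgre nvgre_mask = { .tni = "\xff\xff\xff" };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_NVGRE,
 *		  .spec = &nvgre_spec, .mask = &nvgre_mask },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */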
2599
2600 /**
2601  * Validate eCPRI item.
2602  *
2603  * @param[in] item
2604  *   Item specification.
2605  * @param[in] item_flags
2606  *   Bit-fields that hold the items detected until now.
2607  * @param[in] last_item
2608  *   Previously validated item in the pattern items.
2609  * @param[in] ether_type
2610  *   Type in the ethernet layer header (including dot1q).
2611  * @param[in] acc_mask
2612  *   Acceptable mask; if NULL, the default internal mask
2613  *   will be used to check whether item fields are supported.
2614  * @param[out] error
2615  *   Pointer to error structure.
2616  *
2617  * @return
2618  *   0 on success, a negative errno value otherwise and rte_errno is set.
2619  */
2620 int
2621 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2622                               uint64_t item_flags,
2623                               uint64_t last_item,
2624                               uint16_t ether_type,
2625                               const struct rte_flow_item_ecpri *acc_mask,
2626                               struct rte_flow_error *error)
2627 {
2628         const struct rte_flow_item_ecpri *mask = item->mask;
2629         const struct rte_flow_item_ecpri nic_mask = {
2630                 .hdr = {
2631                         .common = {
2632                                 .u32 =
2633                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2634                                         .type = 0xFF,
2635                                         }).u32),
2636                         },
2637                         .dummy[0] = 0xFFFFFFFF,
2638                 },
2639         };
2640         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2641                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2642         struct rte_flow_item_ecpri mask_lo;
2643
2644         if ((last_item & outer_l2_vlan) && ether_type &&
2645             ether_type != RTE_ETHER_TYPE_ECPRI)
2646                 return rte_flow_error_set(error, EINVAL,
2647                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2648                                           "eCPRI cannot follow an L2/VLAN layer "
2649                                           "whose ether type is not 0xAEFE.");
2650         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2651                 return rte_flow_error_set(error, EINVAL,
2652                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2653                                           "eCPRI with tunnel is not supported "
2654                                           "right now.");
2655         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2656                 return rte_flow_error_set(error, ENOTSUP,
2657                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2658                                           "multiple L3 layers not supported");
2659         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
2660                 return rte_flow_error_set(error, EINVAL,
2661                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2662                                           "eCPRI cannot follow a TCP layer.");
2663         /* In specification, eCPRI could be over UDP layer. */
2664         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
2665                 return rte_flow_error_set(error, EINVAL,
2666                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2667                                           "eCPRI over UDP layer is not "
2668                                           "supported right now.");
2669         /* Mask for type field in common header could be zero. */
2670         if (!mask)
2671                 mask = &rte_flow_item_ecpri_mask;
2672         /* Input mask is in big-endian format. */
2673         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
2674         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
2675                 return rte_flow_error_set(error, EINVAL,
2676                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2677                                           "partial mask is not supported "
2678                                           "for protocol");
2679         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
2680                 return rte_flow_error_set(error, EINVAL,
2681                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2682                                           "message header mask must be after "
2683                                           "a type mask");
2684         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2685                                          acc_mask ? (const uint8_t *)acc_mask
2686                                                   : (const uint8_t *)&nic_mask,
2687                                          sizeof(struct rte_flow_item_ecpri),
2688                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2689 }
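/*
 * Usage sketch, illustrative only: the mask checks above mean matching on
 * the eCPRI message body (hdr.dummy[0]) is only accepted together with a
 * full mask on the common-header type, e.g.:
 *
 *	struct rte_flow_item_ecpri ecpri_mask = {
 *		.hdr = {
 *			.common = {
 *				.u32 =
 *				RTE_BE32(((const struct rte_ecpri_common_hdr) {
 *					.type = 0xFF,
 *					}).u32),
 *			},
 *			.dummy[0] = 0xFFFFFFFF,
 *		},
 *	};
 *
 * A mask with .type left zero but .dummy[0] set is rejected with
 * "message header mask must be after a type mask".
 */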
2690
2691 /* Allocate unique ID for the split Q/RSS subflows. */
2692 static uint32_t
2693 flow_qrss_get_id(struct rte_eth_dev *dev)
2694 {
2695         struct mlx5_priv *priv = dev->data->dev_private;
2696         uint32_t qrss_id, ret;
2697
2698         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2699         if (ret)
2700                 return 0;
2701         MLX5_ASSERT(qrss_id);
2702         return qrss_id;
2703 }
2704
2705 /* Free unique ID for the split Q/RSS subflows. */
2706 static void
2707 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2708 {
2709         struct mlx5_priv *priv = dev->data->dev_private;
2710
2711         if (qrss_id)
2712                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2713 }
2714
2715 /**
2716  * Release resources related to the QUEUE/RSS action split.
2717  *
2718  * @param dev
2719  *   Pointer to Ethernet device.
2720  * @param flow
2721  *   Flow to release id's from.
2722  */
2723 static void
2724 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2725                              struct rte_flow *flow)
2726 {
2727         struct mlx5_priv *priv = dev->data->dev_private;
2728         uint32_t handle_idx;
2729         struct mlx5_flow_handle *dev_handle;
2730
2731         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2732                        handle_idx, dev_handle, next)
2733                 if (dev_handle->split_flow_id)
2734                         flow_qrss_free_id(dev, dev_handle->split_flow_id);
2735 }
2736
2737 static int
2738 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2739                    const struct rte_flow_attr *attr __rte_unused,
2740                    const struct rte_flow_item items[] __rte_unused,
2741                    const struct rte_flow_action actions[] __rte_unused,
2742                    bool external __rte_unused,
2743                    int hairpin __rte_unused,
2744                    struct rte_flow_error *error)
2745 {
2746         return rte_flow_error_set(error, ENOTSUP,
2747                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2748 }
2749
2750 static struct mlx5_flow *
2751 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2752                   const struct rte_flow_attr *attr __rte_unused,
2753                   const struct rte_flow_item items[] __rte_unused,
2754                   const struct rte_flow_action actions[] __rte_unused,
2755                   struct rte_flow_error *error)
2756 {
2757         rte_flow_error_set(error, ENOTSUP,
2758                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2759         return NULL;
2760 }
2761
2762 static int
2763 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2764                     struct mlx5_flow *dev_flow __rte_unused,
2765                     const struct rte_flow_attr *attr __rte_unused,
2766                     const struct rte_flow_item items[] __rte_unused,
2767                     const struct rte_flow_action actions[] __rte_unused,
2768                     struct rte_flow_error *error)
2769 {
2770         return rte_flow_error_set(error, ENOTSUP,
2771                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2772 }
2773
2774 static int
2775 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2776                 struct rte_flow *flow __rte_unused,
2777                 struct rte_flow_error *error)
2778 {
2779         return rte_flow_error_set(error, ENOTSUP,
2780                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2781 }
2782
2783 static void
2784 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2785                  struct rte_flow *flow __rte_unused)
2786 {
2787 }
2788
2789 static void
2790 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2791                   struct rte_flow *flow __rte_unused)
2792 {
2793 }
2794
2795 static int
2796 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2797                 struct rte_flow *flow __rte_unused,
2798                 const struct rte_flow_action *actions __rte_unused,
2799                 void *data __rte_unused,
2800                 struct rte_flow_error *error)
2801 {
2802         return rte_flow_error_set(error, ENOTSUP,
2803                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2804 }
2805
2806 /* Void driver to protect from null pointer reference. */
2807 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2808         .validate = flow_null_validate,
2809         .prepare = flow_null_prepare,
2810         .translate = flow_null_translate,
2811         .apply = flow_null_apply,
2812         .remove = flow_null_remove,
2813         .destroy = flow_null_destroy,
2814         .query = flow_null_query,
2815 };
2816
2817 /**
2818  * Select flow driver type according to flow attributes and device
2819  * configuration.
2820  *
2821  * @param[in] dev
2822  *   Pointer to the dev structure.
2823  * @param[in] attr
2824  *   Pointer to the flow attributes.
2825  *
2826  * @return
2827  *   Flow driver type on success, MLX5_FLOW_TYPE_MAX otherwise.
2828  */
2829 static enum mlx5_flow_drv_type
2830 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2831 {
2832         struct mlx5_priv *priv = dev->data->dev_private;
2833         /* The OS can first determine a specific flow type (DV, VERBS). */
2834         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
2835
2836         if (type != MLX5_FLOW_TYPE_MAX)
2837                 return type;
2838         /* If no OS specific type - continue with DV/VERBS selection */
2839         if (attr->transfer && priv->config.dv_esw_en)
2840                 type = MLX5_FLOW_TYPE_DV;
2841         if (!attr->transfer)
2842                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2843                                                  MLX5_FLOW_TYPE_VERBS;
2844         return type;
2845 }
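/*
 * With no OS-specific override, the selection above reduces to the
 * following table:
 *
 *	attr->transfer	dv_esw_en	dv_flow_en	resulting type
 *	      1		    1		    -		MLX5_FLOW_TYPE_DV
 *	      1		    0		    -		MLX5_FLOW_TYPE_MAX
 *	      0		    -		    1		MLX5_FLOW_TYPE_DV
 *	      0		    -		    0		MLX5_FLOW_TYPE_VERBS
 */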
2846
2847 #define flow_get_drv_ops(type) flow_drv_ops[type]
2848
2849 /**
2850  * Flow driver validation API. This abstracts calling driver specific functions.
2851  * The type of flow driver is determined according to flow attributes.
2852  *
2853  * @param[in] dev
2854  *   Pointer to the dev structure.
2855  * @param[in] attr
2856  *   Pointer to the flow attributes.
2857  * @param[in] items
2858  *   Pointer to the list of items.
2859  * @param[in] actions
2860  *   Pointer to the list of actions.
2861  * @param[in] external
2862  *   This flow rule is created by a request external to the PMD.
2863  * @param[in] hairpin
2864  *   Number of hairpin TX actions, 0 means classic flow.
2865  * @param[out] error
2866  *   Pointer to the error structure.
2867  *
2868  * @return
2869  *   0 on success, a negative errno value otherwise and rte_errno is set.
2870  */
2871 static inline int
2872 flow_drv_validate(struct rte_eth_dev *dev,
2873                   const struct rte_flow_attr *attr,
2874                   const struct rte_flow_item items[],
2875                   const struct rte_flow_action actions[],
2876                   bool external, int hairpin, struct rte_flow_error *error)
2877 {
2878         const struct mlx5_flow_driver_ops *fops;
2879         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2880
2881         fops = flow_get_drv_ops(type);
2882         return fops->validate(dev, attr, items, actions, external,
2883                               hairpin, error);
2884 }
2885
2886 /**
2887  * Flow driver preparation API. This abstracts calling driver specific
2888  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2889  * calculates the size of memory required for device flow, allocates the memory,
2890  * initializes the device flow and returns the pointer.
2891  *
2892  * @note
2893  *   This function initializes the device flow structure, such as dv or verbs
2894  *   in struct mlx5_flow. However, it is the caller's responsibility to
2895  *   initialize the rest. For example, adding the returned device flow to the
2896  *   flow->dev_flow list and setting a backward reference to the flow should be
2897  *   done outside of this function. The layers field is not filled either.
2898  *
2899  * @param[in] dev
2900  *   Pointer to the dev structure.
2901  * @param[in] attr
2902  *   Pointer to the flow attributes.
2903  * @param[in] items
2904  *   Pointer to the list of items.
2905  * @param[in] actions
2906  *   Pointer to the list of actions.
2907  * @param[in] flow_idx
2908  *   The memory pool index of the flow.
2909  * @param[out] error
2910  *   Pointer to the error structure.
2911  *
2912  * @return
2913  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2914  */
2915 static inline struct mlx5_flow *
2916 flow_drv_prepare(struct rte_eth_dev *dev,
2917                  const struct rte_flow *flow,
2918                  const struct rte_flow_attr *attr,
2919                  const struct rte_flow_item items[],
2920                  const struct rte_flow_action actions[],
2921                  uint32_t flow_idx,
2922                  struct rte_flow_error *error)
2923 {
2924         const struct mlx5_flow_driver_ops *fops;
2925         enum mlx5_flow_drv_type type = flow->drv_type;
2926         struct mlx5_flow *mlx5_flow = NULL;
2927
2928         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2929         fops = flow_get_drv_ops(type);
2930         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
2931         if (mlx5_flow)
2932                 mlx5_flow->flow_idx = flow_idx;
2933         return mlx5_flow;
2934 }
2935
2936 /**
2937  * Flow driver translation API. This abstracts calling driver specific
2938  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2939  * translates a generic flow into a driver flow. flow_drv_prepare() must
2940  * precede.
2941  *
2942  * @note
2943  *   dev_flow->layers could be filled as a result of parsing during translation
2944  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2945  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2946  *   flow->actions could be overwritten even though all the expanded dev_flows
2947  *   have the same actions.
2948  *
2949  * @param[in] dev
2950  *   Pointer to the rte dev structure.
2951  * @param[in, out] dev_flow
2952  *   Pointer to the mlx5 flow.
2953  * @param[in] attr
2954  *   Pointer to the flow attributes.
2955  * @param[in] items
2956  *   Pointer to the list of items.
2957  * @param[in] actions
2958  *   Pointer to the list of actions.
2959  * @param[out] error
2960  *   Pointer to the error structure.
2961  *
2962  * @return
2963  *   0 on success, a negative errno value otherwise and rte_errno is set.
2964  */
2965 static inline int
2966 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2967                    const struct rte_flow_attr *attr,
2968                    const struct rte_flow_item items[],
2969                    const struct rte_flow_action actions[],
2970                    struct rte_flow_error *error)
2971 {
2972         const struct mlx5_flow_driver_ops *fops;
2973         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2974
2975         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2976         fops = flow_get_drv_ops(type);
2977         return fops->translate(dev, dev_flow, attr, items, actions, error);
2978 }
2979
2980 /**
2981  * Flow driver apply API. This abstracts calling driver specific functions.
2982  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2983  * translated driver flows on to device. flow_drv_translate() must precede.
2984  *
2985  * @param[in] dev
2986  *   Pointer to Ethernet device structure.
2987  * @param[in, out] flow
2988  *   Pointer to flow structure.
2989  * @param[out] error
2990  *   Pointer to error structure.
2991  *
2992  * @return
2993  *   0 on success, a negative errno value otherwise and rte_errno is set.
2994  */
2995 static inline int
2996 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2997                struct rte_flow_error *error)
2998 {
2999         const struct mlx5_flow_driver_ops *fops;
3000         enum mlx5_flow_drv_type type = flow->drv_type;
3001
3002         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3003         fops = flow_get_drv_ops(type);
3004         return fops->apply(dev, flow, error);
3005 }
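/*
 * Illustrative sketch, error handling omitted: the flow_drv_*() wrappers
 * are invoked in a fixed order along the flow lifecycle, roughly:
 *
 *	if (flow_drv_validate(dev, attr, items, actions, true, 0, error))
 *		return 0;
 *	dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
 *				    flow_idx, error);
 *	flow_drv_translate(dev, dev_flow, attr, items, actions, error);
 *	flow_drv_apply(dev, flow, error);
 *	...
 *	flow_drv_remove(dev, flow);
 *	flow_drv_destroy(dev, flow);
 */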
3006
3007 /**
3008  * Flow driver remove API. This abstracts calling driver specific functions.
3009  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3010  * on device. All the resources of the flow should be freed by calling
3011  * flow_drv_destroy().
3012  *
3013  * @param[in] dev
3014  *   Pointer to Ethernet device.
3015  * @param[in, out] flow
3016  *   Pointer to flow structure.
3017  */
3018 static inline void
3019 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3020 {
3021         const struct mlx5_flow_driver_ops *fops;
3022         enum mlx5_flow_drv_type type = flow->drv_type;
3023
3024         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3025         fops = flow_get_drv_ops(type);
3026         fops->remove(dev, flow);
3027 }
3028
3029 /**
3030  * Flow driver destroy API. This abstracts calling driver specific functions.
3031  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3032  * on device and releases resources of the flow.
3033  *
3034  * @param[in] dev
3035  *   Pointer to Ethernet device.
3036  * @param[in, out] flow
3037  *   Pointer to flow structure.
3038  */
3039 static inline void
3040 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3041 {
3042         const struct mlx5_flow_driver_ops *fops;
3043         enum mlx5_flow_drv_type type = flow->drv_type;
3044
3045         flow_mreg_split_qrss_release(dev, flow);
3046         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3047         fops = flow_get_drv_ops(type);
3048         fops->destroy(dev, flow);
3049 }
3050
3051 /**
3052  * Get RSS action from the action list.
3053  *
3054  * @param[in] actions
3055  *   Pointer to the list of actions.
3056  *
3057  * @return
3058  *   Pointer to the RSS action if it exists, NULL otherwise.
3059  */
3060 static const struct rte_flow_action_rss*
3061 flow_get_rss_action(const struct rte_flow_action actions[])
3062 {
3063         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3064                 switch (actions->type) {
3065                 case RTE_FLOW_ACTION_TYPE_RSS:
3066                         return (const struct rte_flow_action_rss *)
3067                                actions->conf;
3068                 default:
3069                         break;
3070                 }
3071         }
3072         return NULL;
3073 }
3074
3075 static unsigned int
3076 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3077 {
3078         const struct rte_flow_item *item;
3079         unsigned int has_vlan = 0;
3080
3081         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3082                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3083                         has_vlan = 1;
3084                         break;
3085                 }
3086         }
3087         if (has_vlan)
3088                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3089                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3090         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3091                                MLX5_EXPANSION_ROOT_OUTER;
3092 }
3093
3094 /**
3095  *  Get layer flags from the prefix flow.
3096  *
3097  *  Some flows may be split into several subflows: the prefix subflow gets the
3098  *  match items and the suffix subflow gets the actions.
3099  *  Some actions need the user-defined match item flags to get the details for
3100  *  the action.
3101  *  This function helps the suffix flow to get the item layer flags from the
3102  *  prefix subflow.
3103  *
3104  * @param[in] dev_flow
3105  *   Pointer to the created prefix subflow.
3106  *
3107  * @return
3108  *   The layers obtained from the prefix subflow.
3109  */
3110 static inline uint64_t
3111 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3112 {
3113         uint64_t layers = 0;
3114
3115         /*
3116          * The layers bits could be cached in a local variable, but usually
3117          * the compiler will do that optimization by itself.
3118          * If there is no decap action, use the layers directly.
3119          */
3120         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3121                 return dev_flow->handle->layers;
3122         /* Convert L3 layers with decap action. */
3123         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3124                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3125         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3126                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3127         /* Convert L4 layers with decap action.  */
3128         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3129                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3130         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3131                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3132         return layers;
3133 }
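/*
 * For example, with a decap action and prefix layers
 * MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L4_UDP, the
 * function above returns
 * MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L4_UDP:
 * once the tunnel header is stripped, the inner layers of the prefix
 * subflow become the outer layers seen by the suffix subflow.
 */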
3134
3135 /**
3136  * Get metadata split action information.
3137  *
3138  * @param[in] actions
3139  *   Pointer to the list of actions.
3140  * @param[out] qrss
3141  *   Pointer to the return pointer; set to the QUEUE/RSS action if one is
3142  *   found.
3145  * @param[out] encap_idx
3146  *   Pointer to the index of the encap action if it exists, otherwise the
3147  *   last action index.
3148  *
3149  * @return
3150  *   Total number of actions.
3151  */
3152 static int
3153 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3154                                        const struct rte_flow_action **qrss,
3155                                        int *encap_idx)
3156 {
3157         const struct rte_flow_action_raw_encap *raw_encap;
3158         int actions_n = 0;
3159         int raw_decap_idx = -1;
3160
3161         *encap_idx = -1;
3162         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3163                 switch (actions->type) {
3164                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3165                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3166                         *encap_idx = actions_n;
3167                         break;
3168                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3169                         raw_decap_idx = actions_n;
3170                         break;
3171                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3172                         raw_encap = actions->conf;
3173                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3174                                 *encap_idx = raw_decap_idx != -1 ?
3175                                                       raw_decap_idx : actions_n;
3176                         break;
3177                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3178                 case RTE_FLOW_ACTION_TYPE_RSS:
3179                         *qrss = actions;
3180                         break;
3181                 default:
3182                         break;
3183                 }
3184                 actions_n++;
3185         }
3186         if (*encap_idx == -1)
3187                 *encap_idx = actions_n;
3188         /* Count RTE_FLOW_ACTION_TYPE_END. */
3189         return actions_n + 1;
3190 }
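/*
 * For example, given the action list
 *	RAW_DECAP / RAW_ENCAP (size > MLX5_ENCAPSULATION_DECISION_SIZE) /
 *	QUEUE / END
 * the function above sets *qrss to the QUEUE action, sets *encap_idx to 0
 * (the RAW_DECAP index, as the decap/encap pair forms a single L3 encap)
 * and returns 4, i.e. three actions plus the END terminator.
 */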
3191
3192 /**
3193  * Check meter action from the action list.
3194  *
3195  * @param[in] actions
3196  *   Pointer to the list of actions.
3197  * @param[out] mtr
3198  *   Pointer to the meter exist flag.
3199  *
3200  * @return
3201  *   Total number of actions.
3202  */
3203 static int
3204 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
3205 {
3206         int actions_n = 0;
3207
3208         MLX5_ASSERT(mtr);
3209         *mtr = 0;
3210         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3211                 switch (actions->type) {
3212                 case RTE_FLOW_ACTION_TYPE_METER:
3213                         *mtr = 1;
3214                         break;
3215                 default:
3216                         break;
3217                 }
3218                 actions_n++;
3219         }
3220         /* Count RTE_FLOW_ACTION_TYPE_END. */
3221         return actions_n + 1;
3222 }
3223
3224 /**
3225  * Check if the flow should be split due to hairpin.
3226  * The reason for the split is that in current HW we can't
3227  * support encap and push-vlan on Rx, so if a flow contains
3228  * these actions we move them to Tx.
3229  *
3230  * @param dev
3231  *   Pointer to Ethernet device.
3232  * @param[in] attr
3233  *   Flow rule attributes.
3234  * @param[in] actions
3235  *   Associated actions (list terminated by the END action).
3236  *
3237  * @return
3238  *   > 0 the number of actions and the flow should be split,
3239  *   0 when no split required.
3240  */
3241 static int
3242 flow_check_hairpin_split(struct rte_eth_dev *dev,
3243                          const struct rte_flow_attr *attr,
3244                          const struct rte_flow_action actions[])
3245 {
3246         int queue_action = 0;
3247         int action_n = 0;
3248         int split = 0;
3249         const struct rte_flow_action_queue *queue;
3250         const struct rte_flow_action_rss *rss;
3251         const struct rte_flow_action_raw_encap *raw_encap;
3252
3253         if (!attr->ingress)
3254                 return 0;
3255         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3256                 switch (actions->type) {
3257                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3258                         queue = actions->conf;
3259                         if (queue == NULL)
3260                                 return 0;
3261                         if (mlx5_rxq_get_type(dev, queue->index) !=
3262                             MLX5_RXQ_TYPE_HAIRPIN)
3263                                 return 0;
3264                         queue_action = 1;
3265                         action_n++;
3266                         break;
3267                 case RTE_FLOW_ACTION_TYPE_RSS:
3268                         rss = actions->conf;
3269                         if (rss == NULL || rss->queue_num == 0)
3270                                 return 0;
3271                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
3272                             MLX5_RXQ_TYPE_HAIRPIN)
3273                                 return 0;
3274                         queue_action = 1;
3275                         action_n++;
3276                         break;
3277                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3278                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3279                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3280                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3281                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3282                         split++;
3283                         action_n++;
3284                         break;
3285                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3286                         raw_encap = actions->conf;
3287                         if (raw_encap->size >
3288                             (sizeof(struct rte_flow_item_eth) +
3289                              sizeof(struct rte_flow_item_ipv4)))
3290                                 split++;
3291                         action_n++;
3292                         break;
3293                 default:
3294                         action_n++;
3295                         break;
3296                 }
3297         }
3298         if (split && queue_action)
3299                 return action_n;
3300         return 0;
3301 }
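/*
 * For example, assuming queue 5 is a hairpin queue, the ingress action
 * list
 *	VXLAN_ENCAP / QUEUE (index = 5) / END
 * makes the function above return 2: the encap forces a split and the
 * destination is a hairpin queue. With a regular Rx queue, or without the
 * encap action, it returns 0 and no split is done.
 */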
3302
3303 /* Declare flow create/destroy prototype in advance. */
3304 static uint32_t
3305 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3306                  const struct rte_flow_attr *attr,
3307                  const struct rte_flow_item items[],
3308                  const struct rte_flow_action actions[],
3309                  bool external, struct rte_flow_error *error);
3310
3311 static void
3312 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3313                   uint32_t flow_idx);
3314
3315 /**
3316  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3317  *
3318  * As mark_id is unique, if there's already a registered flow for the mark_id,
3319  * increase the reference counter of the resource and return it. Otherwise,
3320  * create the resource (mcp_res) and the flow.
3321  *
3322  * Flow looks like,
3323  *   - If ingress port is ANY and reg_c[1] is mark_id,
3324  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3325  *
3326  * For default flow (zero mark_id), flow is like,
3327  *   - If ingress port is ANY,
3328  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3329  *
3330  * @param dev
3331  *   Pointer to Ethernet device.
3332  * @param mark_id
3333  *   ID of MARK action, zero means default flow for META.
3334  * @param[out] error
3335  *   Perform verbose error reporting if not NULL.
3336  *
3337  * @return
3338  *   Associated resource on success, NULL otherwise and rte_errno is set.
3339  */
3340 static struct mlx5_flow_mreg_copy_resource *
3341 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3342                           struct rte_flow_error *error)
3343 {
3344         struct mlx5_priv *priv = dev->data->dev_private;
3345         struct rte_flow_attr attr = {
3346                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3347                 .ingress = 1,
3348         };
3349         struct mlx5_rte_flow_item_tag tag_spec = {
3350                 .data = mark_id,
3351         };
3352         struct rte_flow_item items[] = {
3353                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3354         };
3355         struct rte_flow_action_mark ftag = {
3356                 .id = mark_id,
3357         };
3358         struct mlx5_flow_action_copy_mreg cp_mreg = {
3359                 .dst = REG_B,
3360                 .src = REG_NON,
3361         };
3362         struct rte_flow_action_jump jump = {
3363                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3364         };
3365         struct rte_flow_action actions[] = {
3366                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3367         };
3368         struct mlx5_flow_mreg_copy_resource *mcp_res;
3369         uint32_t idx = 0;
3370         int ret;
3371
3372         /* Fill the register fields in the flow. */
3373         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3374         if (ret < 0)
3375                 return NULL;
3376         tag_spec.id = ret;
3377         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3378         if (ret < 0)
3379                 return NULL;
3380         cp_mreg.src = ret;
3381         /* Check if already registered. */
3382         MLX5_ASSERT(priv->mreg_cp_tbl);
3383         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
3384         if (mcp_res) {
3385                 /* For non-default rule. */
3386                 if (mark_id != MLX5_DEFAULT_COPY_ID)
3387                         mcp_res->refcnt++;
3388                 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
3389                             mcp_res->refcnt == 1);
3390                 return mcp_res;
3391         }
3392         /* Provide the full width of FLAG specific value. */
3393         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3394                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3395         /* Build a new flow. */
3396         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3397                 items[0] = (struct rte_flow_item){
3398                         .type = (enum rte_flow_item_type)
3399                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3400                         .spec = &tag_spec,
3401                 };
3402                 items[1] = (struct rte_flow_item){
3403                         .type = RTE_FLOW_ITEM_TYPE_END,
3404                 };
3405                 actions[0] = (struct rte_flow_action){
3406                         .type = (enum rte_flow_action_type)
3407                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3408                         .conf = &ftag,
3409                 };
3410                 actions[1] = (struct rte_flow_action){
3411                         .type = (enum rte_flow_action_type)
3412                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3413                         .conf = &cp_mreg,
3414                 };
3415                 actions[2] = (struct rte_flow_action){
3416                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3417                         .conf = &jump,
3418                 };
3419                 actions[3] = (struct rte_flow_action){
3420                         .type = RTE_FLOW_ACTION_TYPE_END,
3421                 };
3422         } else {
3423                 /* Default rule, wildcard match. */
3424                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3425                 items[0] = (struct rte_flow_item){
3426                         .type = RTE_FLOW_ITEM_TYPE_END,
3427                 };
3428                 actions[0] = (struct rte_flow_action){
3429                         .type = (enum rte_flow_action_type)
3430                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3431                         .conf = &cp_mreg,
3432                 };
3433                 actions[1] = (struct rte_flow_action){
3434                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3435                         .conf = &jump,
3436                 };
3437                 actions[2] = (struct rte_flow_action){
3438                         .type = RTE_FLOW_ACTION_TYPE_END,
3439                 };
3440         }
3441         /* Build a new entry. */
3442         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3443         if (!mcp_res) {
3444                 rte_errno = ENOMEM;
3445                 return NULL;
3446         }
3447         mcp_res->idx = idx;
3448         /*
3449          * The copy flows are not included in any list. These
3450          * ones are referenced from other flows and can not
3451          * be applied, removed, or deleted in arbitrary order
3452          * by list traversal.
3453          */
3454         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3455                                          actions, false, error);
3456         if (!mcp_res->rix_flow)
3457                 goto error;
3458         mcp_res->refcnt++;
3459         mcp_res->hlist_ent.key = mark_id;
3460         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3461                                 &mcp_res->hlist_ent);
3462         MLX5_ASSERT(!ret);
3463         if (ret)
3464                 goto error;
3465         return mcp_res;
3466 error:
3467         if (mcp_res->rix_flow)
3468                 flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3469         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3470         return NULL;
3471 }
3472
3473 /**
3474  * Release flow in RX_CP_TBL.
3475  *
3476  * @param dev
3477  *   Pointer to Ethernet device.
3478  * @param flow
3479  *   Parent flow for which copying is provided.
3480  */
3481 static void
3482 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3483                           struct rte_flow *flow)
3484 {
3485         struct mlx5_flow_mreg_copy_resource *mcp_res;
3486         struct mlx5_priv *priv = dev->data->dev_private;
3487
3488         if (!flow->rix_mreg_copy)
3489                 return;
3490         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3491                                  flow->rix_mreg_copy);
3492         if (!mcp_res || !priv->mreg_cp_tbl)
3493                 return;
3494         if (flow->copy_applied) {
3495                 MLX5_ASSERT(mcp_res->appcnt);
3496                 flow->copy_applied = 0;
3497                 --mcp_res->appcnt;
3498                 if (!mcp_res->appcnt) {
3499                         struct rte_flow *mcp_flow = mlx5_ipool_get
3500                                         (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3501                                         mcp_res->rix_flow);
3502
3503                         if (mcp_flow)
3504                                 flow_drv_remove(dev, mcp_flow);
3505                 }
3506         }
3507         /*
3508          * We do not check availability of metadata registers here,
3509          * because copy resources are not allocated in this case.
3510          */
3511         if (--mcp_res->refcnt)
3512                 return;
3513         MLX5_ASSERT(mcp_res->rix_flow);
3514         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3515         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3516         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3517         flow->rix_mreg_copy = 0;
3518 }
3519
3520 /**
3521  * Start flow in RX_CP_TBL.
3522  *
3523  * @param dev
3524  *   Pointer to Ethernet device.
3525  * @param flow
3526  *   Parent flow for which copying is provided.
3527  *
3528  * @return
3529  *   0 on success, a negative errno value otherwise and rte_errno is set.
3530  */
3531 static int
3532 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3533                             struct rte_flow *flow)
3534 {
3535         struct mlx5_flow_mreg_copy_resource *mcp_res;
3536         struct mlx5_priv *priv = dev->data->dev_private;
3537         int ret;
3538
3539         if (!flow->rix_mreg_copy || flow->copy_applied)
3540                 return 0;
3541         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3542                                  flow->rix_mreg_copy);
3543         if (!mcp_res)
3544                 return 0;
3545         if (!mcp_res->appcnt) {
3546                 struct rte_flow *mcp_flow = mlx5_ipool_get
3547                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3548                                 mcp_res->rix_flow);
3549
3550                 if (mcp_flow) {
3551                         ret = flow_drv_apply(dev, mcp_flow, NULL);
3552                         if (ret)
3553                                 return ret;
3554                 }
3555         }
3556         ++mcp_res->appcnt;
3557         flow->copy_applied = 1;
3558         return 0;
3559 }
3560
3561 /**
3562  * Stop flow in RX_CP_TBL.
3563  *
3564  * @param dev
3565  *   Pointer to Ethernet device.
3566  * @param flow
3567  *   Parent flow for which copying is provided.
3568  */
3569 static void
3570 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3571                            struct rte_flow *flow)
3572 {
3573         struct mlx5_flow_mreg_copy_resource *mcp_res;
3574         struct mlx5_priv *priv = dev->data->dev_private;
3575
3576         if (!flow->rix_mreg_copy || !flow->copy_applied)
3577                 return;
3578         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3579                                  flow->rix_mreg_copy);
3580         if (!mcp_res)
3581                 return;
3582         MLX5_ASSERT(mcp_res->appcnt);
3583         --mcp_res->appcnt;
3584         flow->copy_applied = 0;
3585         if (!mcp_res->appcnt) {
3586                 struct rte_flow *mcp_flow = mlx5_ipool_get
3587                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3588                                 mcp_res->rix_flow);
3589
3590                 if (mcp_flow)
3591                         flow_drv_remove(dev, mcp_flow);
3592         }
3593 }
3594
3595 /**
3596  * Remove the default copy action from RX_CP_TBL.
3597  *
3598  * @param dev
3599  *   Pointer to Ethernet device.
3600  */
3601 static void
3602 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3603 {
3604         struct mlx5_flow_mreg_copy_resource *mcp_res;
3605         struct mlx5_priv *priv = dev->data->dev_private;
3606
3607         /* Check if default flow is registered. */
3608         if (!priv->mreg_cp_tbl)
3609                 return;
3610         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3611                                             MLX5_DEFAULT_COPY_ID);
3612         if (!mcp_res)
3613                 return;
3614         MLX5_ASSERT(mcp_res->rix_flow);
3615         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3616         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3617         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3618 }
3619
3620 /**
3621  * Add the default copy action in RX_CP_TBL.
3622  *
3623  * @param dev
3624  *   Pointer to Ethernet device.
3625  * @param[out] error
3626  *   Perform verbose error reporting if not NULL.
3627  *
3628  * @return
3629  *   0 for success, negative value otherwise and rte_errno is set.
3630  */
3631 static int
3632 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3633                                   struct rte_flow_error *error)
3634 {
3635         struct mlx5_priv *priv = dev->data->dev_private;
3636         struct mlx5_flow_mreg_copy_resource *mcp_res;
3637
3638         /* Check whether extensive metadata feature is engaged. */
3639         if (!priv->config.dv_flow_en ||
3640             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3641             !mlx5_flow_ext_mreg_supported(dev) ||
3642             !priv->sh->dv_regc0_mask)
3643                 return 0;
3644         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3645         if (!mcp_res)
3646                 return -rte_errno;
3647         return 0;
3648 }
3649
3650 /**
3651  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3652  *
3653  * All the flows having a Q/RSS action should be split by
3654  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in the
3655  * RX_CP_TBL performs the following,
3656  *   - CQE->flow_tag := reg_c[1] (MARK)
3657  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3658  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3659  * but there should be a flow for each MARK ID set by the MARK action.
3660  *
3661  * For the aforementioned reason, if there's a MARK action in flow's action
3662  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3663  * the MARK ID to CQE's flow_tag like,
3664  *   - If reg_c[1] is mark_id,
3665  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3666  *
3667  * For SET_META action which stores value in reg_c[0], as the destination is
3668  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3669  * MARK ID means the default flow. The default flow looks like,
3670  *   - For all flows, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3671  *
3672  * @param dev
3673  *   Pointer to Ethernet device.
3674  * @param flow
3675  *   Pointer to flow structure.
3676  * @param[in] actions
3677  *   Pointer to the list of actions.
3678  * @param[out] error
3679  *   Perform verbose error reporting if not NULL.
3680  *
3681  * @return
3682  *   0 on success, negative value otherwise and rte_errno is set.
3683  */
3684 static int
3685 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3686                             struct rte_flow *flow,
3687                             const struct rte_flow_action *actions,
3688                             struct rte_flow_error *error)
3689 {
3690         struct mlx5_priv *priv = dev->data->dev_private;
3691         struct mlx5_dev_config *config = &priv->config;
3692         struct mlx5_flow_mreg_copy_resource *mcp_res;
3693         const struct rte_flow_action_mark *mark;
3694
3695         /* Check whether extensive metadata feature is engaged. */
3696         if (!config->dv_flow_en ||
3697             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3698             !mlx5_flow_ext_mreg_supported(dev) ||
3699             !priv->sh->dv_regc0_mask)
3700                 return 0;
3701         /* Find MARK action. */
3702         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3703                 switch (actions->type) {
3704                 case RTE_FLOW_ACTION_TYPE_FLAG:
3705                         mcp_res = flow_mreg_add_copy_action
3706                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3707                         if (!mcp_res)
3708                                 return -rte_errno;
3709                         flow->rix_mreg_copy = mcp_res->idx;
3710                         if (dev->data->dev_started) {
3711                                 mcp_res->appcnt++;
3712                                 flow->copy_applied = 1;
3713                         }
3714                         return 0;
3715                 case RTE_FLOW_ACTION_TYPE_MARK:
3716                         mark = (const struct rte_flow_action_mark *)
3717                                 actions->conf;
3718                         mcp_res =
3719                                 flow_mreg_add_copy_action(dev, mark->id, error);
3720                         if (!mcp_res)
3721                                 return -rte_errno;
3722                         flow->rix_mreg_copy = mcp_res->idx;
3723                         if (dev->data->dev_started) {
3724                                 mcp_res->appcnt++;
3725                                 flow->copy_applied = 1;
3726                         }
3727                         return 0;
3728                 default:
3729                         break;
3730                 }
3731         }
3732         return 0;
3733 }
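/*
 * For example, an ingress rule with actions MARK (id = 42) / QUEUE / END
 * makes the function above call flow_mreg_add_copy_action(dev, 42, error),
 * which installs a flow in RX_CP_TBL matching reg_c[1] == 42 and
 * performing flow_tag := 42, reg_b := reg_c[0], then jumping to
 * RX_ACT_TBL.
 */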
3734
3735 #define MLX5_MAX_SPLIT_ACTIONS 24
3736 #define MLX5_MAX_SPLIT_ITEMS 24
3737
3738 /**
3739  * Split the hairpin flow.
3740  * Since HW can't support encap and push-vlan on Rx, we move these
3741  * actions to Tx.
3742  * If the count action is after the encap then we also
3743  * move the count action. In this case the count will also measure
3744  * the outer bytes.
3745  *
3746  * @param dev
3747  *   Pointer to Ethernet device.
3748  * @param[in] actions
3749  *   Associated actions (list terminated by the END action).
3750  * @param[out] actions_rx
3751  *   Rx flow actions.
3752  * @param[out] actions_tx
3753  *   Tx flow actions.
3754  * @param[out] pattern_tx
3755  *   The pattern items for the Tx flow.
3756  * @param[out] flow_id
3757  *   The flow ID connected to this flow.
3758  *
3759  * @return
3760  *   0 on success.
3761  */
3762 static int
3763 flow_hairpin_split(struct rte_eth_dev *dev,
3764                    const struct rte_flow_action actions[],
3765                    struct rte_flow_action actions_rx[],
3766                    struct rte_flow_action actions_tx[],
3767                    struct rte_flow_item pattern_tx[],
3768                    uint32_t *flow_id)
3769 {
3770         struct mlx5_priv *priv = dev->data->dev_private;
3771         const struct rte_flow_action_raw_encap *raw_encap;
3772         const struct rte_flow_action_raw_decap *raw_decap;
3773         struct mlx5_rte_flow_action_set_tag *set_tag;
3774         struct rte_flow_action *tag_action;
3775         struct mlx5_rte_flow_item_tag *tag_item;
3776         struct rte_flow_item *item;
3777         char *addr;
3778         int encap = 0;
3779
3780         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3781         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3782                 switch (actions->type) {
3783                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3784                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3785                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3786                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3787                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3788                         rte_memcpy(actions_tx, actions,
3789                                sizeof(struct rte_flow_action));
3790                         actions_tx++;
3791                         break;
3792                 case RTE_FLOW_ACTION_TYPE_COUNT:
3793                         if (encap) {
3794                                 rte_memcpy(actions_tx, actions,
3795                                            sizeof(struct rte_flow_action));
3796                                 actions_tx++;
3797                         } else {
3798                                 rte_memcpy(actions_rx, actions,
3799                                            sizeof(struct rte_flow_action));
3800                                 actions_rx++;
3801                         }
3802                         break;
3803                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3804                         raw_encap = actions->conf;
3805                         if (raw_encap->size >
3806                             (sizeof(struct rte_flow_item_eth) +
3807                              sizeof(struct rte_flow_item_ipv4))) {
3808                                 memcpy(actions_tx, actions,
3809                                        sizeof(struct rte_flow_action));
3810                                 actions_tx++;
3811                                 encap = 1;
3812                         } else {
3813                                 rte_memcpy(actions_rx, actions,
3814                                            sizeof(struct rte_flow_action));
3815                                 actions_rx++;
3816                         }
3817                         break;
3818                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3819                         raw_decap = actions->conf;
3820                         if (raw_decap->size <
3821                             (sizeof(struct rte_flow_item_eth) +
3822                              sizeof(struct rte_flow_item_ipv4))) {
3823                                 memcpy(actions_tx, actions,
3824                                        sizeof(struct rte_flow_action));
3825                                 actions_tx++;
3826                         } else {
3827                                 rte_memcpy(actions_rx, actions,
3828                                            sizeof(struct rte_flow_action));
3829                                 actions_rx++;
3830                         }
3831                         break;
3832                 default:
3833                         rte_memcpy(actions_rx, actions,
3834                                    sizeof(struct rte_flow_action));
3835                         actions_rx++;
3836                         break;
3837                 }
3838         }
3839         /* Add set meta action and end action for the Rx flow. */
3840         tag_action = actions_rx;
3841         tag_action->type = (enum rte_flow_action_type)
3842                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3843         actions_rx++;
3844         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3845         actions_rx++;
3846         set_tag = (void *)actions_rx;
3847         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3848         MLX5_ASSERT(set_tag->id > REG_NON);
3849         set_tag->data = *flow_id;
3850         tag_action->conf = set_tag;
3851         /* Create Tx item list. */
3852         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3853         addr = (void *)&pattern_tx[2];
3854         item = pattern_tx;
3855         item->type = (enum rte_flow_item_type)
3856                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3857         tag_item = (void *)addr;
3858         tag_item->data = *flow_id;
3859         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3860         MLX5_ASSERT(tag_item->id > REG_NON);
3861         item->spec = tag_item;
3862         addr += sizeof(struct mlx5_rte_flow_item_tag);
3863         tag_item = (void *)addr;
3864         tag_item->data = UINT32_MAX;
3865         tag_item->id = UINT16_MAX;
3866         item->mask = tag_item;
3867         item->last = NULL;
3868         item++;
3869         item->type = RTE_FLOW_ITEM_TYPE_END;
3870         return 0;
3871 }
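
/*
 * Illustrative sketch of the hairpin split above (hypothetical rule,
 * not taken from a real configuration). Given the Rx actions
 *     RAW_ENCAP(size > ETH + IPV4) / COUNT / QUEUE / END
 * the routine produces
 *     Rx actions:  QUEUE / TAG(HAIRPIN_RX reg, data = flow_id) / END
 *     Tx actions:  RAW_ENCAP / COUNT / END
 *     Tx pattern:  TAG(HAIRPIN_TX reg, data = flow_id) / END
 * The COUNT action follows the encap, so it moves to the Tx flow and
 * also measures the outer bytes.
 */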
3872
3873 /**
3874  * The last stage of the splitting chain; it just creates the subflow
3875  * without any modification.
3876  *
3877  * @param[in] dev
3878  *   Pointer to Ethernet device.
3879  * @param[in] flow
3880  *   Parent flow structure pointer.
3881  * @param[in, out] sub_flow
3882  *   Pointer to return the created subflow, may be NULL.
3883  * @param[in] prefix_layers
3884  *   Prefix subflow layers, may be 0.
3885  * @param[in] prefix_mark
3886  *   Prefix subflow mark flag, may be 0.
3887  * @param[in] attr
3888  *   Flow rule attributes.
3889  * @param[in] items
3890  *   Pattern specification (list terminated by the END pattern item).
3891  * @param[in] actions
3892  *   Associated actions (list terminated by the END action).
3893  * @param[in] external
3894  *   This flow rule is created by a request external to the PMD.
3895  * @param[in] flow_idx
3896  *   Memory pool index of the flow.
3897  * @param[out] error
3898  *   Perform verbose error reporting if not NULL.
3899  * @return
3900  *   0 on success, negative value otherwise
3901  */
3902 static int
3903 flow_create_split_inner(struct rte_eth_dev *dev,
3904                         struct rte_flow *flow,
3905                         struct mlx5_flow **sub_flow,
3906                         uint64_t prefix_layers,
3907                         uint32_t prefix_mark,
3908                         const struct rte_flow_attr *attr,
3909                         const struct rte_flow_item items[],
3910                         const struct rte_flow_action actions[],
3911                         bool external, uint32_t flow_idx,
3912                         struct rte_flow_error *error)
3913 {
3914         struct mlx5_flow *dev_flow;
3915
3916         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
3917                 flow_idx, error);
3918         if (!dev_flow)
3919                 return -rte_errno;
3920         dev_flow->flow = flow;
3921         dev_flow->external = external;
3922         /* Subflow object was created, we must include it in the list. */
3923         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
3924                       dev_flow->handle, next);
3925         /*
3926          * If dev_flow is one of the suffix flows, some actions in the
3927          * suffix flow may need some user defined item layer flags; pass
3928          * the metadata rxq mark flag to the suffix flow as well.
3929          */
3930         if (prefix_layers)
3931                 dev_flow->handle->layers = prefix_layers;
3932         if (prefix_mark)
3933                 dev_flow->handle->mark = 1;
3934         if (sub_flow)
3935                 *sub_flow = dev_flow;
3936         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3937 }
3938
3939 /**
3940  * Split the meter flow.
3941  *
3942  * As the meter flow will be split to three sub flows, the actions
3943  * other than the meter action only make sense if the meter accepts
3944  * the packet. If the packet needs to be dropped, no additional
3945  * actions should be taken.
3946  *
3947  * One special kind of action, which decapsulates the L3 tunnel
3948  * header, is put in the prefix sub flow, so as not to take the
3949  * L3 tunnel header into account.
3950  *
3951  * @param dev
3952  *   Pointer to Ethernet device.
3953  * @param[in] items
3954  *   Pattern specification (list terminated by the END pattern item).
3955  * @param[out] sfx_items
3956  *   Suffix flow match items (list terminated by the END pattern item).
3957  * @param[in] actions
3958  *   Associated actions (list terminated by the END action).
3959  * @param[out] actions_sfx
3960  *   Suffix flow actions.
3961  * @param[out] actions_pre
3962  *   Prefix flow actions.
3967  *
3968  * @return
3969  *   The allocated suffix flow tag id, 0 on failure.
3970  */
3971 static int
3972 flow_meter_split_prep(struct rte_eth_dev *dev,
3973                  const struct rte_flow_item items[],
3974                  struct rte_flow_item sfx_items[],
3975                  const struct rte_flow_action actions[],
3976                  struct rte_flow_action actions_sfx[],
3977                  struct rte_flow_action actions_pre[])
3978 {
3979         struct rte_flow_action *tag_action = NULL;
3980         struct rte_flow_item *tag_item;
3981         struct mlx5_rte_flow_action_set_tag *set_tag;
3982         struct rte_flow_error error;
3983         const struct rte_flow_action_raw_encap *raw_encap;
3984         const struct rte_flow_action_raw_decap *raw_decap;
3985         struct mlx5_rte_flow_item_tag *tag_spec;
3986         struct mlx5_rte_flow_item_tag *tag_mask;
3987         uint32_t tag_id;
3988         bool copy_vlan = false;
3989
3990         /* Prepare the actions for prefix and suffix flow. */
3991         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3992                 struct rte_flow_action **action_cur = NULL;
3993
3994                 switch (actions->type) {
3995                 case RTE_FLOW_ACTION_TYPE_METER:
3996                         /* Add the extra tag action first. */
3997                         tag_action = actions_pre;
3998                         tag_action->type = (enum rte_flow_action_type)
3999                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4000                         actions_pre++;
4001                         action_cur = &actions_pre;
4002                         break;
4003                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4004                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4005                         action_cur = &actions_pre;
4006                         break;
4007                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4008                         raw_encap = actions->conf;
4009                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4010                                 action_cur = &actions_pre;
4011                         break;
4012                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4013                         raw_decap = actions->conf;
4014                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4015                                 action_cur = &actions_pre;
4016                         break;
4017                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4018                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4019                         copy_vlan = true;
4020                         break;
4021                 default:
4022                         break;
4023                 }
4024                 if (!action_cur)
4025                         action_cur = &actions_sfx;
4026                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
4027                 (*action_cur)++;
4028         }
4029         /* Add end action to the actions. */
4030         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4031         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4032         actions_pre++;
4033         /* Set the tag. */
4034         set_tag = (void *)actions_pre;
4035         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4036         /*
4037          * Get the id from the qrss_pool so that qrss shares the id with the meter.
4038          */
4039         tag_id = flow_qrss_get_id(dev);
4040         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
4041         MLX5_ASSERT(tag_action);
4042         tag_action->conf = set_tag;
4043         /* Prepare the suffix subflow items. */
4044         tag_item = sfx_items++;
4045         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4046                 int item_type = items->type;
4047
4048                 switch (item_type) {
4049                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4050                         memcpy(sfx_items, items, sizeof(*sfx_items));
4051                         sfx_items++;
4052                         break;
4053                 case RTE_FLOW_ITEM_TYPE_VLAN:
4054                         if (copy_vlan) {
4055                                 memcpy(sfx_items, items, sizeof(*sfx_items));
4056                                 /*
4057                                  * Convert to an internal match item, used
4058                                  * for VLAN push and set VID.
4059                                  */
4060                                 sfx_items->type = (enum rte_flow_item_type)
4061                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4062                                 sfx_items++;
4063                         }
4064                         break;
4065                 default:
4066                         break;
4067                 }
4068         }
4069         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4070         sfx_items++;
4071         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4072         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
4073         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4074         tag_mask = tag_spec + 1;
4075         tag_mask->data = 0xffffff00;
4076         tag_item->type = (enum rte_flow_item_type)
4077                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4078         tag_item->spec = tag_spec;
4079         tag_item->last = NULL;
4080         tag_item->mask = tag_mask;
4081         return tag_id;
4082 }
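
/*
 * Illustrative sketch of the meter split prep (hypothetical rule).
 * Given the items ETH / IPV4 / END and the actions
 *     METER / QUEUE / END
 * the routine produces
 *     prefix actions: TAG(MTR_SFX reg,
 *                         data = tag_id << MLX5_MTR_COLOR_BITS) /
 *                     METER / END
 *     suffix actions: QUEUE / END
 *     suffix items:   TAG(MTR_SFX reg, same data,
 *                         mask 0xffffff00) / END
 * so only packets accepted by the meter reach the suffix flow.
 */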
4083
4084 /**
4085  * Split action list having QUEUE/RSS for metadata register copy.
4086  *
4087  * Once Q/RSS action is detected in user's action list, the flow action
4088  * should be split in order to copy metadata registers, which will happen in
4089  * RX_CP_TBL like,
4090  *   - CQE->flow_tag := reg_c[1] (MARK)
4091  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4092  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4093  * This is because the last action of each flow must be a terminal action
4094  * (QUEUE, RSS or DROP).
4095  *
4096  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4097  * stored and kept in the mlx5_flow structure for each sub_flow.
4098  *
4099  * The Q/RSS action is replaced with,
4100  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4101  * And the following JUMP action is added at the end,
4102  *   - JUMP, to RX_CP_TBL.
4103  *
4104  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
4105  * flow_create_split_metadata() routine. The flow will look like,
4106  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4107  *
4108  * @param dev
4109  *   Pointer to Ethernet device.
4110  * @param[out] split_actions
4111  *   Pointer to store split actions to jump to CP_TBL.
4112  * @param[in] actions
4113  *   Pointer to the list of original flow actions.
4114  * @param[in] qrss
4115  *   Pointer to the Q/RSS action.
4116  * @param[in] actions_n
4117  *   Number of original actions.
4118  * @param[out] error
4119  *   Perform verbose error reporting if not NULL.
4120  *
4121  * @return
4122  *   Non-zero unique flow_id on success, otherwise 0 and
4123  *   error/rte_errno are set.
4124  */
4125 static uint32_t
4126 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4127                           struct rte_flow_action *split_actions,
4128                           const struct rte_flow_action *actions,
4129                           const struct rte_flow_action *qrss,
4130                           int actions_n, struct rte_flow_error *error)
4131 {
4132         struct mlx5_rte_flow_action_set_tag *set_tag;
4133         struct rte_flow_action_jump *jump;
4134         const int qrss_idx = qrss - actions;
4135         uint32_t flow_id = 0;
4136         int ret = 0;
4137
4138         /*
4139          * The given actions will be split:
4140          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4141          * - Add jump to mreg CP_TBL.
4142          * As a result, there will be one more action.
4143          */
4144         ++actions_n;
4145         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
4146         set_tag = (void *)(split_actions + actions_n);
4147         /*
4148          * If the tag action is not set to void (i.e. we are not the meter
4149          * suffix flow), add the tag action, since the meter suffix flow
4150          * already has the tag added.
4151          */
4152         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4153                 /*
4154                  * Allocate the new subflow ID. This one is unique within
4155                  * device and not shared with representors. Otherwise,
4156                  * we would have to resolve multi-thread access synch
4157                  * issue. Each flow on the shared device is appended
4158                  * with source vport identifier, so the resulting
4159                  * flows will be unique in the shared (by master and
4160                  * representors) domain even if they have coinciding
4161                  * IDs.
4162                  */
4163                 flow_id = flow_qrss_get_id(dev);
4164                 if (!flow_id)
4165                         return rte_flow_error_set(error, ENOMEM,
4166                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4167                                                   NULL, "can't allocate id "
4168                                                   "for split Q/RSS subflow");
4169                 /* Internal SET_TAG action to set flow ID. */
4170                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4171                         .data = flow_id,
4172                 };
4173                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4174                 if (ret < 0)
4175                         return ret;
4176                 set_tag->id = ret;
4177                 /* Construct new actions array. */
4178                 /* Replace QUEUE/RSS action. */
4179                 split_actions[qrss_idx] = (struct rte_flow_action){
4180                         .type = (enum rte_flow_action_type)
4181                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4182                         .conf = set_tag,
4183                 };
4184         }
4185         /* JUMP action to jump to mreg copy table (CP_TBL). */
4186         jump = (void *)(set_tag + 1);
4187         *jump = (struct rte_flow_action_jump){
4188                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4189         };
4190         split_actions[actions_n - 2] = (struct rte_flow_action){
4191                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4192                 .conf = jump,
4193         };
4194         split_actions[actions_n - 1] = (struct rte_flow_action){
4195                 .type = RTE_FLOW_ACTION_TYPE_END,
4196         };
4197         return flow_id;
4198 }
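
/*
 * Illustrative sketch (hypothetical rule): given the actions
 *     MARK / QUEUE / END
 * the resulting split_actions array is
 *     MARK / SET_TAG(COPY_MARK reg, data = flow_id) /
 *     JUMP(group = MLX5_FLOW_MREG_CP_TABLE_GROUP) / END
 * The flow created later in RX_ACT_TBL matches the tag value and
 * performs the original QUEUE action.
 */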
4199
4200 /**
4201  * Extend the given action list for Tx metadata copy.
4202  *
4203  * Copy the given action list to the ext_actions and add flow metadata register
4204  * copy action in order to copy reg_a set by WQE to reg_c[0].
4205  *
4206  * @param[out] ext_actions
4207  *   Pointer to the extended action list.
4208  * @param[in] actions
4209  *   Pointer to the list of actions.
4210  * @param[in] actions_n
4211  *   Number of actions in the list.
4212  * @param[out] error
4213  *   Perform verbose error reporting if not NULL.
4214  * @param[in] encap_idx
4215  *   The encap action index.
4216  *
4217  * @return
4218  *   0 on success, negative value otherwise
4219  */
4220 static int
4221 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
4222                        struct rte_flow_action *ext_actions,
4223                        const struct rte_flow_action *actions,
4224                        int actions_n, struct rte_flow_error *error,
4225                        int encap_idx)
4226 {
4227         struct mlx5_flow_action_copy_mreg *cp_mreg =
4228                 (struct mlx5_flow_action_copy_mreg *)
4229                         (ext_actions + actions_n + 1);
4230         int ret;
4231
4232         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4233         if (ret < 0)
4234                 return ret;
4235         cp_mreg->dst = ret;
4236         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
4237         if (ret < 0)
4238                 return ret;
4239         cp_mreg->src = ret;
4240         if (encap_idx != 0)
4241                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4242         if (encap_idx == actions_n - 1) {
4243                 ext_actions[actions_n - 1] = (struct rte_flow_action){
4244                         .type = (enum rte_flow_action_type)
4245                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4246                         .conf = cp_mreg,
4247                 };
4248                 ext_actions[actions_n] = (struct rte_flow_action){
4249                         .type = RTE_FLOW_ACTION_TYPE_END,
4250                 };
4251         } else {
4252                 ext_actions[encap_idx] = (struct rte_flow_action){
4253                         .type = (enum rte_flow_action_type)
4254                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4255                         .conf = cp_mreg,
4256                 };
4257                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4258                                 sizeof(*ext_actions) * (actions_n - encap_idx));
4259         }
4260         return 0;
4261 }
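
/*
 * Illustrative sketch (hypothetical rules). Without an encap action
 * the copy action is appended before END:
 *     A / B / END  ->  A / B / COPY_MREG(reg_a -> reg_c[0]) / END
 * With an encap action the copy is inserted right before it, so the
 * metadata is copied before the outer headers are added:
 *     A / RAW_ENCAP / END  ->  A / COPY_MREG / RAW_ENCAP / END
 */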
4262
4263 /**
4264  * Check the match action from the action list.
4265  *
4266  * @param[in] actions
4267  *   Pointer to the list of actions.
4268  * @param[in] attr
4269  *   Flow rule attributes.
4270  * @param[in] action
4271  *   The action to be checked for existence.
4272  * @param[out] match_action_pos
4273  *   Pointer to the position of the matched action if it exists, otherwise -1.
4274  * @param[out] qrss_action_pos
4275  *   Pointer to the position of the Queue/RSS action if it exists, otherwise -1.
4276  *
4277  * @return
4278  *   > 0 the total number of actions.
4279  *   0 if the match action is not found in the action list.
4280  */
4281 static int
4282 flow_check_match_action(const struct rte_flow_action actions[],
4283                         const struct rte_flow_attr *attr,
4284                         enum rte_flow_action_type action,
4285                         int *match_action_pos, int *qrss_action_pos)
4286 {
4287         const struct rte_flow_action_sample *sample;
4288         int actions_n = 0;
4289         int jump_flag = 0;
4290         uint32_t ratio = 0;
4291         int sub_type = 0;
4292         int flag = 0;
4293
4294         *match_action_pos = -1;
4295         *qrss_action_pos = -1;
4296         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4297                 if (actions->type == action) {
4298                         flag = 1;
4299                         *match_action_pos = actions_n;
4300                 }
4301                 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
4302                     actions->type == RTE_FLOW_ACTION_TYPE_RSS)
4303                         *qrss_action_pos = actions_n;
4304                 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
4305                         jump_flag = 1;
4306                 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
4307                         sample = actions->conf;
4308                         ratio = sample->ratio;
4309                         sub_type = ((const struct rte_flow_action *)
4310                                         (sample->actions))->type;
4311                 }
4312                 actions_n++;
4313         }
4314         if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
4315                 if (ratio == 1) {
4316                         /* The JUMP action is not supported for mirroring;
4317                          * mirroring supports multi-destination.
4318                          */
4319                         if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
4320                                 flag = 0;
4321                 }
4322         }
4323         /* Count RTE_FLOW_ACTION_TYPE_END. */
4324         return flag ? actions_n + 1 : 0;
4325 }
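
/*
 * Usage sketch (hypothetical list): for a non-transfer rule with
 *     MARK / SAMPLE / QUEUE / END
 * and action == RTE_FLOW_ACTION_TYPE_SAMPLE, the routine returns 4
 * (three actions plus END), *match_action_pos == 1 and
 * *qrss_action_pos == 2.
 */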
4326
4327 #define SAMPLE_SUFFIX_ITEM 2
4328
4329 /**
4330  * Split the sample flow.
4331  *
4332  * As the sample flow will be split to two sub flows, the prefix flow
4333  * keeps the sample action while the other actions move to a new suffix flow.
4334  *
4335  * A unique tag id is also added with a tag action in the sample flow;
4336  * the same tag id is used as a match in the suffix flow.
4337  *
4338  * @param dev
4339  *   Pointer to Ethernet device.
4340  * @param[in] fdb_tx
4341  *   FDB egress flow flag.
4342  * @param[out] sfx_items
4343  *   Suffix flow match items (list terminated by the END pattern item).
4344  * @param[in] actions
4345  *   Associated actions (list terminated by the END action).
4346  * @param[out] actions_sfx
4347  *   Suffix flow actions.
4348  * @param[out] actions_pre
4349  *   Prefix flow actions.
4350  * @param[in] actions_n
4351  *   The total number of actions.
4352  * @param[in] sample_action_pos
4353  *   The sample action position.
4354  * @param[in] qrss_action_pos
4355  *   The Queue/RSS action position.
4356  * @param[out] error
4357  *   Perform verbose error reporting if not NULL.
4358  *
4359  * @return
4360  *   The unique flow_id on success (0 in the FDB egress case), a
4361  *   negative errno value otherwise and rte_errno is set.
4362  */
4363 static int
4364 flow_sample_split_prep(struct rte_eth_dev *dev,
4365                        uint32_t fdb_tx,
4366                        struct rte_flow_item sfx_items[],
4367                        const struct rte_flow_action actions[],
4368                        struct rte_flow_action actions_sfx[],
4369                        struct rte_flow_action actions_pre[],
4370                        int actions_n,
4371                        int sample_action_pos,
4372                        int qrss_action_pos,
4373                        struct rte_flow_error *error)
4374 {
4375         struct mlx5_rte_flow_action_set_tag *set_tag;
4376         struct mlx5_rte_flow_item_tag *tag_spec;
4377         struct mlx5_rte_flow_item_tag *tag_mask;
4378         uint32_t tag_id = 0;
4379         int index;
4380         int ret;
4381
4382         if (sample_action_pos < 0)
4383                 return rte_flow_error_set(error, EINVAL,
4384                                           RTE_FLOW_ERROR_TYPE_ACTION,
4385                                           NULL, "invalid position of sample "
4386                                           "action in list");
4387         if (!fdb_tx) {
4388                 /* Prepare the prefix tag action. */
4389                 set_tag = (void *)(actions_pre + actions_n + 1);
4390                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
4391                 if (ret < 0)
4392                         return ret;
4393                 set_tag->id = ret;
4394                 tag_id = flow_qrss_get_id(dev);
4395                 set_tag->data = tag_id;
4396                 /* Prepare the suffix subflow items. */
4397                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
4398                 tag_spec->data = tag_id;
4399                 tag_spec->id = set_tag->id;
4400                 tag_mask = tag_spec + 1;
4401                 tag_mask->data = UINT32_MAX;
4402                 sfx_items[0] = (struct rte_flow_item){
4403                         .type = (enum rte_flow_item_type)
4404                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4405                         .spec = tag_spec,
4406                         .last = NULL,
4407                         .mask = tag_mask,
4408                 };
4409                 sfx_items[1] = (struct rte_flow_item){
4410                         .type = (enum rte_flow_item_type)
4411                                 RTE_FLOW_ITEM_TYPE_END,
4412                 };
4413         }
4414         /* Prepare the actions for prefix and suffix flow. */
4415         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
4416                 index = qrss_action_pos;
4417                 /* Put actions preceding the Queue/RSS action into prefix flow. */
4418                 if (index != 0)
4419                         memcpy(actions_pre, actions,
4420                                sizeof(struct rte_flow_action) * index);
4421                 /* Put other actions preceding the sample action into prefix flow. */
4422                 if (sample_action_pos > index + 1)
4423                         memcpy(actions_pre + index, actions + index + 1,
4424                                sizeof(struct rte_flow_action) *
4425                                (sample_action_pos - index - 1));
4426                 index = sample_action_pos - 1;
4427                 /* Put the Queue/RSS action into the suffix flow. */
4428                 memcpy(actions_sfx, actions + qrss_action_pos,
4429                        sizeof(struct rte_flow_action));
4430                 actions_sfx++;
4431         } else {
4432                 index = sample_action_pos;
4433                 if (index != 0)
4434                         memcpy(actions_pre, actions,
4435                                sizeof(struct rte_flow_action) * index);
4436         }
4437         /* Add the extra tag action for NIC-RX and E-Switch ingress. */
4438         if (!fdb_tx) {
4439                 actions_pre[index++] =
4440                         (struct rte_flow_action){
4441                         .type = (enum rte_flow_action_type)
4442                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4443                         .conf = set_tag,
4444                 };
4445         }
4446         memcpy(actions_pre + index, actions + sample_action_pos,
4447                sizeof(struct rte_flow_action));
4448         index += 1;
4449         actions_pre[index] = (struct rte_flow_action){
4450                 .type = (enum rte_flow_action_type)
4451                         RTE_FLOW_ACTION_TYPE_END,
4452         };
4453         /* Put the actions after the sample action into the suffix flow. */
4454         memcpy(actions_sfx, actions + sample_action_pos + 1,
4455                sizeof(struct rte_flow_action) *
4456                (actions_n - sample_action_pos - 1));
4457         return tag_id;
4458 }
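
/*
 * Illustrative sketch of the sample split on NIC Rx (fdb_tx == 0,
 * hypothetical rule). Given the actions
 *     ACT_A / SAMPLE / QUEUE / END
 * the routine produces
 *     prefix actions: ACT_A / TAG(APP_TAG reg, data = tag_id) /
 *                     SAMPLE / END
 *     suffix actions: QUEUE / END
 *     suffix items:   TAG(APP_TAG reg, data = tag_id) / END
 * and returns the allocated tag_id.
 */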
4459
4460 /**
4461  * The splitting for metadata feature.
4462  *
4463  * - Q/RSS action on NIC Rx should be split in order to pass by
4464  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
4465  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
4466  *
4467  * - All the actions on NIC Tx should have a mreg copy action to
4468  *   copy reg_a from WQE to reg_c[0].
4469  *
4470  * @param dev
4471  *   Pointer to Ethernet device.
4472  * @param[in] flow
4473  *   Parent flow structure pointer.
4474  * @param[in] prefix_layers
4475  *   Prefix flow layer flags.
4476  * @param[in] prefix_mark
4477  *   Prefix subflow mark flag, may be 0.
4478  * @param[in] attr
4479  *   Flow rule attributes.
4480  * @param[in] items
4481  *   Pattern specification (list terminated by the END pattern item).
4482  * @param[in] actions
4483  *   Associated actions (list terminated by the END action).
4484  * @param[in] external
4485  *   This flow rule is created by a request external to the PMD.
4486  * @param[in] flow_idx
4487  *   Memory pool index of the flow.
4488  * @param[out] error
4489  *   Perform verbose error reporting if not NULL.
4490  * @return
4491  *   0 on success, negative value otherwise
4492  */
4493 static int
4494 flow_create_split_metadata(struct rte_eth_dev *dev,
4495                            struct rte_flow *flow,
4496                            uint64_t prefix_layers,
4497                            uint32_t prefix_mark,
4498                            const struct rte_flow_attr *attr,
4499                            const struct rte_flow_item items[],
4500                            const struct rte_flow_action actions[],
4501                            bool external, uint32_t flow_idx,
4502                            struct rte_flow_error *error)
4503 {
4504         struct mlx5_priv *priv = dev->data->dev_private;
4505         struct mlx5_dev_config *config = &priv->config;
4506         const struct rte_flow_action *qrss = NULL;
4507         struct rte_flow_action *ext_actions = NULL;
4508         struct mlx5_flow *dev_flow = NULL;
4509         uint32_t qrss_id = 0;
4510         int mtr_sfx = 0;
4511         size_t act_size;
4512         int actions_n;
4513         int encap_idx;
4514         int ret;
4515
4516         /* Check whether extensive metadata feature is engaged. */
4517         if (!config->dv_flow_en ||
4518             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4519             !mlx5_flow_ext_mreg_supported(dev))
4520                 return flow_create_split_inner(dev, flow, NULL, prefix_layers,
4521                                                prefix_mark, attr, items,
4522                                                actions, external, flow_idx,
4523                                                error);
4524         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4525                                                            &encap_idx);
4526         if (qrss) {
4527                 /* Exclude hairpin flows from splitting. */
4528                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4529                         const struct rte_flow_action_queue *queue;
4530
4531                         queue = qrss->conf;
4532                         if (mlx5_rxq_get_type(dev, queue->index) ==
4533                             MLX5_RXQ_TYPE_HAIRPIN)
4534                                 qrss = NULL;
4535                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
4536                         const struct rte_flow_action_rss *rss;
4537
4538                         rss = qrss->conf;
4539                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
4540                             MLX5_RXQ_TYPE_HAIRPIN)
4541                                 qrss = NULL;
4542                 }
4543         }
4544         if (qrss) {
4545                 /* Check if it is in meter suffix table. */
4546                 mtr_sfx = attr->group == (attr->transfer ?
4547                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4548                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
4549                 /*
4550                  * Q/RSS action on NIC Rx should be split in order to pass by
4551                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
4552                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
4553                  */
4554                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4555                            sizeof(struct rte_flow_action_set_tag) +
4556                            sizeof(struct rte_flow_action_jump);
4557                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4558                                           SOCKET_ID_ANY);
4559                 if (!ext_actions)
4560                         return rte_flow_error_set(error, ENOMEM,
4561                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4562                                                   NULL, "no memory to split "
4563                                                   "metadata flow");
4564                 /*
4565                  * If we are the suffix flow of a meter, the tag already exists.
4566                  * Set the tag action to void.
4567                  */
4568                 if (mtr_sfx)
4569                         ext_actions[qrss - actions].type =
4570                                                 RTE_FLOW_ACTION_TYPE_VOID;
4571                 else
4572                         ext_actions[qrss - actions].type =
4573                                                 (enum rte_flow_action_type)
4574                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4575                 /*
4576                  * Create the new actions list with removed Q/RSS action
4577                  * and appended set tag and jump to register copy table
4578                  * (RX_CP_TBL). We should preallocate unique tag ID here
4579                  * in advance, because it is needed for set tag action.
4580                  */
4581                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
4582                                                     qrss, actions_n, error);
4583                 if (!mtr_sfx && !qrss_id) {
4584                         ret = -rte_errno;
4585                         goto exit;
4586                 }
4587         } else if (attr->egress && !attr->transfer) {
4588                 /*
4589                  * All the actions on NIC Tx should have a metadata register
4590                  * copy action to copy reg_a from WQE to reg_c[meta]
4591                  */
4592                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4593                            sizeof(struct mlx5_flow_action_copy_mreg);
4594                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4595                                           SOCKET_ID_ANY);
4596                 if (!ext_actions)
4597                         return rte_flow_error_set(error, ENOMEM,
4598                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4599                                                   NULL, "no memory to split "
4600                                                   "metadata flow");
4601                 /* Create the action list appended with copy register. */
4602                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
4603                                              actions_n, error, encap_idx);
4604                 if (ret < 0)
4605                         goto exit;
4606         }
4607         /* Add the unmodified original or prefix subflow. */
4608         ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers,
4609                                       prefix_mark, attr,
4610                                       items, ext_actions ? ext_actions :
4611                                       actions, external, flow_idx, error);
4612         if (ret < 0)
4613                 goto exit;
4614         MLX5_ASSERT(dev_flow);
4615         if (qrss) {
4616                 const struct rte_flow_attr q_attr = {
4617                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4618                         .ingress = 1,
4619                 };
4620                 /* Internal PMD action to set register. */
4621                 struct mlx5_rte_flow_item_tag q_tag_spec = {
4622                         .data = qrss_id,
4623                         .id = REG_NON,
4624                 };
4625                 struct rte_flow_item q_items[] = {
4626                         {
4627                                 .type = (enum rte_flow_item_type)
4628                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4629                                 .spec = &q_tag_spec,
4630                                 .last = NULL,
4631                                 .mask = NULL,
4632                         },
4633                         {
4634                                 .type = RTE_FLOW_ITEM_TYPE_END,
4635                         },
4636                 };
4637                 struct rte_flow_action q_actions[] = {
4638                         {
4639                                 .type = qrss->type,
4640                                 .conf = qrss->conf,
4641                         },
4642                         {
4643                                 .type = RTE_FLOW_ACTION_TYPE_END,
4644                         },
4645                 };
4646                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4647
4648                 /*
4649                  * Configure the tag item only if there is no meter subflow.
4650                  * Since the tag is already marked in the meter suffix subflow,
4651                  * we can just use the meter suffix items as is.
4652                  */
4653                 if (qrss_id) {
4654                         /* Not meter subflow. */
4655                         MLX5_ASSERT(!mtr_sfx);
4656                         /*
4657                          * Put the unique id in the prefix flow, as it is
4658                          * destroyed after the suffix flow. The id will be
4659                          * freed once there are no actual flows with this id
4660                          * left and identifier reallocation becomes possible
4661                          * (for example, for other flows in other threads).
4662                          */
4663                         dev_flow->handle->split_flow_id = qrss_id;
4664                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4665                                                    error);
4666                         if (ret < 0)
4667                                 goto exit;
4668                         q_tag_spec.id = ret;
4669                 }
4670                 dev_flow = NULL;
4671                 /* Add suffix subflow to execute Q/RSS. */
4672                 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0,
4673                                               &q_attr, mtr_sfx ? items :
4674                                               q_items, q_actions,
4675                                               external, flow_idx, error);
4676                 if (ret < 0)
4677                         goto exit;
4678                 /* qrss ID should be freed if failed. */
4679                 qrss_id = 0;
4680                 MLX5_ASSERT(dev_flow);
4681         }
4682
4683 exit:
4684         /*
4685          * We do not destroy the partially created sub_flows in case of error.
4686          * These are included in the parent flow list and will be destroyed
4687          * by flow_drv_destroy.
4688          */
4689         flow_qrss_free_id(dev, qrss_id);
4690         mlx5_free(ext_actions);
4691         return ret;
4692 }
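
/*
 * Illustrative summary of the metadata split (hypothetical Rx rule).
 * A rule with actions MARK / QUEUE / END is installed as two subflows:
 *     prefix: original items, actions
 *             MARK / SET_TAG(flow_id) / JUMP(RX_CP_TBL) / END
 *     suffix: group MLX5_FLOW_MREG_ACT_TABLE_GROUP, items
 *             TAG(flow_id) / END, actions QUEUE / END
 * The register copy to the CQE fields happens in RX_CP_TBL between
 * the two subflows.
 */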
4693
4694 /**
4695  * The splitting for meter feature.
4696  *
4697  * - The meter flow will be split to two flows as prefix and
4698  *   suffix flow. The packets only make sense if they pass the
4699  *   prefix meter action.
4700  *
4701  * - Reg_C_5 is used for the packet to match between the prefix and
4702  *   suffix flow.
4703  *
4704  * @param dev
4705  *   Pointer to Ethernet device.
4706  * @param[in] flow
4707  *   Parent flow structure pointer.
4708  * @param[in] prefix_layers
4709  *   Prefix subflow layers, may be 0.
4710  * @param[in] prefix_mark
4711  *   Prefix subflow mark flag, may be 0.
4712  * @param[in] attr
4713  *   Flow rule attributes.
4714  * @param[in] items
4715  *   Pattern specification (list terminated by the END pattern item).
4716  * @param[in] actions
4717  *   Associated actions (list terminated by the END action).
4718  * @param[in] external
4719  *   This flow rule is created by a request external to the PMD.
4720  * @param[in] flow_idx
4721  *   Memory pool index of the flow.
4722  * @param[out] error
4723  *   Perform verbose error reporting if not NULL.
4724  * @return
4725  *   0 on success, negative value otherwise
4726  */
4727 static int
4728 flow_create_split_meter(struct rte_eth_dev *dev,
4729                         struct rte_flow *flow,
4730                         uint64_t prefix_layers,
4731                         uint32_t prefix_mark,
4732                         const struct rte_flow_attr *attr,
4733                         const struct rte_flow_item items[],
4734                         const struct rte_flow_action actions[],
4735                         bool external, uint32_t flow_idx,
4736                         struct rte_flow_error *error)
4737 {
4738         struct mlx5_priv *priv = dev->data->dev_private;
4739         struct rte_flow_action *sfx_actions = NULL;
4740         struct rte_flow_action *pre_actions = NULL;
4741         struct rte_flow_item *sfx_items = NULL;
4742         struct mlx5_flow *dev_flow = NULL;
4743         struct rte_flow_attr sfx_attr = *attr;
4744         uint32_t mtr = 0;
4745         uint32_t mtr_tag_id = 0;
4746         size_t act_size;
4747         size_t item_size;
4748         int actions_n = 0;
4749         int ret;
4750
4751         if (priv->mtr_en)
4752                 actions_n = flow_check_meter_action(actions, &mtr);
4753         if (mtr) {
4754                 /* The five prefix actions: meter, decap, encap, tag, end. */
4755                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4756                            sizeof(struct mlx5_rte_flow_action_set_tag);
4757                 /* tag, vlan, port id, end. */
4758 #define METER_SUFFIX_ITEM 4
4759                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4760                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4761                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
4762                                           0, SOCKET_ID_ANY);
4763                 if (!sfx_actions)
4764                         return rte_flow_error_set(error, ENOMEM,
4765                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4766                                                   NULL, "no memory to split "
4767                                                   "meter flow");
4768                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4769                              act_size);
4770                 pre_actions = sfx_actions + actions_n;
4771                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4772                                                    actions, sfx_actions,
4773                                                    pre_actions);
4774                 if (!mtr_tag_id) {
4775                         ret = -rte_errno;
4776                         goto exit;
4777                 }
4778                 /* Add the prefix subflow. */
4779                 ret = flow_create_split_inner(dev, flow, &dev_flow,
4780                                               prefix_layers, 0,
4781                                               attr, items,
4782                                               pre_actions, external,
4783                                               flow_idx, error);
4784                 if (ret) {
4785                         ret = -rte_errno;
4786                         goto exit;
4787                 }
4788                 dev_flow->handle->split_flow_id = mtr_tag_id;
4789                 /* Set the sfx group attr. */
4790                 sfx_attr.group = sfx_attr.transfer ?
4791                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4792                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4793         }
4794         /* Add the suffix subflow (or the original flow if no meter). */
4795         ret = flow_create_split_metadata(dev, flow, dev_flow ?
4796                                          flow_get_prefix_layer_flags(dev_flow) :
4797                                          prefix_layers, dev_flow ?
4798                                          dev_flow->handle->mark : prefix_mark,
4799                                          &sfx_attr, sfx_items ?
4800                                          sfx_items : items,
4801                                          sfx_actions ? sfx_actions : actions,
4802                                          external, flow_idx, error);
4803 exit:
4804         if (sfx_actions)
4805                 mlx5_free(sfx_actions);
4806         return ret;
4807 }
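
/*
 * Illustrative summary of the meter split (hypothetical rule). With a
 * meter present, the prefix subflow keeps the original attributes and
 * the TAG / METER prefix actions, while the suffix subflow is placed
 * in group MLX5_FLOW_TABLE_LEVEL_SUFFIX (one level lower for transfer
 * rules) and is then processed by the metadata splitter above.
 */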
4808
4809 /**
4810  * The splitting for sample feature.
4811  *
4812  * Once Sample action is detected in the action list, the flow actions should
4813  * be split into prefix sub flow and suffix sub flow.
4814  *
4815  * The original items remain in the prefix sub flow. All actions preceding the
4816  * sample action and the sample action itself will be copied to the prefix
4817  * sub flow; the actions following the sample action will be copied to the
4818  * suffix sub flow, where the Queue action is always located.
4819  *
4820  * In order to make the packet from the prefix sub flow match the suffix sub
4821  * flow, an extra tag action is added into the prefix sub flow, and the suffix
4822  * sub flow uses a tag item with the unique flow id.
4823  *
4824  * @param dev
4825  *   Pointer to Ethernet device.
4826  * @param[in] flow
4827  *   Parent flow structure pointer.
4828  * @param[in] attr
4829  *   Flow rule attributes.
4830  * @param[in] items
4831  *   Pattern specification (list terminated by the END pattern item).
4832  * @param[in] actions
4833  *   Associated actions (list terminated by the END action).
4834  * @param[in] external
4835  *   This flow rule is created by a request external to the PMD.
4836  * @param[in] flow_idx
4837  *   Memory pool index of the flow.
4838  * @param[out] error
4839  *   Perform verbose error reporting if not NULL.
4840  * @return
4841  *   0 on success, negative value otherwise
4842  */
4843 static int
4844 flow_create_split_sample(struct rte_eth_dev *dev,
4845                          struct rte_flow *flow,
4846                          const struct rte_flow_attr *attr,
4847                          const struct rte_flow_item items[],
4848                          const struct rte_flow_action actions[],
4849                          bool external, uint32_t flow_idx,
4850                          struct rte_flow_error *error)
4851 {
4852         struct mlx5_priv *priv = dev->data->dev_private;
4853         struct rte_flow_action *sfx_actions = NULL;
4854         struct rte_flow_action *pre_actions = NULL;
4855         struct rte_flow_item *sfx_items = NULL;
4856         struct mlx5_flow *dev_flow = NULL;
4857         struct rte_flow_attr sfx_attr = *attr;
4858 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4859         struct mlx5_flow_dv_sample_resource *sample_res;
4860         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
4861         struct mlx5_flow_tbl_resource *sfx_tbl;
4862         union mlx5_flow_tbl_key sfx_table_key;
4863 #endif
4864         size_t act_size;
4865         size_t item_size;
4866         uint32_t fdb_tx = 0;
4867         int32_t tag_id = 0;
4868         int actions_n = 0;
4869         int sample_action_pos;
4870         int qrss_action_pos;
4871         int ret = 0;
4872
4873         if (priv->sampler_en)
4874                 actions_n = flow_check_match_action(actions, attr,
4875                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
4876                                         &sample_action_pos, &qrss_action_pos);
4877         if (actions_n) {
4878                 /* The prefix actions must include sample, tag and end. */
4879                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
4880                            + sizeof(struct mlx5_rte_flow_action_set_tag);
4881                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
4882                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4883                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
4884                                           item_size), 0, SOCKET_ID_ANY);
4885                 if (!sfx_actions)
4886                         return rte_flow_error_set(error, ENOMEM,
4887                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4888                                                   NULL, "no memory to split "
4889                                                   "sample flow");
4890                 /* The representor_id is -1 for uplink. */
4891                 fdb_tx = (attr->transfer && priv->representor_id != -1);
4892                 if (!fdb_tx)
4893                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
4894                                         + act_size);
4895                 pre_actions = sfx_actions + actions_n;
4896                 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items,
4897                                                 actions, sfx_actions,
4898                                                 pre_actions, actions_n,
4899                                                 sample_action_pos,
4900                                                 qrss_action_pos, error);
4901                 if (tag_id < 0 || (!fdb_tx && !tag_id)) {
4902                         ret = -rte_errno;
4903                         goto exit;
4904                 }
4905                 /* Add the prefix subflow. */
4906                 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr,
4907                                               items, pre_actions, external,
4908                                               flow_idx, error);
4909                 if (ret) {
4910                         ret = -rte_errno;
4911                         goto exit;
4912                 }
4913                 dev_flow->handle->split_flow_id = tag_id;
4914 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4915                 /* Set the sfx group attr. */
4916                 sample_res = (struct mlx5_flow_dv_sample_resource *)
4917                                         dev_flow->dv.sample_res;
4918                 sfx_tbl = (struct mlx5_flow_tbl_resource *)
4919                                         sample_res->normal_path_tbl;
4920                 sfx_tbl_data = container_of(sfx_tbl,
4921                                         struct mlx5_flow_tbl_data_entry, tbl);
4922                 sfx_table_key.v64 = sfx_tbl_data->entry.key;
4923                 sfx_attr.group = sfx_attr.transfer ?
4924                                         (sfx_table_key.table_id - 1) :
4925                                          sfx_table_key.table_id;
4926 #endif
4927         }
4928         /* Add the suffix subflow. */
4929         ret = flow_create_split_meter(dev, flow, dev_flow ?
4930                                  flow_get_prefix_layer_flags(dev_flow) : 0,
4931                                  dev_flow ? dev_flow->handle->mark : 0,
4932                                  &sfx_attr, sfx_items ? sfx_items : items,
4933                                  sfx_actions ? sfx_actions : actions,
4934                                  external, flow_idx, error);
4935 exit:
4936         if (sfx_actions)
4937                 mlx5_free(sfx_actions);
4938         return ret;
4939 }
4940
4941 /**
4942  * Split the flow to subflow set. The splitters might be linked
4943  * in the chain, like this:
4944  * flow_create_split_outer() calls:
4945  *   flow_create_split_meter() calls:
4946  *     flow_create_split_metadata(meter_subflow_0) calls:
4947  *       flow_create_split_inner(metadata_subflow_0)
4948  *       flow_create_split_inner(metadata_subflow_1)
4949  *       flow_create_split_inner(metadata_subflow_2)
4950  *     flow_create_split_metadata(meter_subflow_1) calls:
4951  *       flow_create_split_inner(metadata_subflow_0)
4952  *       flow_create_split_inner(metadata_subflow_1)
4953  *       flow_create_split_inner(metadata_subflow_2)
4954  *
4955  * This provides a flexible way to add new levels of flow splitting.
4956  * All of the successfully created subflows are included in the
4957  * parent flow dev_flow list.
4958  *
4959  * @param dev
4960  *   Pointer to Ethernet device.
4961  * @param[in] flow
4962  *   Parent flow structure pointer.
4963  * @param[in] attr
4964  *   Flow rule attributes.
4965  * @param[in] items
4966  *   Pattern specification (list terminated by the END pattern item).
4967  * @param[in] actions
4968  *   Associated actions (list terminated by the END action).
4969  * @param[in] external
4970  *   This flow rule is created by a request external to the PMD.
4971  * @param[in] flow_idx
4972  *   Memory pool index of the flow.
4973  * @param[out] error
4974  *   Perform verbose error reporting if not NULL.
4975  * @return
4976  *   0 on success, negative value otherwise
4977  */
4978 static int
4979 flow_create_split_outer(struct rte_eth_dev *dev,
4980                         struct rte_flow *flow,
4981                         const struct rte_flow_attr *attr,
4982                         const struct rte_flow_item items[],
4983                         const struct rte_flow_action actions[],
4984                         bool external, uint32_t flow_idx,
4985                         struct rte_flow_error *error)
4986 {
4987         int ret;
4988
4989         ret = flow_create_split_sample(dev, flow, attr, items,
4990                                        actions, external, flow_idx, error);
4991         MLX5_ASSERT(ret <= 0);
4992         return ret;
4993 }
4994
4995 /**
4996  * Create a flow and add it to @p list.
4997  *
4998  * @param dev
4999  *   Pointer to Ethernet device.
5000  * @param list
5001  *   Pointer to a TAILQ flow list. If this parameter is NULL,
5002  *   no list insertion occurs, the flow is just created and
5003  *   it is the caller's responsibility to track the
5004  *   created flow.
5005  * @param[in] attr
5006  *   Flow rule attributes.
5007  * @param[in] items
5008  *   Pattern specification (list terminated by the END pattern item).
5009  * @param[in] actions
5010  *   Associated actions (list terminated by the END action).
5011  * @param[in] external
5012  *   This flow rule is created by a request external to the PMD.
5013  * @param[out] error
5014  *   Perform verbose error reporting if not NULL.
5015  *
5016  * @return
5017  *   A flow index on success, 0 otherwise and rte_errno is set.
5018  */
5019 static uint32_t
5020 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
5021                  const struct rte_flow_attr *attr,
5022                  const struct rte_flow_item items[],
5023                  const struct rte_flow_action actions[],
5024                  bool external, struct rte_flow_error *error)
5025 {
5026         struct mlx5_priv *priv = dev->data->dev_private;
5027         struct rte_flow *flow = NULL;
5028         struct mlx5_flow *dev_flow;
5029         const struct rte_flow_action_rss *rss;
5030         union {
5031                 struct mlx5_flow_expand_rss buf;
5032                 uint8_t buffer[2048];
5033         } expand_buffer;
5034         union {
5035                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5036                 uint8_t buffer[2048];
5037         } actions_rx;
5038         union {
5039                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5040                 uint8_t buffer[2048];
5041         } actions_hairpin_tx;
5042         union {
5043                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
5044                 uint8_t buffer[2048];
5045         } items_tx;
5046         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
5047         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
5048                                               priv->rss_desc)[!!priv->flow_idx];
5049         const struct rte_flow_action *p_actions_rx = actions;
5050         uint32_t i;
5051         uint32_t idx = 0;
5052         int hairpin_flow;
5053         uint32_t hairpin_id = 0;
5054         struct rte_flow_attr attr_tx = { .priority = 0 };
5055         struct rte_flow_attr attr_factor = {0};
5056         int ret;
5057
5058         memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr));
5059         if (external)
5060                 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR;
5061         hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions);
5062         ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx,
5063                                 external, hairpin_flow, error);
5064         if (ret < 0)
5065                 return 0;
5066         if (hairpin_flow > 0) {
5067                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
5068                         rte_errno = EINVAL;
5069                         return 0;
5070                 }
5071                 flow_hairpin_split(dev, actions, actions_rx.actions,
5072                                    actions_hairpin_tx.actions, items_tx.items,
5073                                    &hairpin_id);
5074                 p_actions_rx = actions_rx.actions;
5075         }
5076         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
5077         if (!flow) {
5078                 rte_errno = ENOMEM;
5079                 goto error_before_flow;
5080         }
5081         flow->drv_type = flow_get_drv_type(dev, &attr_factor);
5082         if (hairpin_id != 0)
5083                 flow->hairpin_flow_id = hairpin_id;
5084         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
5085                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
5086         memset(rss_desc, 0, sizeof(*rss_desc));
5087         rss = flow_get_rss_action(p_actions_rx);
5088         if (rss) {
5089                 /*
5090                  * The following information is required by
5091                  * mlx5_flow_hashfields_adjust() in advance.
5092                  */
5093                 rss_desc->level = rss->level;
5094                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
5095                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
5096         }
5097         flow->dev_handles = 0;
5098         if (rss && rss->types) {
5099                 unsigned int graph_root;
5100
5101                 graph_root = find_graph_root(items, rss->level);
5102                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
5103                                            items, rss->types,
5104                                            mlx5_support_expansion, graph_root);
5105                 MLX5_ASSERT(ret > 0 &&
5106                        (unsigned int)ret < sizeof(expand_buffer.buffer));
5107         } else {
5108                 buf->entries = 1;
5109                 buf->entry[0].pattern = (void *)(uintptr_t)items;
5110         }
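        /*
         * Editor's note: as an example, a pattern of ETH / IPV4 / END with
         * rss->types including ETH_RSS_UDP is expanded above into both the
         * original ETH / IPV4 / END entry and an ETH / IPV4 / UDP / END
         * entry; each entry is then created as a separate sub-flow in the
         * loop below.
         */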
5111         /*
5112          * Record the start index when there is a nested call. All sub-flows
5113          * need to be translated before another call is made.
5114          * A ping-pong buffer is not needed here, which saves memory.
5115          */
5116         if (priv->flow_idx) {
5117                 MLX5_ASSERT(!priv->flow_nested_idx);
5118                 priv->flow_nested_idx = priv->flow_idx;
5119         }
5120         for (i = 0; i < buf->entries; ++i) {
5121                 /*
5122                  * The splitter may create multiple dev_flows,
5123                  * depending on configuration. In the simplest
5124                  * case it just creates the unmodified original flow.
5125                  */
5126                 ret = flow_create_split_outer(dev, flow, &attr_factor,
5127                                               buf->entry[i].pattern,
5128                                               p_actions_rx, external, idx,
5129                                               error);
5130                 if (ret < 0)
5131                         goto error;
5132         }
5133         /* Create the tx flow. */
5134         if (hairpin_flow) {
5135                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
5136                 attr_tx.ingress = 0;
5137                 attr_tx.egress = 1;
5138                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
5139                                          actions_hairpin_tx.actions,
5140                                          idx, error);
5141                 if (!dev_flow)
5142                         goto error;
5143                 dev_flow->flow = flow;
5144                 dev_flow->external = 0;
5145                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5146                               dev_flow->handle, next);
5147                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
5148                                          items_tx.items,
5149                                          actions_hairpin_tx.actions, error);
5150                 if (ret < 0)
5151                         goto error;
5152         }
5153         /*
5154          * Update the metadata register copy table. If extensive
5155          * metadata feature is enabled and registers are supported,
5156          * we might create an extra rte_flow for each unique
5157          * MARK/FLAG action ID.
5158          *
5159          * The table is updated for ingress flows only, because
5160          * the egress flows belong to a different device and the
5161          * copy table should be updated in the peer NIC Rx domain.
5162          */
5163         if (attr_factor.ingress &&
5164             (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
5165                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
5166                 if (ret)
5167                         goto error;
5168         }
5169         /*
5170          * If the flow is external (from application) OR device is started, then
5171          * the flow will be applied immediately.
5172          */
5173         if (external || dev->data->dev_started) {
5174                 ret = flow_drv_apply(dev, flow, error);
5175                 if (ret < 0)
5176                         goto error;
5177         }
5178         if (list)
5179                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
5180                              flow, next);
5181         flow_rxq_flags_set(dev, flow);
5182         /* Nested flow creation index recovery. */
5183         priv->flow_idx = priv->flow_nested_idx;
5184         if (priv->flow_nested_idx)
5185                 priv->flow_nested_idx = 0;
5186         return idx;
5187 error:
5188         MLX5_ASSERT(flow);
5189         ret = rte_errno; /* Save rte_errno before cleanup. */
5190         flow_mreg_del_copy_action(dev, flow);
5191         flow_drv_destroy(dev, flow);
5192         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
5193         rte_errno = ret; /* Restore rte_errno. */
5194 error_before_flow:
5195         ret = rte_errno;
5196         if (hairpin_id)
5197                 mlx5_flow_id_release(priv->sh->flow_id_pool,
5198                                      hairpin_id);
5199         rte_errno = ret;
5200         priv->flow_idx = priv->flow_nested_idx;
5201         if (priv->flow_nested_idx)
5202                 priv->flow_nested_idx = 0;
5203         return 0;
5204 }
5205
5206 /**
5207  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
5208  * incoming packets to table 1.
5209  *
5210  * Other flow rules, requested for group n, will be created in
5211  * e-switch table n+1 (e.g. a rule for group 0 lands in table 1).
5212  * A jump action to e-switch group n is translated to group n+1.
5213  *
5214  * Used when working in switchdev mode, to utilise the advantages of table 1
5215  * and above.
5216  *
5217  * @param dev
5218  *   Pointer to Ethernet device.
5219  *
5220  * @return
5221  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
5222  */
5223 struct rte_flow *
5224 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
5225 {
5226         const struct rte_flow_attr attr = {
5227                 .group = 0,
5228                 .priority = 0,
5229                 .ingress = 1,
5230                 .egress = 0,
5231                 .transfer = 1,
5232         };
5233         const struct rte_flow_item pattern = {
5234                 .type = RTE_FLOW_ITEM_TYPE_END,
5235         };
5236         struct rte_flow_action_jump jump = {
5237                 .group = 1,
5238         };
5239         const struct rte_flow_action actions[] = {
5240                 {
5241                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
5242                         .conf = &jump,
5243                 },
5244                 {
5245                         .type = RTE_FLOW_ACTION_TYPE_END,
5246                 },
5247         };
5248         struct mlx5_priv *priv = dev->data->dev_private;
5249         struct rte_flow_error error;
5250
5251         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
5252                                                    &attr, &pattern,
5253                                                    actions, false, &error);
5254 }
5255
5256 /**
5257  * Validate a flow supported by the NIC.
5258  *
5259  * @see rte_flow_validate()
5260  * @see rte_flow_ops
5261  */
5262 int
5263 mlx5_flow_validate(struct rte_eth_dev *dev,
5264                    const struct rte_flow_attr *attr,
5265                    const struct rte_flow_item items[],
5266                    const struct rte_flow_action actions[],
5267                    struct rte_flow_error *error)
5268 {
5269         int hairpin_flow;
5270
5271         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5272         return flow_drv_validate(dev, attr, items, actions,
5273                                 true, hairpin_flow, error);
5274 }
5275
5276 /**
5277  * Create a flow.
5278  *
5279  * @see rte_flow_create()
5280  * @see rte_flow_ops
5281  */
5282 struct rte_flow *
5283 mlx5_flow_create(struct rte_eth_dev *dev,
5284                  const struct rte_flow_attr *attr,
5285                  const struct rte_flow_item items[],
5286                  const struct rte_flow_action actions[],
5287                  struct rte_flow_error *error)
5288 {
5289         struct mlx5_priv *priv = dev->data->dev_private;
5290
5291         /*
5292          * If the device is not started yet, the application is not allowed
5293          * to create a flow. PMD default flows and traffic control flows
5294          * are not affected.
5295          */
5296         if (unlikely(!dev->data->dev_started)) {
5297                 DRV_LOG(DEBUG, "port %u is not started when "
5298                         "inserting a flow", dev->data->port_id);
5299                 rte_flow_error_set(error, ENODEV,
5300                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5301                                    NULL,
5302                                    "port not started");
5303                 return NULL;
5304         }
5305         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
5306                                   attr, items, actions, true, error);
5307 }
5308
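/*
 * Editor's sketch (hypothetical application code, not part of the PMD):
 * a minimal caller reaching mlx5_flow_create() through the generic
 * rte_flow API once the port has been started. Port id 0 and queue
 * index 0 are assumptions for illustration.
 */
#if 0 /* illustration only, not compiled */
static struct rte_flow *
create_ipv4_to_queue_rule(void)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item_eth eth_spec = { .type = RTE_BE16(0x0800) };
        struct rte_flow_item_eth eth_mask = { .type = RTE_BE16(0xffff) };
        struct rte_flow_action_queue queue = { .index = 0 };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH,
                  .spec = &eth_spec, .mask = &eth_mask },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        /* Dispatches to mlx5_flow_create() on an mlx5 port. */
        return rte_flow_create(0, &attr, pattern, actions, &error);
}
#endif
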
5309 /**
5310  * Destroy a flow in a list.
5311  *
5312  * @param dev
5313  *   Pointer to Ethernet device.
5314  * @param list
5315  *   Pointer to the indexed flow list. If this parameter is NULL,
5316  *   the flow is not removed from any list. Note that, because
5317  *   flows are stored in an indexed pool, the memory the list
5318  *   points to may change as flows are destroyed.
5319  * @param[in] flow_idx
5320  *   Index of flow to destroy.
5321  */
5322 static void
5323 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
5324                   uint32_t flow_idx)
5325 {
5326         struct mlx5_priv *priv = dev->data->dev_private;
5327         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5328         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5329                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
5330
5331         if (!flow)
5332                 return;
5333         /*
5334          * Update RX queue flags only if port is started, otherwise it is
5335          * already clean.
5336          */
5337         if (dev->data->dev_started)
5338                 flow_rxq_flags_trim(dev, flow);
5339         if (flow->hairpin_flow_id)
5340                 mlx5_flow_id_release(priv->sh->flow_id_pool,
5341                                      flow->hairpin_flow_id);
5342         flow_drv_destroy(dev, flow);
5343         if (list)
5344                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
5345                              flow_idx, flow, next);
5346         flow_mreg_del_copy_action(dev, flow);
5347         if (flow->fdir) {
5348                 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5349                         if (priv_fdir_flow->rix_flow == flow_idx)
5350                                 break;
5351                 }
5352                 if (priv_fdir_flow) {
5353                         LIST_REMOVE(priv_fdir_flow, next);
5354                         mlx5_free(priv_fdir_flow->fdir);
5355                         mlx5_free(priv_fdir_flow);
5356                 }
5357         }
5358         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5359 }
5360
5361 /**
5362  * Destroy all flows.
5363  *
5364  * @param dev
5365  *   Pointer to Ethernet device.
5366  * @param list
5367  *   Pointer to the Indexed flow list.
5368  * @param active
5369  *   True if the flush is called actively (e.g. before stopping the port).
5370  */
5371 void
5372 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
5373 {
5374         uint32_t num_flushed = 0;
5375
5376         while (*list) {
5377                 flow_list_destroy(dev, list, *list);
5378                 num_flushed++;
5379         }
5380         if (active) {
5381                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
5382                         dev->data->port_id, num_flushed);
5383         }
5384 }
5385
5386 /**
5387  * Remove all flows.
5388  *
5389  * @param dev
5390  *   Pointer to Ethernet device.
5391  * @param list
5392  *   Pointer to the Indexed flow list.
5393  */
5394 void
5395 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
5396 {
5397         struct mlx5_priv *priv = dev->data->dev_private;
5398         struct rte_flow *flow = NULL;
5399         uint32_t idx;
5400
5401         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
5402                       flow, next) {
5403                 flow_drv_remove(dev, flow);
5404                 flow_mreg_stop_copy_action(dev, flow);
5405         }
5406         flow_mreg_del_default_copy_action(dev);
5407         flow_rxq_flags_clear(dev);
5408 }
5409
5410 /**
5411  * Add all flows.
5412  *
5413  * @param dev
5414  *   Pointer to Ethernet device.
5415  * @param list
5416  *   Pointer to the Indexed flow list.
5417  *
5418  * @return
5419  *   0 on success, a negative errno value otherwise and rte_errno is set.
5420  */
5421 int
5422 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
5423 {
5424         struct mlx5_priv *priv = dev->data->dev_private;
5425         struct rte_flow *flow = NULL;
5426         struct rte_flow_error error;
5427         uint32_t idx;
5428         int ret = 0;
5429
5430         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
5431         ret = flow_mreg_add_default_copy_action(dev, &error);
5432         if (ret < 0)
5433                 return -rte_errno;
5434         /* Apply Flows created by application. */
5435         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
5436                       flow, next) {
5437                 ret = flow_mreg_start_copy_action(dev, flow);
5438                 if (ret < 0)
5439                         goto error;
5440                 ret = flow_drv_apply(dev, flow, &error);
5441                 if (ret < 0)
5442                         goto error;
5443                 flow_rxq_flags_set(dev, flow);
5444         }
5445         return 0;
5446 error:
5447         ret = rte_errno; /* Save rte_errno before cleanup. */
5448         mlx5_flow_stop(dev, list);
5449         rte_errno = ret; /* Restore rte_errno. */
5450         return -rte_errno;
5451 }
5452
5453 /**
5454  * Stop all default actions for flows.
5455  *
5456  * @param dev
5457  *   Pointer to Ethernet device.
5458  */
5459 void
5460 mlx5_flow_stop_default(struct rte_eth_dev *dev)
5461 {
5462         flow_mreg_del_default_copy_action(dev);
5463         flow_rxq_flags_clear(dev);
5464 }
5465
5466 /**
5467  * Start all default actions for flows.
5468  *
5469  * @param dev
5470  *   Pointer to Ethernet device.
5471  * @return
5472  *   0 on success, a negative errno value otherwise and rte_errno is set.
5473  */
5474 int
5475 mlx5_flow_start_default(struct rte_eth_dev *dev)
5476 {
5477         struct rte_flow_error error;
5478
5479         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
5480         return flow_mreg_add_default_copy_action(dev, &error);
5481 }
5482
5483 /**
5484  * Allocate intermediate resources for flow creation.
5485  *
5486  * @param dev
5487  *   Pointer to Ethernet device.
5488  */
5489 void
5490 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
5491 {
5492         struct mlx5_priv *priv = dev->data->dev_private;
5493
5494         if (!priv->inter_flows) {
5495                 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO,
5496                                     MLX5_NUM_MAX_DEV_FLOWS *
5497                                     sizeof(struct mlx5_flow) +
5498                                     (sizeof(struct mlx5_flow_rss_desc) +
5499                                     sizeof(uint16_t) * UINT16_MAX) * 2, 0,
5500                                     SOCKET_ID_ANY);
5501                 if (!priv->inter_flows) {
5502                         DRV_LOG(ERR, "can't allocate intermediate memory.");
5503                         return;
5504                 }
5505         }
5506         priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
5507                          [MLX5_NUM_MAX_DEV_FLOWS];
5508         /* Reset the index. */
5509         priv->flow_idx = 0;
5510         priv->flow_nested_idx = 0;
5511 }
5512
5513 /**
5514  * Free intermediate resources for flows.
5515  *
5516  * @param dev
5517  *   Pointer to Ethernet device.
5518  */
5519 void
5520 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
5521 {
5522         struct mlx5_priv *priv = dev->data->dev_private;
5523
5524         mlx5_free(priv->inter_flows);
5525         priv->inter_flows = NULL;
5526 }
5527
5528 /**
5529  * Verify the flow list is empty.
5530  *
5531  * @param dev
5532  *   Pointer to Ethernet device.
5533  *
5534  * @return The number of flows not released.
5535  */
5536 int
5537 mlx5_flow_verify(struct rte_eth_dev *dev)
5538 {
5539         struct mlx5_priv *priv = dev->data->dev_private;
5540         struct rte_flow *flow;
5541         uint32_t idx;
5542         int ret = 0;
5543
5544         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
5545                       flow, next) {
5546                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
5547                         dev->data->port_id, (void *)flow);
5548                 ++ret;
5549         }
5550         return ret;
5551 }
5552
5553 /**
5554  * Enable default hairpin egress flow.
5555  *
5556  * @param dev
5557  *   Pointer to Ethernet device.
5558  * @param queue
5559  *   The queue index.
5560  *
5561  * @return
5562  *   0 on success, a negative errno value otherwise and rte_errno is set.
5563  */
5564 int
5565 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
5566                             uint32_t queue)
5567 {
5568         struct mlx5_priv *priv = dev->data->dev_private;
5569         const struct rte_flow_attr attr = {
5570                 .egress = 1,
5571                 .priority = 0,
5572         };
5573         struct mlx5_rte_flow_item_tx_queue queue_spec = {
5574                 .queue = queue,
5575         };
5576         struct mlx5_rte_flow_item_tx_queue queue_mask = {
5577                 .queue = UINT32_MAX,
5578         };
5579         struct rte_flow_item items[] = {
5580                 {
5581                         .type = (enum rte_flow_item_type)
5582                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
5583                         .spec = &queue_spec,
5584                         .last = NULL,
5585                         .mask = &queue_mask,
5586                 },
5587                 {
5588                         .type = RTE_FLOW_ITEM_TYPE_END,
5589                 },
5590         };
5591         struct rte_flow_action_jump jump = {
5592                 .group = MLX5_HAIRPIN_TX_TABLE,
5593         };
5594         struct rte_flow_action actions[2];
5595         uint32_t flow_idx;
5596         struct rte_flow_error error;
5597
5598         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
5599         actions[0].conf = &jump;
5600         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
5601         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5602                                 &attr, items, actions, false, &error);
5603         if (!flow_idx) {
5604                 DRV_LOG(DEBUG,
5605                         "Failed to create ctrl flow: rte_errno(%d),"
5606                         " type(%d), message(%s)",
5607                         rte_errno, error.type,
5608                         error.message ? error.message : " (no stated reason)");
5609                 return -rte_errno;
5610         }
5611         return 0;
5612 }
5613
5614 /**
5615  * Enable a control flow configured from the control plane.
5616  *
5617  * @param dev
5618  *   Pointer to Ethernet device.
5619  * @param eth_spec
5620  *   An Ethernet flow spec to apply.
5621  * @param eth_mask
5622  *   An Ethernet flow mask to apply.
5623  * @param vlan_spec
5624  *   A VLAN flow spec to apply.
5625  * @param vlan_mask
5626  *   A VLAN flow mask to apply.
5627  *
5628  * @return
5629  *   0 on success, a negative errno value otherwise and rte_errno is set.
5630  */
5631 int
5632 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
5633                     struct rte_flow_item_eth *eth_spec,
5634                     struct rte_flow_item_eth *eth_mask,
5635                     struct rte_flow_item_vlan *vlan_spec,
5636                     struct rte_flow_item_vlan *vlan_mask)
5637 {
5638         struct mlx5_priv *priv = dev->data->dev_private;
5639         const struct rte_flow_attr attr = {
5640                 .ingress = 1,
5641                 .priority = MLX5_FLOW_PRIO_RSVD,
5642         };
5643         struct rte_flow_item items[] = {
5644                 {
5645                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5646                         .spec = eth_spec,
5647                         .last = NULL,
5648                         .mask = eth_mask,
5649                 },
5650                 {
5651                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
5652                                               RTE_FLOW_ITEM_TYPE_END,
5653                         .spec = vlan_spec,
5654                         .last = NULL,
5655                         .mask = vlan_mask,
5656                 },
5657                 {
5658                         .type = RTE_FLOW_ITEM_TYPE_END,
5659                 },
5660         };
5661         uint16_t queue[priv->reta_idx_n];
5662         struct rte_flow_action_rss action_rss = {
5663                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
5664                 .level = 0,
5665                 .types = priv->rss_conf.rss_hf,
5666                 .key_len = priv->rss_conf.rss_key_len,
5667                 .queue_num = priv->reta_idx_n,
5668                 .key = priv->rss_conf.rss_key,
5669                 .queue = queue,
5670         };
5671         struct rte_flow_action actions[] = {
5672                 {
5673                         .type = RTE_FLOW_ACTION_TYPE_RSS,
5674                         .conf = &action_rss,
5675                 },
5676                 {
5677                         .type = RTE_FLOW_ACTION_TYPE_END,
5678                 },
5679         };
5680         uint32_t flow_idx;
5681         struct rte_flow_error error;
5682         unsigned int i;
5683
5684         if (!priv->reta_idx_n || !priv->rxqs_n) {
5685                 return 0;
5686         }
5687         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
5688                 action_rss.types = 0;
5689         for (i = 0; i != priv->reta_idx_n; ++i)
5690                 queue[i] = (*priv->reta_idx)[i];
5691         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5692                                 &attr, items, actions, false, &error);
5693         if (!flow_idx)
5694                 return -rte_errno;
5695         return 0;
5696 }
5697
5698 /**
5699  * Enable a control flow configured from the control plane.
5700  *
5701  * @param dev
5702  *   Pointer to Ethernet device.
5703  * @param eth_spec
5704  *   An Ethernet flow spec to apply.
5705  * @param eth_mask
5706  *   An Ethernet flow mask to apply.
5707  *
5708  * @return
5709  *   0 on success, a negative errno value otherwise and rte_errno is set.
5710  */
5711 int
5712 mlx5_ctrl_flow(struct rte_eth_dev *dev,
5713                struct rte_flow_item_eth *eth_spec,
5714                struct rte_flow_item_eth *eth_mask)
5715 {
5716         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
5717 }
5718
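/*
 * Editor's sketch (hypothetical, not part of this file): how the driver's
 * control path could use mlx5_ctrl_flow(), e.g. to receive broadcast
 * traffic, similar to what the traffic-enable code does elsewhere.
 */
#if 0 /* illustration only, not compiled */
static int
enable_broadcast_rx(struct rte_eth_dev *dev)
{
        struct rte_flow_item_eth bcast = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };

        /* Falls through to mlx5_ctrl_flow_vlan() without VLAN items. */
        return mlx5_ctrl_flow(dev, &bcast, &bcast);
}
#endif
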
5719 /**
5720  * Create a default miss flow rule matching LACP traffic.
5721  *
5722  * @param dev
5723  *   Pointer to Ethernet device.
5726  *
5727  * @return
5728  *   0 on success, a negative errno value otherwise and rte_errno is set.
5729  */
5730 int
5731 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
5732 {
5733         struct mlx5_priv *priv = dev->data->dev_private;
5734         /*
5735          * The LACP matching is done by only using ether type since using
5736          * a multicast dst mac causes the kernel to give low priority to this flow.
5737          */
5738         static const struct rte_flow_item_eth lacp_spec = {
5739                 .type = RTE_BE16(0x8809),
5740         };
5741         static const struct rte_flow_item_eth lacp_mask = {
5742                 .type = 0xffff,
5743         };
5744         const struct rte_flow_attr attr = {
5745                 .ingress = 1,
5746         };
5747         struct rte_flow_item items[] = {
5748                 {
5749                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5750                         .spec = &lacp_spec,
5751                         .mask = &lacp_mask,
5752                 },
5753                 {
5754                         .type = RTE_FLOW_ITEM_TYPE_END,
5755                 },
5756         };
5757         struct rte_flow_action actions[] = {
5758                 {
5759                         .type = (enum rte_flow_action_type)
5760                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
5761                 },
5762                 {
5763                         .type = RTE_FLOW_ACTION_TYPE_END,
5764                 },
5765         };
5766         struct rte_flow_error error;
5767         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5768                                 &attr, items, actions, false, &error);
5769
5770         if (!flow_idx)
5771                 return -rte_errno;
5772         return 0;
5773 }
5774
5775 /**
5776  * Destroy a flow.
5777  *
5778  * @see rte_flow_destroy()
5779  * @see rte_flow_ops
5780  */
5781 int
5782 mlx5_flow_destroy(struct rte_eth_dev *dev,
5783                   struct rte_flow *flow,
5784                   struct rte_flow_error *error __rte_unused)
5785 {
5786         struct mlx5_priv *priv = dev->data->dev_private;
5787
5788         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
5789         return 0;
5790 }
5791
5792 /**
5793  * Destroy all flows.
5794  *
5795  * @see rte_flow_flush()
5796  * @see rte_flow_ops
5797  */
5798 int
5799 mlx5_flow_flush(struct rte_eth_dev *dev,
5800                 struct rte_flow_error *error __rte_unused)
5801 {
5802         struct mlx5_priv *priv = dev->data->dev_private;
5803
5804         mlx5_flow_list_flush(dev, &priv->flows, false);
5805         return 0;
5806 }
5807
5808 /**
5809  * Isolated mode.
5810  *
5811  * @see rte_flow_isolate()
5812  * @see rte_flow_ops
5813  */
5814 int
5815 mlx5_flow_isolate(struct rte_eth_dev *dev,
5816                   int enable,
5817                   struct rte_flow_error *error)
5818 {
5819         struct mlx5_priv *priv = dev->data->dev_private;
5820
5821         if (dev->data->dev_started) {
5822                 rte_flow_error_set(error, EBUSY,
5823                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5824                                    NULL,
5825                                    "port must be stopped first");
5826                 return -rte_errno;
5827         }
5828         priv->isolated = !!enable;
5829         if (enable)
5830                 dev->dev_ops = &mlx5_os_dev_ops_isolate;
5831         else
5832                 dev->dev_ops = &mlx5_os_dev_ops;
5833
5834         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
5835         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
5836
5837         return 0;
5838 }
5839
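/*
 * Editor's sketch (hypothetical application code): isolated mode must be
 * requested while the port is stopped, typically before the first
 * rte_eth_dev_start(); otherwise the EBUSY path above is taken.
 */
#if 0 /* illustration only, not compiled */
static int
enter_isolated_mode(uint16_t port_id)
{
        struct rte_flow_error error;

        if (rte_flow_isolate(port_id, 1, &error) < 0)
                return -rte_errno; /* e.g. EBUSY if the port is started. */
        return rte_eth_dev_start(port_id);
}
#endif
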
5840 /**
5841  * Query a flow.
5842  *
5843  * @see rte_flow_query()
5844  * @see rte_flow_ops
5845  */
5846 static int
5847 flow_drv_query(struct rte_eth_dev *dev,
5848                uint32_t flow_idx,
5849                const struct rte_flow_action *actions,
5850                void *data,
5851                struct rte_flow_error *error)
5852 {
5853         struct mlx5_priv *priv = dev->data->dev_private;
5854         const struct mlx5_flow_driver_ops *fops;
5855         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5856                                                [MLX5_IPOOL_RTE_FLOW],
5857                                                flow_idx);
5858         enum mlx5_flow_drv_type ftype;
5859
5860         if (!flow) {
5861                 return rte_flow_error_set(error, ENOENT,
5862                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5863                           NULL,
5864                           "invalid flow handle");
5865         }
5866         ftype = flow->drv_type;
5867         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
5868         fops = flow_get_drv_ops(ftype);
5869
5870         return fops->query(dev, flow, actions, data, error);
5871 }
5872
5873 /**
5874  * Query a flow.
5875  *
5876  * @see rte_flow_query()
5877  * @see rte_flow_ops
5878  */
5879 int
5880 mlx5_flow_query(struct rte_eth_dev *dev,
5881                 struct rte_flow *flow,
5882                 const struct rte_flow_action *actions,
5883                 void *data,
5884                 struct rte_flow_error *error)
5885 {
5886         int ret;
5887
5888         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
5889                              error);
5890         if (ret < 0)
5891                 return ret;
5892         return 0;
5893 }
5894
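/*
 * Editor's sketch (hypothetical application code): querying a COUNT action
 * through the generic API, which lands in mlx5_flow_query() above. The
 * rule is assumed to have been created with RTE_FLOW_ACTION_TYPE_COUNT.
 */
#if 0 /* illustration only, not compiled */
static int
read_flow_counter(uint16_t port_id, struct rte_flow *flow,
                  uint64_t *hits, uint64_t *bytes)
{
        struct rte_flow_query_count count = { .reset = 0 };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_COUNT },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;
        int ret;

        ret = rte_flow_query(port_id, flow, actions, &count, &error);
        if (ret)
                return ret;
        *hits = count.hits_set ? count.hits : 0;
        *bytes = count.bytes_set ? count.bytes : 0;
        return 0;
}
#endif
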
5895 /**
5896  * Convert a flow director filter to a generic flow.
5897  *
5898  * @param dev
5899  *   Pointer to Ethernet device.
5900  * @param fdir_filter
5901  *   Flow director filter to add.
5902  * @param attributes
5903  *   Generic flow parameters structure.
5904  *
5905  * @return
5906  *   0 on success, a negative errno value otherwise and rte_errno is set.
5907  */
5908 static int
5909 flow_fdir_filter_convert(struct rte_eth_dev *dev,
5910                          const struct rte_eth_fdir_filter *fdir_filter,
5911                          struct mlx5_fdir *attributes)
5912 {
5913         struct mlx5_priv *priv = dev->data->dev_private;
5914         const struct rte_eth_fdir_input *input = &fdir_filter->input;
5915         const struct rte_eth_fdir_masks *mask =
5916                 &dev->data->dev_conf.fdir_conf.mask;
5917
5918         /* Validate queue number. */
5919         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
5920                 DRV_LOG(ERR, "port %u invalid queue number %d",
5921                         dev->data->port_id, fdir_filter->action.rx_queue);
5922                 rte_errno = EINVAL;
5923                 return -rte_errno;
5924         }
5925         attributes->attr.ingress = 1;
5926         attributes->items[0] = (struct rte_flow_item) {
5927                 .type = RTE_FLOW_ITEM_TYPE_ETH,
5928                 .spec = &attributes->l2,
5929                 .mask = &attributes->l2_mask,
5930         };
5931         switch (fdir_filter->action.behavior) {
5932         case RTE_ETH_FDIR_ACCEPT:
5933                 attributes->actions[0] = (struct rte_flow_action){
5934                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
5935                         .conf = &attributes->queue,
5936                 };
5937                 break;
5938         case RTE_ETH_FDIR_REJECT:
5939                 attributes->actions[0] = (struct rte_flow_action){
5940                         .type = RTE_FLOW_ACTION_TYPE_DROP,
5941                 };
5942                 break;
5943         default:
5944                 DRV_LOG(ERR, "port %u invalid behavior %d",
5945                         dev->data->port_id,
5946                         fdir_filter->action.behavior);
5947                 rte_errno = ENOTSUP;
5948                 return -rte_errno;
5949         }
5950         attributes->queue.index = fdir_filter->action.rx_queue;
5951         /* Handle L3. */
5952         switch (fdir_filter->input.flow_type) {
5953         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5954         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5955         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5956                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
5957                         .src_addr = input->flow.ip4_flow.src_ip,
5958                         .dst_addr = input->flow.ip4_flow.dst_ip,
5959                         .time_to_live = input->flow.ip4_flow.ttl,
5960                         .type_of_service = input->flow.ip4_flow.tos,
5961                 };
5962                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
5963                         .src_addr = mask->ipv4_mask.src_ip,
5964                         .dst_addr = mask->ipv4_mask.dst_ip,
5965                         .time_to_live = mask->ipv4_mask.ttl,
5966                         .type_of_service = mask->ipv4_mask.tos,
5967                         .next_proto_id = mask->ipv4_mask.proto,
5968                 };
5969                 attributes->items[1] = (struct rte_flow_item){
5970                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
5971                         .spec = &attributes->l3,
5972                         .mask = &attributes->l3_mask,
5973                 };
5974                 break;
5975         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5976         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5977         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5978                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
5979                         .hop_limits = input->flow.ipv6_flow.hop_limits,
5980                         .proto = input->flow.ipv6_flow.proto,
5981                 };
5982
5983                 memcpy(attributes->l3.ipv6.hdr.src_addr,
5984                        input->flow.ipv6_flow.src_ip,
5985                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5986                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
5987                        input->flow.ipv6_flow.dst_ip,
5988                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
5989                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
5990                        mask->ipv6_mask.src_ip,
5991                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
5992                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
5993                        mask->ipv6_mask.dst_ip,
5994                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
5995                 attributes->items[1] = (struct rte_flow_item){
5996                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
5997                         .spec = &attributes->l3,
5998                         .mask = &attributes->l3_mask,
5999                 };
6000                 break;
6001         default:
6002                 DRV_LOG(ERR, "port %u invalid flow type %d",
6003                         dev->data->port_id, fdir_filter->input.flow_type);
6004                 rte_errno = ENOTSUP;
6005                 return -rte_errno;
6006         }
6007         /* Handle L4. */
6008         switch (fdir_filter->input.flow_type) {
6009         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
6010                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
6011                         .src_port = input->flow.udp4_flow.src_port,
6012                         .dst_port = input->flow.udp4_flow.dst_port,
6013                 };
6014                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
6015                         .src_port = mask->src_port_mask,
6016                         .dst_port = mask->dst_port_mask,
6017                 };
6018                 attributes->items[2] = (struct rte_flow_item){
6019                         .type = RTE_FLOW_ITEM_TYPE_UDP,
6020                         .spec = &attributes->l4,
6021                         .mask = &attributes->l4_mask,
6022                 };
6023                 break;
6024         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
6025                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
6026                         .src_port = input->flow.tcp4_flow.src_port,
6027                         .dst_port = input->flow.tcp4_flow.dst_port,
6028                 };
6029                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
6030                         .src_port = mask->src_port_mask,
6031                         .dst_port = mask->dst_port_mask,
6032                 };
6033                 attributes->items[2] = (struct rte_flow_item){
6034                         .type = RTE_FLOW_ITEM_TYPE_TCP,
6035                         .spec = &attributes->l4,
6036                         .mask = &attributes->l4_mask,
6037                 };
6038                 break;
6039         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
6040                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
6041                         .src_port = input->flow.udp6_flow.src_port,
6042                         .dst_port = input->flow.udp6_flow.dst_port,
6043                 };
6044                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
6045                         .src_port = mask->src_port_mask,
6046                         .dst_port = mask->dst_port_mask,
6047                 };
6048                 attributes->items[2] = (struct rte_flow_item){
6049                         .type = RTE_FLOW_ITEM_TYPE_UDP,
6050                         .spec = &attributes->l4,
6051                         .mask = &attributes->l4_mask,
6052                 };
6053                 break;
6054         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
6055                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
6056                         .src_port = input->flow.tcp6_flow.src_port,
6057                         .dst_port = input->flow.tcp6_flow.dst_port,
6058                 };
6059                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
6060                         .src_port = mask->src_port_mask,
6061                         .dst_port = mask->dst_port_mask,
6062                 };
6063                 attributes->items[2] = (struct rte_flow_item){
6064                         .type = RTE_FLOW_ITEM_TYPE_TCP,
6065                         .spec = &attributes->l4,
6066                         .mask = &attributes->l4_mask,
6067                 };
6068                 break;
6069         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
6070         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
6071                 break;
6072         default:
6073                 DRV_LOG(ERR, "port %u invalid flow type %d",
6074                         dev->data->port_id, fdir_filter->input.flow_type);
6075                 rte_errno = ENOTSUP;
6076                 return -rte_errno;
6077         }
6078         return 0;
6079 }
6080
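/*
 * Editor's illustration (hypothetical values): a legacy FDIR filter that
 * the conversion above turns into ETH / IPV4 / UDP items with a QUEUE
 * action. Addresses, ports and the queue index are placeholders.
 */
#if 0 /* illustration only, not compiled */
static const struct rte_eth_fdir_filter example_filter = {
        .input = {
                .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
                .flow.udp4_flow = {
                        .ip = {
                                .src_ip = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
                                .dst_ip = RTE_BE32(RTE_IPV4(192, 168, 0, 2)),
                        },
                        .src_port = RTE_BE16(1234),
                        .dst_port = RTE_BE16(5678),
                },
        },
        .action = {
                .rx_queue = 0,
                .behavior = RTE_ETH_FDIR_ACCEPT,
        },
};
#endif
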
6081 #define FLOW_FDIR_CMP(f1, f2, fld) \
6082         memcmp(&(f1)->fld, &(f2)->fld, sizeof((f1)->fld))
6083
6084 /**
6085  * Compare two FDIR flows. If items and actions are identical, the two flows are
6086  * regarded as the same.
6087  *
6090  * @param f1
6091  *   FDIR flow to compare.
6092  * @param f2
6093  *   FDIR flow to compare.
6094  *
6095  * @return
6096  *   Zero on match, 1 otherwise.
6097  */
6098 static int
6099 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
6100 {
6101         if (FLOW_FDIR_CMP(f1, f2, attr) ||
6102             FLOW_FDIR_CMP(f1, f2, l2) ||
6103             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
6104             FLOW_FDIR_CMP(f1, f2, l3) ||
6105             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
6106             FLOW_FDIR_CMP(f1, f2, l4) ||
6107             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
6108             FLOW_FDIR_CMP(f1, f2, actions[0].type))
6109                 return 1;
6110         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
6111             FLOW_FDIR_CMP(f1, f2, queue))
6112                 return 1;
6113         return 0;
6114 }
6115
6116 /**
6117  * Search the device flow list for a matching FDIR flow.
6118  *
6119  * @param dev
6120  *   Pointer to Ethernet device.
6121  * @param fdir_flow
6122  *   FDIR flow to lookup.
6123  *
6124  * @return
6125  *   Index of flow if found, 0 otherwise.
6126  */
6127 static uint32_t
6128 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
6129 {
6130         struct mlx5_priv *priv = dev->data->dev_private;
6131         uint32_t flow_idx = 0;
6132         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6133
6134         MLX5_ASSERT(fdir_flow);
6135         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
6136                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) {
6137                         flow_idx = priv_fdir_flow->rix_flow;
6138                         DRV_LOG(DEBUG, "port %u found FDIR flow %u",
6139                                 dev->data->port_id, flow_idx);
6140                         break;
6141                 }
6142         }
6143         return flow_idx;
6144 }
6145
6146 /**
6147  * Add a new flow director filter and store it in the list.
6148  *
6149  * @param dev
6150  *   Pointer to Ethernet device.
6151  * @param fdir_filter
6152  *   Flow director filter to add.
6153  *
6154  * @return
6155  *   0 on success, a negative errno value otherwise and rte_errno is set.
6156  */
6157 static int
6158 flow_fdir_filter_add(struct rte_eth_dev *dev,
6159                      const struct rte_eth_fdir_filter *fdir_filter)
6160 {
6161         struct mlx5_priv *priv = dev->data->dev_private;
6162         struct mlx5_fdir *fdir_flow;
6163         struct rte_flow *flow;
6164         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6165         uint32_t flow_idx;
6166         int ret;
6167
6168         fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0,
6169                                 SOCKET_ID_ANY);
6170         if (!fdir_flow) {
6171                 rte_errno = ENOMEM;
6172                 return -rte_errno;
6173         }
6174         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
6175         if (ret)
6176                 goto error;
6177         flow_idx = flow_fdir_filter_lookup(dev, fdir_flow);
6178         if (flow_idx) {
6179                 rte_errno = EEXIST;
6180                 goto error;
6181         }
6182         priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO,
6183                                      sizeof(struct mlx5_fdir_flow),
6184                                      0, SOCKET_ID_ANY);
6185         if (!priv_fdir_flow) {
6186                 rte_errno = ENOMEM;
6187                 goto error;
6188         }
6189         flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
6190                                     fdir_flow->items, fdir_flow->actions, true,
6191                                     NULL);
6192         flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
6193         if (!flow)
6194                 goto error;
6195         flow->fdir = 1;
6196         priv_fdir_flow->fdir = fdir_flow;
6197         priv_fdir_flow->rix_flow = flow_idx;
6198         LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next);
6199         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
6200                 dev->data->port_id, (void *)flow);
6201         return 0;
6202 error:
6203         mlx5_free(priv_fdir_flow);
6204         mlx5_free(fdir_flow);
6205         return -rte_errno;
6206 }
6207
6208 /**
6209  * Delete a specific filter.
6210  *
6211  * @param dev
6212  *   Pointer to Ethernet device.
6213  * @param fdir_filter
6214  *   Filter to be deleted.
6215  *
6216  * @return
6217  *   0 on success, a negative errno value otherwise and rte_errno is set.
6218  */
6219 static int
6220 flow_fdir_filter_delete(struct rte_eth_dev *dev,
6221                         const struct rte_eth_fdir_filter *fdir_filter)
6222 {
6223         struct mlx5_priv *priv = dev->data->dev_private;
6224         uint32_t flow_idx;
6225         struct mlx5_fdir fdir_flow = {
6226                 .attr.group = 0,
6227         };
6228         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6229         int ret;
6230
6231         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
6232         if (ret)
6233                 return -rte_errno;
6234         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
6235                 /* Find the fdir in priv list */
6236                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
6237                         break;
6238         }
6239         if (!priv_fdir_flow)
6240                 return 0;
6241         LIST_REMOVE(priv_fdir_flow, next);
6242         flow_idx = priv_fdir_flow->rix_flow;
6243         flow_list_destroy(dev, &priv->flows, flow_idx);
6244         mlx5_free(priv_fdir_flow->fdir);
6245         mlx5_free(priv_fdir_flow);
6246         DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
6247                 dev->data->port_id, flow_idx);
6248         return 0;
6249 }
6250
6251 /**
6252  * Update a specific filter (delete the old one and add the new one).
6253  *
6254  * @param dev
6255  *   Pointer to Ethernet device.
6256  * @param fdir_filter
6257  *   Filter to be updated.
6258  *
6259  * @return
6260  *   0 on success, a negative errno value otherwise and rte_errno is set.
6261  */
6262 static int
6263 flow_fdir_filter_update(struct rte_eth_dev *dev,
6264                         const struct rte_eth_fdir_filter *fdir_filter)
6265 {
6266         int ret;
6267
6268         ret = flow_fdir_filter_delete(dev, fdir_filter);
6269         if (ret)
6270                 return ret;
6271         return flow_fdir_filter_add(dev, fdir_filter);
6272 }
6273
6274 /**
6275  * Flush all filters.
6276  *
6277  * @param dev
6278  *   Pointer to Ethernet device.
6279  */
6280 static void
6281 flow_fdir_filter_flush(struct rte_eth_dev *dev)
6282 {
6283         struct mlx5_priv *priv = dev->data->dev_private;
6284         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6285
6286         while (!LIST_EMPTY(&priv->fdir_flows)) {
6287                 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
6288                 LIST_REMOVE(priv_fdir_flow, next);
6289                 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
6290                 mlx5_free(priv_fdir_flow->fdir);
6291                 mlx5_free(priv_fdir_flow);
6292         }
6293 }
6294
6295 /**
6296  * Get flow director information.
6297  *
6298  * @param dev
6299  *   Pointer to Ethernet device.
6300  * @param[out] fdir_info
6301  *   Resulting flow director information.
6302  */
6303 static void
6304 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
6305 {
6306         struct rte_eth_fdir_masks *mask =
6307                 &dev->data->dev_conf.fdir_conf.mask;
6308
6309         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
6310         fdir_info->guarant_spc = 0;
6311         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
6312         fdir_info->max_flexpayload = 0;
6313         fdir_info->flow_types_mask[0] = 0;
6314         fdir_info->flex_payload_unit = 0;
6315         fdir_info->max_flex_payload_segment_num = 0;
6316         fdir_info->flex_payload_limit = 0;
6317         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
6318 }
6319
6320 /**
6321  * Deal with flow director operations.
6322  *
6323  * @param dev
6324  *   Pointer to Ethernet device.
6325  * @param filter_op
6326  *   Operation to perform.
6327  * @param arg
6328  *   Pointer to operation-specific structure.
6329  *
6330  * @return
6331  *   0 on success, a negative errno value otherwise and rte_errno is set.
6332  */
6333 static int
6334 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
6335                     void *arg)
6336 {
6337         enum rte_fdir_mode fdir_mode =
6338                 dev->data->dev_conf.fdir_conf.mode;
6339
6340         if (filter_op == RTE_ETH_FILTER_NOP)
6341                 return 0;
6342         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
6343             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
6344                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
6345                         dev->data->port_id, fdir_mode);
6346                 rte_errno = EINVAL;
6347                 return -rte_errno;
6348         }
6349         switch (filter_op) {
6350         case RTE_ETH_FILTER_ADD:
6351                 return flow_fdir_filter_add(dev, arg);
6352         case RTE_ETH_FILTER_UPDATE:
6353                 return flow_fdir_filter_update(dev, arg);
6354         case RTE_ETH_FILTER_DELETE:
6355                 return flow_fdir_filter_delete(dev, arg);
6356         case RTE_ETH_FILTER_FLUSH:
6357                 flow_fdir_filter_flush(dev);
6358                 break;
6359         case RTE_ETH_FILTER_INFO:
6360                 flow_fdir_info_get(dev, arg);
6361                 break;
6362         default:
6363                 DRV_LOG(DEBUG, "port %u unknown operation %u",
6364                         dev->data->port_id, filter_op);
6365                 rte_errno = EINVAL;
6366                 return -rte_errno;
6367         }
6368         return 0;
6369 }
6370
6371 /**
6372  * Manage filter operations.
6373  *
6374  * @param dev
6375  *   Pointer to Ethernet device structure.
6376  * @param filter_type
6377  *   Filter type.
6378  * @param filter_op
6379  *   Operation to perform.
6380  * @param arg
6381  *   Pointer to operation-specific structure.
6382  *
6383  * @return
6384  *   0 on success, a negative errno value otherwise and rte_errno is set.
6385  */
6386 int
6387 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
6388                      enum rte_filter_type filter_type,
6389                      enum rte_filter_op filter_op,
6390                      void *arg)
6391 {
6392         switch (filter_type) {
6393         case RTE_ETH_FILTER_GENERIC:
6394                 if (filter_op != RTE_ETH_FILTER_GET) {
6395                         rte_errno = EINVAL;
6396                         return -rte_errno;
6397                 }
6398                 *(const void **)arg = &mlx5_flow_ops;
6399                 return 0;
6400         case RTE_ETH_FILTER_FDIR:
6401                 return flow_fdir_ctrl_func(dev, filter_op, arg);
6402         default:
6403                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
6404                         dev->data->port_id, filter_type);
6405                 rte_errno = ENOTSUP;
6406                 return -rte_errno;
6407         }
6408         return 0;
6409 }
6410
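/*
 * Editor's sketch (hypothetical application code): the legacy filter API
 * entry point that reaches flow_fdir_ctrl_func() above; the filter
 * contents themselves are placeholders.
 */
#if 0 /* illustration only, not compiled */
static int
add_fdir_filter(uint16_t port_id, struct rte_eth_fdir_filter *filter)
{
        /* Dispatches to mlx5_dev_filter_ctrl() on an mlx5 port. */
        return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                       RTE_ETH_FILTER_ADD, filter);
}
#endif
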
6411 /**
6412  * Create the needed meter and suffix tables.
6413  *
6414  * @param[in] dev
6415  *   Pointer to Ethernet device.
6416  * @param[in] fm
6417  *   Pointer to the flow meter.
6418  *
6419  * @return
6420  *   Pointer to table set on success, NULL otherwise.
6421  */
6422 struct mlx5_meter_domains_infos *
6423 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
6424                           const struct mlx5_flow_meter *fm)
6425 {
6426         const struct mlx5_flow_driver_ops *fops;
6427
6428         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6429         return fops->create_mtr_tbls(dev, fm);
6430 }
6431
6432 /**
6433  * Destroy the meter table set.
6434  *
6435  * @param[in] dev
6436  *   Pointer to Ethernet device.
6437  * @param[in] tbl
6438  *   Pointer to the meter table set.
6439  *
6440  * @return
6441  *   0 on success.
6442  */
6443 int
6444 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
6445                            struct mlx5_meter_domains_infos *tbls)
6446 {
6447         const struct mlx5_flow_driver_ops *fops;
6448
6449         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6450         return fops->destroy_mtr_tbls(dev, tbls);
6451 }
6452
6453 /**
6454  * Create policer rules.
6455  *
6456  * @param[in] dev
6457  *   Pointer to Ethernet device.
6458  * @param[in] fm
6459  *   Pointer to flow meter structure.
6460  * @param[in] attr
6461  *   Pointer to flow attributes.
6462  *
6463  * @return
6464  *   0 on success, -1 otherwise.
6465  */
6466 int
6467 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
6468                                struct mlx5_flow_meter *fm,
6469                                const struct rte_flow_attr *attr)
6470 {
6471         const struct mlx5_flow_driver_ops *fops;
6472
6473         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6474         return fops->create_policer_rules(dev, fm, attr);
6475 }
6476
6477 /**
6478  * Destroy policer rules.
6479  *
6480  * @param[in] fm
6481  *   Pointer to flow meter structure.
6482  * @param[in] attr
6483  *   Pointer to flow attributes.
6484  *
6485  * @return
6486  *   0 on success, -1 otherwise.
6487  */
6488 int
6489 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
6490                                 struct mlx5_flow_meter *fm,
6491                                 const struct rte_flow_attr *attr)
6492 {
6493         const struct mlx5_flow_driver_ops *fops;
6494
6495         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6496         return fops->destroy_policer_rules(dev, fm, attr);
6497 }
6498
6499 /**
6500  * Allocate a counter.
6501  *
6502  * @param[in] dev
6503  *   Pointer to Ethernet device structure.
6504  *
6505  * @return
6506  *   Index to the allocated counter on success, 0 otherwise.
6507  */
6508 uint32_t
6509 mlx5_counter_alloc(struct rte_eth_dev *dev)
6510 {
6511         const struct mlx5_flow_driver_ops *fops;
6512         struct rte_flow_attr attr = { .transfer = 0 };
6513
6514         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6515                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6516                 return fops->counter_alloc(dev);
6517         }
6518         DRV_LOG(ERR,
6519                 "port %u counter allocation is not supported.",
6520                  dev->data->port_id);
6521         return 0;
6522 }
6523
6524 /**
6525  * Free a counter.
6526  *
6527  * @param[in] dev
6528  *   Pointer to Ethernet device structure.
6529  * @param[in] cnt
6530  *   Index to the counter to be freed.
6531  */
6532 void
6533 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
6534 {
6535         const struct mlx5_flow_driver_ops *fops;
6536         struct rte_flow_attr attr = { .transfer = 0 };
6537
6538         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6539                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6540                 fops->counter_free(dev, cnt);
6541                 return;
6542         }
6543         DRV_LOG(ERR,
6544                 "port %u counter free is not supported.",
6545                  dev->data->port_id);
6546 }
6547
6548 /**
6549  * Query counter statistics.
6550  *
6551  * @param[in] dev
6552  *   Pointer to Ethernet device structure.
6553  * @param[in] cnt
6554  *   Index to counter to query.
6555  * @param[in] clear
6556  *   Set to clear counter statistics.
6557  * @param[out] pkts
6558  *   Location to save the number of packets that hit the counter.
6559  * @param[out] bytes
6560  *   Location to save the number of bytes that hit the counter.
6561  *
6562  * @return
6563  *   0 on success, a negative errno value otherwise.
6564  */
6565 int
6566 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
6567                    bool clear, uint64_t *pkts, uint64_t *bytes)
6568 {
6569         const struct mlx5_flow_driver_ops *fops;
6570         struct rte_flow_attr attr = { .transfer = 0 };
6571
6572         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6573                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6574                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
6575         }
6576         DRV_LOG(ERR,
6577                 "port %u counter query is not supported.",
6578                  dev->data->port_id);
6579         return -ENOTSUP;
6580 }
6581
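/*
 * Illustrative round trip over the three counter entry points above
 * (a hypothetical helper, not part of the driver; these calls are
 * normally reached through the rte_flow COUNT action and succeed only
 * on DV-enabled ports).
 */
static void
example_counter_roundtrip(struct rte_eth_dev *dev)
{
        uint64_t pkts = 0;
        uint64_t bytes = 0;
        uint32_t cnt = mlx5_counter_alloc(dev); /* 0 means failure. */

        if (!cnt)
                return;
        /* Read the statistics and clear them in the same call. */
        if (!mlx5_counter_query(dev, cnt, true, &pkts, &bytes))
                DRV_LOG(DEBUG, "counter %u: %lu packets, %lu bytes", cnt,
                        (unsigned long)pkts, (unsigned long)bytes);
        mlx5_counter_free(dev, cnt);
}
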
6582 #define MLX5_POOL_QUERY_FREQ_US 1000000
6583
6584 /**
6585  * Get the number of all valid pools.
6586  *
6587  * @param[in] sh
6588  *   Pointer to mlx5_dev_ctx_shared object.
6589  *
6590  * @return
6591  *   The number of all valid pools.
6592  */
6593 static uint32_t
6594 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
6595 {
6596         int i;
6597         uint32_t pools_n = 0;
6598
6599         for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
6600                 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
6601         return pools_n;
6602 }
6603
6604 /**
6605  * Set the periodic procedure for triggering asynchronous batch queries for all
6606  * the counter pools.
6607  *
6608  * @param[in] sh
6609  *   Pointer to mlx5_dev_ctx_shared object.
6610  */
6611 void
6612 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6613 {
6614         uint32_t pools_n, us;
6615
6616         pools_n = mlx5_get_all_valid_pool_count(sh);
6617         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
6618         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6619         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6620                 sh->cmng.query_thread_on = 0;
6621                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
6622         } else {
6623                 sh->cmng.query_thread_on = 1;
6624         }
6625 }
6626
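/*
 * Worked example of the pacing above (a hypothetical helper, not used
 * by the driver): the query budget MLX5_POOL_QUERY_FREQ_US is spread
 * evenly over the valid pools, e.g. 4 pools give a 250000 us alarm
 * period. Since each alarm queries a single pool, every pool is still
 * visited about once per second regardless of the pool count. Like the
 * division in mlx5_set_query_alarm(), this assumes at least one valid
 * pool.
 */
static inline uint32_t
example_query_interval_us(uint32_t pools_n)
{
        return MLX5_POOL_QUERY_FREQ_US / pools_n;
}
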
6627 /**
6628  * The periodic procedure for triggering asynchronous batch queries for all the
6629  * counter pools. This function is called from the host thread.
6630  *
6631  * @param[in] arg
6632  *   The parameter for the alarm process.
6633  */
6634 void
6635 mlx5_flow_query_alarm(void *arg)
6636 {
6637         struct mlx5_dev_ctx_shared *sh = arg;
6638         struct mlx5_devx_obj *dcs;
6639         uint16_t offset;
6640         int ret;
6641         uint8_t batch = sh->cmng.batch;
6642         uint8_t age = sh->cmng.age;
6643         uint16_t pool_index = sh->cmng.pool_index;
6644         struct mlx5_pools_container *cont;
6645         struct mlx5_flow_counter_pool *pool;
6646         int cont_loop = MLX5_CCONT_TYPE_MAX;
6647
6648         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6649                 goto set_alarm;
6650 next_container:
6651         cont = MLX5_CNT_CONTAINER(sh, batch, age);
6652         rte_spinlock_lock(&cont->resize_sl);
6653         if (!cont->pools) {
6654                 rte_spinlock_unlock(&cont->resize_sl);
6655                 /* Check if all the containers are empty. */
6656                 if (unlikely(--cont_loop == 0))
6657                         goto set_alarm;
6658                 batch ^= 0x1;
6659                 pool_index = 0;
6660                 if (batch == 0 && pool_index == 0) {
6661                         age ^= 0x1;
6662                         sh->cmng.batch = batch;
6663                         sh->cmng.age = age;
6664                 }
6665                 goto next_container;
6666         }
6667         pool = cont->pools[pool_index];
6668         rte_spinlock_unlock(&cont->resize_sl);
6669         if (pool->raw_hw)
6670                 /* There is a pool query in progress. */
6671                 goto set_alarm;
6672         pool->raw_hw = LIST_FIRST(&sh->cmng.free_stat_raws);
6674         if (!pool->raw_hw)
6675                 /* No free counter statistics raw memory. */
6676                 goto set_alarm;
6677         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
6678                                                               (&pool->a64_dcs);
6679         if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) {
6680                 /* Pool without valid counter. */
6681                 pool->raw_hw = NULL;
6682                 goto next_pool;
6683         }
6684         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
6685         /*
6686          * Identify more efficiently the counters released between the
6687          * query trigger and the query handler. A counter released in
6688          * this gap period should wait for a new query round, as the
6689          * newly arrived packets are not yet taken into account.
6690          */
6691         pool->query_gen++;
6692         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
6693                                                offset, NULL, NULL,
6694                                                pool->raw_hw->mem_mng->dm->id,
6695                                                (void *)(uintptr_t)
6696                                                (pool->raw_hw->data + offset),
6697                                                sh->devx_comp,
6698                                                (uint64_t)(uintptr_t)pool);
6699         if (ret) {
6700                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6701                         " %d", pool->min_dcs->id);
6702                 pool->raw_hw = NULL;
6703                 goto set_alarm;
6704         }
6705         pool->raw_hw->min_dcs_id = dcs->id;
6706         LIST_REMOVE(pool->raw_hw, next);
6707         sh->cmng.pending_queries++;
6708 next_pool:
6709         pool_index++;
6710         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
6711                 batch ^= 0x1;
6712                 pool_index = 0;
6713                 if (batch == 0 && pool_index == 0)
6714                         age ^= 0x1;
6715         }
6716 set_alarm:
6717         sh->cmng.batch = batch;
6718         sh->cmng.pool_index = pool_index;
6719         sh->cmng.age = age;
6720         mlx5_set_query_alarm(sh);
6721 }
6722
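/*
 * Illustrative sketch of the container round-robin in the alarm handler
 * above (a hypothetical helper, not part of the driver): the four
 * counter containers are visited in the order (batch, age) = (0,0) ->
 * (1,0) -> (0,1) -> (1,1) -> (0,0) ..., flipping batch on every
 * pool-array wrap and flipping age whenever batch wraps back to 0.
 */
static void
example_container_order(void)
{
        uint8_t batch = 0;
        uint8_t age = 0;
        int i;

        for (i = 0; i < 8; i++) {
                DRV_LOG(DEBUG, "step %d: batch=%u age=%u", i,
                        (unsigned int)batch, (unsigned int)age);
                batch ^= 0x1;
                if (batch == 0)
                        age ^= 0x1;
        }
}
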
6723 /**
6724  * Check for newly aged flows in the counter pool and raise the aging event.
6725  *
6726  * @param[in] sh
6727  *   Pointer to mlx5_dev_ctx_shared object.
6728  * @param[in] pool
6729  *   Pointer to the current counter pool.
6730  */
6731 static void
6732 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6733                    struct mlx5_flow_counter_pool *pool)
6734 {
6735         struct mlx5_priv *priv;
6736         struct mlx5_flow_counter *cnt;
6737         struct mlx5_age_info *age_info;
6738         struct mlx5_age_param *age_param;
6739         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6740         struct mlx5_counter_stats_raw *prev = pool->raw;
6741         uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
6742         uint32_t i;
6743
6744         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6745                 cnt = MLX5_POOL_GET_CNT(pool, i);
6746                 age_param = MLX5_CNT_TO_AGE(cnt);
6747                 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
6748                         continue;
6749                 if (cur->data[i].hits != prev->data[i].hits) {
6750                         age_param->expire = curr + age_param->timeout;
6751                         continue;
6752                 }
6753                 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
6754                         continue;
6755                 /*
6756                  * Hold the lock first: if a release happens between
6757                  * setting the AGE_TMOUT state and the tailq operation,
6758                  * the release procedure may otherwise delete a
6759                  * non-existent tailq node.
6760                  */
6761                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6762                 age_info = GET_PORT_AGE_INFO(priv);
6763                 rte_spinlock_lock(&age_info->aged_sl);
6764                 /* If the cmpset fails, a release has happened. */
6765                 if (rte_atomic16_cmpset((volatile uint16_t *)
6766                                         &age_param->state,
6767                                         AGE_CANDIDATE,
6768                                         AGE_TMOUT) ==
6769                                         AGE_CANDIDATE) {
6770                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6771                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6772                 }
6773                 rte_spinlock_unlock(&age_info->aged_sl);
6774         }
6775         for (i = 0; i < sh->max_port; i++) {
6776                 age_info = &sh->port[i].age_info;
6777                 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
6778                         continue;
6779                 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
6780                         rte_eth_dev_callback_process
6781                                 (&rte_eth_devices[sh->port[i].devx_ih_port_id],
6782                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
6783                 age_info->flags = 0;
6784         }
6785 }
6786
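/*
 * Illustrative form of the wrap-safe expiry test used above (a
 * hypothetical helper, not part of the driver): ages are tracked in
 * 1/10-second ticks truncated to 16 bits, so the subtraction wraps at
 * 16 bits as well. A difference below UINT16_MAX / 2 means curr has
 * already passed expire (the flow aged out); a larger difference means
 * expire is still ahead.
 */
static bool
example_aged_out(uint16_t curr, uint16_t expire)
{
        return (uint16_t)(curr - expire) < (UINT16_MAX / 2);
}
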
6787 /**
6788  * Handler for the HW response carrying the ready values of an asynchronous
6789  * batch query. This function is called from the host thread.
6790  *
6791  * @param[in] sh
6792  *   The pointer to the shared device context.
6793  * @param[in] async_id
6794  *   The Devx async ID.
6795  * @param[in] status
6796  *   The status of the completion.
6797  */
6798 void
6799 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6800                                   uint64_t async_id, int status)
6801 {
6802         struct mlx5_flow_counter_pool *pool =
6803                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6804         struct mlx5_counter_stats_raw *raw_to_free;
6805         uint8_t age = !!IS_AGE_POOL(pool);
6806         uint8_t query_gen = pool->query_gen ^ 1;
6807         struct mlx5_pools_container *cont =
6808                 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
6809
6810         if (unlikely(status)) {
6811                 raw_to_free = pool->raw_hw;
6812         } else {
6813                 raw_to_free = pool->raw;
6814                 if (IS_AGE_POOL(pool))
6815                         mlx5_flow_aging_check(sh, pool);
6816                 rte_spinlock_lock(&pool->sl);
6817                 pool->raw = pool->raw_hw;
6818                 rte_spinlock_unlock(&pool->sl);
6819                 /* Be sure the new raw counters data is updated in memory. */
6820                 rte_io_wmb();
6821                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6822                         rte_spinlock_lock(&cont->csl);
6823                         TAILQ_CONCAT(&cont->counters,
6824                                      &pool->counters[query_gen], next);
6825                         rte_spinlock_unlock(&cont->csl);
6826                 }
6827         }
6828         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6829         pool->raw_hw = NULL;
6830         sh->cmng.pending_queries--;
6831 }
6832
6833 /**
6834  * Translate the rte_flow group index to HW table value.
6835  *
6836  * @param[in] attributes
6837  *   Pointer to flow attributes
6838  * @param[in] external
6839  *   Nonzero when the flow rule is created by a request external to the PMD.
6840  * @param[in] group
6841  *   rte_flow group index value.
6842  * @param[in] fdb_def_rule
6843  *   Whether the FDB jump to table 1 is configured.
6844  * @param[out] table
6845  *   HW table value.
6846  * @param[out] error
6847  *   Pointer to error structure.
6848  *
6849  * @return
6850  *   0 on success, a negative errno value otherwise and rte_errno is set.
6851  */
6852 int
6853 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
6854                          uint32_t group, bool fdb_def_rule, uint32_t *table,
6855                          struct rte_flow_error *error)
6856 {
6857         if (attributes->transfer && external && fdb_def_rule) {
6858                 if (group == UINT32_MAX)
6859                         return rte_flow_error_set
6860                                                 (error, EINVAL,
6861                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6862                                                  NULL,
6863                                                  "group index not supported");
6864                 *table = group + 1;
6865         } else {
6866                 *table = group;
6867         }
6868         return 0;
6869 }
6870
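/*
 * Worked example of the translation above (a hypothetical helper, not
 * part of the driver): with the FDB default rule enabled, an external
 * transfer rule on group 3 lands in HW table 4, since table 0 is taken
 * by the default jump rule; internal or non-transfer rules keep
 * table == group.
 */
static void
example_group_translation(void)
{
        struct rte_flow_attr attr = { .transfer = 1 };
        struct rte_flow_error error;
        uint32_t table = 0;

        /* External transfer rule with fdb_def_rule on: group 3 -> table 4. */
        if (!mlx5_flow_group_to_table(&attr, true, 3, true, &table, &error))
                MLX5_ASSERT(table == 4);
}
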
6871 /**
6872  * Discover availability of metadata reg_c's.
6873  *
6874  * Iteratively use test flows to check availability.
6875  *
6876  * @param[in] dev
6877  *   Pointer to the Ethernet device structure.
6878  *
6879  * @return
6880  *   0 on success, a negative errno value otherwise and rte_errno is set.
6881  */
6882 int
6883 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
6884 {
6885         struct mlx5_priv *priv = dev->data->dev_private;
6886         struct mlx5_dev_config *config = &priv->config;
6887         enum modify_reg idx;
6888         int n = 0;
6889
6890         /* reg_c[0] and reg_c[1] are reserved. */
6891         config->flow_mreg_c[n++] = REG_C_0;
6892         config->flow_mreg_c[n++] = REG_C_1;
6893         /* Discover availability of other reg_c's. */
6894         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6895                 struct rte_flow_attr attr = {
6896                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6897                         .priority = MLX5_FLOW_PRIO_RSVD,
6898                         .ingress = 1,
6899                 };
6900                 struct rte_flow_item items[] = {
6901                         [0] = {
6902                                 .type = RTE_FLOW_ITEM_TYPE_END,
6903                         },
6904                 };
6905                 struct rte_flow_action actions[] = {
6906                         [0] = {
6907                                 .type = (enum rte_flow_action_type)
6908                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6909                                 .conf = &(struct mlx5_flow_action_copy_mreg){
6910                                         .src = REG_C_1,
6911                                         .dst = idx,
6912                                 },
6913                         },
6914                         [1] = {
6915                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6916                                 .conf = &(struct rte_flow_action_jump){
6917                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6918                                 },
6919                         },
6920                         [2] = {
6921                                 .type = RTE_FLOW_ACTION_TYPE_END,
6922                         },
6923                 };
6924                 uint32_t flow_idx;
6925                 struct rte_flow *flow;
6926                 struct rte_flow_error error;
6927
6928                 if (!config->dv_flow_en)
6929                         break;
6930                 /* Create internal flow, validation skips copy action. */
6931                 flow_idx = flow_list_create(dev, NULL, &attr, items,
6932                                             actions, false, &error);
6933                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6934                                       flow_idx);
6935                 if (!flow)
6936                         continue;
6937                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6938                         config->flow_mreg_c[n++] = idx;
6939                 flow_list_destroy(dev, NULL, flow_idx);
6940         }
6941         for (; n < MLX5_MREG_C_NUM; ++n)
6942                 config->flow_mreg_c[n] = REG_NON;
6943         return 0;
6944 }
6945
6946 /**
6947  * Dump the flow raw HW data to a file.
6948  *
6949  * @param[in] dev
6950  *   The pointer to the Ethernet device.
6951  * @param[in] file
6952  *   A pointer to a file for output.
6953  * @param[out] error
6954  *   Perform verbose error reporting if not NULL. PMDs initialize this
6955  *   structure in case of error only.
6956  * @return
6957  *   0 on success, a negative value otherwise.
6958  */
6959 int
6960 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6961                    FILE *file,
6962                    struct rte_flow_error *error __rte_unused)
6963 {
6964         struct mlx5_priv *priv = dev->data->dev_private;
6965         struct mlx5_dev_ctx_shared *sh = priv->sh;
6966
6967         if (!priv->config.dv_flow_en) {
6968                 if (fputs("device dv flow disabled\n", file) <= 0)
6969                         return -errno;
6970                 return -ENOTSUP;
6971         }
6972         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6973                                        sh->tx_domain, file);
6974 }
6975
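/*
 * Application-side usage sketch (a hypothetical helper; assumes
 * <stdio.h> and a port probed with dv_flow_en=1): the public
 * rte_flow_dev_dump() API reaches mlx5_flow_dev_dump() through the
 * .dev_dump callback of mlx5_flow_ops.
 */
static int
example_dump_flows(uint16_t port_id)
{
        struct rte_flow_error error;
        FILE *f = fopen("/tmp/mlx5_flows.txt", "w");
        int ret;

        if (f == NULL)
                return -errno;
        ret = rte_flow_dev_dump(port_id, f, &error);
        fclose(f);
        return ret;
}
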
6976 /**
6977  * Get aged-out flows.
6978  *
6979  * @param[in] dev
6980  *   Pointer to the Ethernet device structure.
6981  * @param[in] contexts
6982  *   The address of an array of pointers to the aged-out flow contexts.
6983  * @param[in] nb_contexts
6984  *   The length of the context array.
6985  * @param[out] error
6986  *   Perform verbose error reporting if not NULL. Initialized in case of
6987  *   error only.
6988  *
6989  * @return
6990  *   The number of contexts retrieved on success, a negative errno value
6991  *   otherwise. If nb_contexts is 0, return the total number of aged
6992  *   contexts; if nb_contexts is not 0, return the number of aged flows
6993  *   reported in the context array.
6994  */
6995 int
6996 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
6997                         uint32_t nb_contexts, struct rte_flow_error *error)
6998 {
6999         const struct mlx5_flow_driver_ops *fops;
7000         struct rte_flow_attr attr = { .transfer = 0 };
7001
7002         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7003                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7004                 return fops->get_aged_flows(dev, contexts, nb_contexts,
7005                                                     error);
7006         }
7007         DRV_LOG(ERR,
7008                 "port %u get aged flows is not supported.",
7009                  dev->data->port_id);
7010         return -ENOTSUP;
7011 }
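
/*
 * Application-side usage sketch for the handler above (a hypothetical
 * helper; assumes <stdlib.h>): the public rte_flow_get_aged_flows() API
 * lands here through the .get_aged_flows callback. The usual pattern is
 * to query the count with nb_contexts == 0 first, then fetch that many
 * contexts.
 */
static void
example_collect_aged(uint16_t port_id)
{
        struct rte_flow_error error;
        void **contexts;
        int n;

        n = rte_flow_get_aged_flows(port_id, NULL, 0, &error);
        if (n <= 0)
                return;
        contexts = malloc(sizeof(void *) * n);
        if (contexts == NULL)
                return;
        n = rte_flow_get_aged_flows(port_id, contexts, n, &error);
        /* Each context is the one given in the flow's AGE action conf. */
        free(contexts);
}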