/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_cycles.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>
#include <mlx5_malloc.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_flow_os.h"
#include "mlx5_rxtx.h"

/** Device flow drivers. */
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
        [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
        [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
        [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
        [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

/** Helper macro to build input graph for mlx5_flow_expand_rss(). */
#define MLX5_FLOW_EXPAND_RSS_NEXT(...) \
        (const int []){ \
                __VA_ARGS__, 0, \
        }

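/*
 * Illustrative sketch (not part of the driver): the helper macro above
 * builds a zero-terminated index array, so a node listing two successors
 * expands as follows:
 *
 *   MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6)
 *   // becomes:
 *   (const int []){ MLX5_EXPANSION_IPV4, MLX5_EXPANSION_IPV6, 0, }
 *
 * The trailing 0 is why a zero index can serve as a list terminator and
 * why node 0 of a graph must never be a reachable node.
 */
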
/** Node object of input graph for mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_node {
        const int *const next;
        /**<
         * List of next node indexes; a zero index is interpreted as a
         * terminator.
         */
        const enum rte_flow_item_type type;
        /**< Pattern item type of current node. */
        uint64_t rss_types;
        /**<
         * RSS types bit-field associated with this node
         * (see ETH_RSS_* definitions).
         */
};

/** Object returned by mlx5_flow_expand_rss(). */
struct mlx5_flow_expand_rss {
        uint32_t entries;
        /**< Number of valid entries in @p entry[]. */
        struct {
                struct rte_flow_item *pattern; /**< Expanded pattern array. */
                uint32_t priority; /**< Priority offset for each expansion. */
        } entry[];
};
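
/*
 * Illustrative layout sketch (an assumption for clarity, not normative):
 * mlx5_flow_expand_rss() below sizes the header for 8 entries and places
 * the expanded pattern items right after the entry[] array, so
 * entry[i].pattern points into the tail of the same buffer:
 *
 *   | entries | entry[0..7] | items of entry 0 | items of entry 1 | ...
 */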

static enum rte_flow_item_type
mlx5_flow_expand_rss_item_complete(const struct rte_flow_item *item)
{
        enum rte_flow_item_type ret = RTE_FLOW_ITEM_TYPE_VOID;
        uint16_t ether_type = 0;
        uint16_t ether_type_m;
        uint8_t ip_next_proto = 0;
        uint8_t ip_next_proto_m;

        if (item == NULL || item->spec == NULL)
                return ret;
        switch (item->type) {
        case RTE_FLOW_ITEM_TYPE_ETH:
                if (item->mask)
                        ether_type_m = ((const struct rte_flow_item_eth *)
                                                (item->mask))->type;
                else
                        ether_type_m = rte_flow_item_eth_mask.type;
                if (ether_type_m != RTE_BE16(0xFFFF))
                        break;
                ether_type = ((const struct rte_flow_item_eth *)
                                (item->spec))->type;
                if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
                        ret = RTE_FLOW_ITEM_TYPE_VLAN;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_VLAN:
                if (item->mask)
                        ether_type_m = ((const struct rte_flow_item_vlan *)
                                                (item->mask))->inner_type;
                else
                        ether_type_m = rte_flow_item_vlan_mask.inner_type;
                if (ether_type_m != RTE_BE16(0xFFFF))
                        break;
                ether_type = ((const struct rte_flow_item_vlan *)
                                (item->spec))->inner_type;
                if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV4)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else if (rte_be_to_cpu_16(ether_type) == RTE_ETHER_TYPE_VLAN)
                        ret = RTE_FLOW_ITEM_TYPE_VLAN;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_IPV4:
                if (item->mask)
                        ip_next_proto_m = ((const struct rte_flow_item_ipv4 *)
                                        (item->mask))->hdr.next_proto_id;
                else
                        ip_next_proto_m =
                                rte_flow_item_ipv4_mask.hdr.next_proto_id;
                if (ip_next_proto_m != 0xFF)
                        break;
                ip_next_proto = ((const struct rte_flow_item_ipv4 *)
                                (item->spec))->hdr.next_proto_id;
                if (ip_next_proto == IPPROTO_UDP)
                        ret = RTE_FLOW_ITEM_TYPE_UDP;
                else if (ip_next_proto == IPPROTO_TCP)
                        ret = RTE_FLOW_ITEM_TYPE_TCP;
                else if (ip_next_proto == IPPROTO_IP)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (ip_next_proto == IPPROTO_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        case RTE_FLOW_ITEM_TYPE_IPV6:
                if (item->mask)
                        ip_next_proto_m = ((const struct rte_flow_item_ipv6 *)
                                                (item->mask))->hdr.proto;
                else
                        ip_next_proto_m =
                                rte_flow_item_ipv6_mask.hdr.proto;
                if (ip_next_proto_m != 0xFF)
                        break;
                ip_next_proto = ((const struct rte_flow_item_ipv6 *)
                                (item->spec))->hdr.proto;
                if (ip_next_proto == IPPROTO_UDP)
                        ret = RTE_FLOW_ITEM_TYPE_UDP;
                else if (ip_next_proto == IPPROTO_TCP)
                        ret = RTE_FLOW_ITEM_TYPE_TCP;
                else if (ip_next_proto == IPPROTO_IP)
                        ret = RTE_FLOW_ITEM_TYPE_IPV4;
                else if (ip_next_proto == IPPROTO_IPV6)
                        ret = RTE_FLOW_ITEM_TYPE_IPV6;
                else
                        ret = RTE_FLOW_ITEM_TYPE_END;
                break;
        default:
                ret = RTE_FLOW_ITEM_TYPE_VOID;
                break;
        }
        return ret;
}
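
/*
 * Usage sketch for the helper above (illustrative values only): given a
 * last user item "eth" whose spec/mask fully match type = IPv4, the helper
 * returns the item type that logically follows and may be appended before
 * expansion:
 *
 *   struct rte_flow_item_eth spec = {
 *           .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
 *   };
 *   struct rte_flow_item_eth mask = { .type = RTE_BE16(0xFFFF) };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &spec,
 *           .mask = &mask,
 *   };
 *   // mlx5_flow_expand_rss_item_complete(&item) == RTE_FLOW_ITEM_TYPE_IPV4
 */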

/**
 * Expand RSS flows into several possible flows according to the RSS hash
 * fields requested and the driver capabilities.
 *
 * @param[out] buf
 *   Buffer to store the result expansion.
 * @param[in] size
 *   Buffer size in bytes. If 0, @p buf can be NULL.
 * @param[in] pattern
 *   User flow pattern.
 * @param[in] types
 *   RSS types to expand (see ETH_RSS_* definitions).
 * @param[in] graph
 *   Input graph to expand @p pattern according to @p types.
 * @param[in] graph_root_index
 *   Index of root node in @p graph, typically 0.
 *
 * @return
 *   A positive value representing the size of @p buf in bytes regardless of
 *   @p size on success; a negative errno value otherwise, in which case
 *   rte_errno is set. The following errors are defined:
 *
 *   -E2BIG: the expansion graph @p graph is too deep.
 */
static int
mlx5_flow_expand_rss(struct mlx5_flow_expand_rss *buf, size_t size,
                     const struct rte_flow_item *pattern, uint64_t types,
                     const struct mlx5_flow_expand_node graph[],
                     int graph_root_index)
{
        const int elt_n = 8;
        const struct rte_flow_item *item;
        const struct mlx5_flow_expand_node *node = &graph[graph_root_index];
        const int *next_node;
        const int *stack[elt_n];
        int stack_pos = 0;
        struct rte_flow_item flow_items[elt_n];
        unsigned int i;
        size_t lsize;
        size_t user_pattern_size = 0;
        void *addr = NULL;
        const struct mlx5_flow_expand_node *next = NULL;
        struct rte_flow_item missed_item;
        int missed = 0;
        int elt = 0;
        const struct rte_flow_item *last_item = NULL;

        memset(&missed_item, 0, sizeof(missed_item));
        lsize = offsetof(struct mlx5_flow_expand_rss, entry) +
                elt_n * sizeof(buf->entry[0]);
        if (lsize <= size) {
                buf->entry[0].priority = 0;
                buf->entry[0].pattern = (void *)&buf->entry[elt_n];
                buf->entries = 0;
                addr = buf->entry[0].pattern;
        }
        for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
                if (item->type != RTE_FLOW_ITEM_TYPE_VOID)
                        last_item = item;
                for (i = 0; node->next && node->next[i]; ++i) {
                        next = &graph[node->next[i]];
                        if (next->type == item->type)
                                break;
                }
                if (next)
                        node = next;
                user_pattern_size += sizeof(*item);
        }
        user_pattern_size += sizeof(*item); /* Handle END item. */
        lsize += user_pattern_size;
        /* Copy the user pattern in the first entry of the buffer. */
        if (lsize <= size) {
                rte_memcpy(addr, pattern, user_pattern_size);
                addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                buf->entries = 1;
        }
        /* Start expanding. */
        memset(flow_items, 0, sizeof(flow_items));
        user_pattern_size -= sizeof(*item);
        /*
         * Check if the last valid item has a spec set, whether the pattern
         * needs to be completed, and whether the completed pattern can be
         * used for expansion.
         */
        missed_item.type = mlx5_flow_expand_rss_item_complete(last_item);
        if (missed_item.type == RTE_FLOW_ITEM_TYPE_END) {
                /* Item type END indicates expansion is not required. */
                return lsize;
        }
        if (missed_item.type != RTE_FLOW_ITEM_TYPE_VOID) {
                next = NULL;
                missed = 1;
                for (i = 0; node->next && node->next[i]; ++i) {
                        next = &graph[node->next[i]];
                        if (next->type == missed_item.type) {
                                flow_items[0].type = missed_item.type;
                                flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
                                break;
                        }
                        next = NULL;
                }
        }
        if (next && missed) {
                elt = 2; /* missed item + item end. */
                node = next;
                lsize += elt * sizeof(*item) + user_pattern_size;
                if ((node->rss_types & types) && lsize <= size) {
                        buf->entry[buf->entries].priority = 1;
                        buf->entry[buf->entries].pattern = addr;
                        buf->entries++;
                        rte_memcpy(addr, buf->entry[0].pattern,
                                   user_pattern_size);
                        addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                        rte_memcpy(addr, flow_items, elt * sizeof(*item));
                        addr = (void *)(((uintptr_t)addr) +
                                        elt * sizeof(*item));
                }
        }
        memset(flow_items, 0, sizeof(flow_items));
        next_node = node->next;
        stack[stack_pos] = next_node;
        node = next_node ? &graph[*next_node] : NULL;
        while (node) {
                flow_items[stack_pos].type = node->type;
                if (node->rss_types & types) {
                        /*
                         * Compute the number of items to copy from the
                         * expansion and copy them.
                         * When stack_pos is 0 there is one element in the
                         * stack, plus the additional END item.
                         */
                        elt = stack_pos + 2;
                        flow_items[stack_pos + 1].type = RTE_FLOW_ITEM_TYPE_END;
                        lsize += elt * sizeof(*item) + user_pattern_size;
                        if (lsize <= size) {
                                size_t n = elt * sizeof(*item);

                                buf->entry[buf->entries].priority =
                                        stack_pos + 1 + missed;
                                buf->entry[buf->entries].pattern = addr;
                                buf->entries++;
                                rte_memcpy(addr, buf->entry[0].pattern,
                                           user_pattern_size);
                                addr = (void *)(((uintptr_t)addr) +
                                                user_pattern_size);
                                rte_memcpy(addr, &missed_item,
                                           missed * sizeof(*item));
                                addr = (void *)(((uintptr_t)addr) +
                                        missed * sizeof(*item));
                                rte_memcpy(addr, flow_items, n);
                                addr = (void *)(((uintptr_t)addr) + n);
                        }
                }
                /* Go deeper. */
                if (node->next) {
                        next_node = node->next;
                        /* Reject before stack[] would overflow. */
                        if (stack_pos + 1 == elt_n) {
                                rte_errno = E2BIG;
                                return -rte_errno;
                        }
                        stack[++stack_pos] = next_node;
                } else if (*(next_node + 1)) {
                        /* Follow up with the next possibility. */
                        ++next_node;
                } else {
                        /* Move to the next path. */
                        if (stack_pos)
                                next_node = stack[--stack_pos];
                        next_node++;
                        stack[stack_pos] = next_node;
                }
                node = *next_node ? &graph[*next_node] : NULL;
        }
        /*
         * No expanded flows, but there is a missed item; create one rule
         * for it.
         */
        if (buf->entries == 1 && missed != 0) {
                elt = 2;
                lsize += elt * sizeof(*item) + user_pattern_size;
                if (lsize <= size) {
                        buf->entry[buf->entries].priority = 1;
                        buf->entry[buf->entries].pattern = addr;
                        buf->entries++;
                        flow_items[0].type = missed_item.type;
                        flow_items[1].type = RTE_FLOW_ITEM_TYPE_END;
                        rte_memcpy(addr, buf->entry[0].pattern,
                                   user_pattern_size);
                        addr = (void *)(((uintptr_t)addr) + user_pattern_size);
                        rte_memcpy(addr, flow_items, elt * sizeof(*item));
                        addr = (void *)(((uintptr_t)addr) +
                                        elt * sizeof(*item));
                }
        }
        return lsize;
}
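
/*
 * Usage sketch for mlx5_flow_expand_rss() (hedged example; the buffer size,
 * "rss_types" and "create_flow" are assumptions, not code from this file):
 *
 *   uint8_t buffer[2048];
 *   struct mlx5_flow_expand_rss *buf = (void *)buffer;
 *   uint32_t i;
 *   int ret;
 *
 *   ret = mlx5_flow_expand_rss(buf, sizeof(buffer), pattern, rss_types,
 *                              mlx5_support_expansion, MLX5_EXPANSION_ROOT);
 *   if (ret < 0 || (size_t)ret > sizeof(buffer))
 *           return -rte_errno; // expansion failed or buffer too small
 *   for (i = 0; i < buf->entries; ++i)
 *           // More specific expansions carry a larger priority offset.
 *           create_flow(buf->entry[i].pattern, buf->entry[i].priority);
 */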

enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
        MLX5_EXPANSION_ROOT_OUTER,
        MLX5_EXPANSION_ROOT_ETH_VLAN,
        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_ETH,
        MLX5_EXPANSION_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_VLAN,
        MLX5_EXPANSION_OUTER_IPV4,
        MLX5_EXPANSION_OUTER_IPV4_UDP,
        MLX5_EXPANSION_OUTER_IPV4_TCP,
        MLX5_EXPANSION_OUTER_IPV6,
        MLX5_EXPANSION_OUTER_IPV6_UDP,
        MLX5_EXPANSION_OUTER_IPV6_TCP,
        MLX5_EXPANSION_VXLAN,
        MLX5_EXPANSION_VXLAN_GPE,
        MLX5_EXPANSION_GRE,
        MLX5_EXPANSION_MPLS,
        MLX5_EXPANSION_ETH,
        MLX5_EXPANSION_ETH_VLAN,
        MLX5_EXPANSION_VLAN,
        MLX5_EXPANSION_IPV4,
        MLX5_EXPANSION_IPV4_UDP,
        MLX5_EXPANSION_IPV4_TCP,
        MLX5_EXPANSION_IPV6,
        MLX5_EXPANSION_IPV6_UDP,
        MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct mlx5_flow_expand_node mlx5_support_expansion[] = {
        [MLX5_EXPANSION_ROOT] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
                                                  MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                                                (MLX5_EXPANSION_OUTER_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_OUTER_ETH] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6,
                                                  MLX5_EXPANSION_MPLS),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                  MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_OUTER_IPV4] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV4_UDP,
                         MLX5_EXPANSION_OUTER_IPV4_TCP,
                         MLX5_EXPANSION_GRE,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                  MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_OUTER_IPV6] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV6_UDP,
                         MLX5_EXPANSION_OUTER_IPV6_TCP,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                  MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
        [MLX5_EXPANSION_VXLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        },
        [MLX5_EXPANSION_VXLAN_GPE] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                  MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
        },
        [MLX5_EXPANSION_GRE] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
                .type = RTE_FLOW_ITEM_TYPE_GRE,
        },
        [MLX5_EXPANSION_MPLS] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
        },
        [MLX5_EXPANSION_ETH] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_ETH_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_VLAN] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                  MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_IPV4] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
                                                  MLX5_EXPANSION_IPV4_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_IPV4_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_IPV6] = {
                .next = MLX5_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
                                                  MLX5_EXPANSION_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_IPV6_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
};
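
/*
 * Illustrative walk of the graph above (not code): for the user pattern
 * "eth / ipv4 / end" with types = ETH_RSS_NONFRAG_IPV4_UDP |
 * ETH_RSS_NONFRAG_IPV4_TCP, the traversal from MLX5_EXPANSION_ROOT follows
 * MLX5_EXPANSION_ETH -> MLX5_EXPANSION_IPV4 and then visits the successors
 * MLX5_EXPANSION_IPV4_UDP and MLX5_EXPANSION_IPV4_TCP, producing the extra
 * patterns "eth / ipv4 / udp" and "eth / ipv4 / tcp" beside the original.
 */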

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
        .dev_dump = mlx5_flow_dev_dump,
        .get_aged_flows = mlx5_flow_get_aged_flows,
};
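
/*
 * These callbacks are reached through the generic rte_flow API; e.g. a
 * hedged application-side sketch such as the following ends up in
 * mlx5_flow_validate() for an mlx5 port (attr, pattern and actions are
 * application-provided):
 *
 *   struct rte_flow_error err;
 *   int ret = rte_flow_validate(port_id, &attr, pattern, actions, &err);
 */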

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action actions[2];
        struct rte_flow_action_queue queue;
};

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
        uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
        uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GENEVE,
                .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GRE,
                .ptype = RTE_PTYPE_TUNNEL_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_NVGRE,
                .ptype = RTE_PTYPE_TUNNEL_NVGRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPIP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GTP,
                .ptype = RTE_PTYPE_TUNNEL_GTPU,
        },
};
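
/*
 * Example of the mapping above (illustrative): a flow whose detected layers
 * include MLX5_FLOW_LAYER_VXLAN makes the Rx queues it targets report
 * RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP in rxq->tunnel, provided no
 * other tunnel type is mixed on the same queue (see
 * flow_rxq_tunnel_ptype_update() below).
 */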

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
                     enum mlx5_feature_name feature,
                     uint32_t id,
                     struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg start_reg;
        bool skip_mtr_reg = false;

        switch (feature) {
        case MLX5_HAIRPIN_RX:
                return REG_B;
        case MLX5_HAIRPIN_TX:
                return REG_A;
        case MLX5_METADATA_RX:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_B;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_METADATA_TX:
                return REG_A;
        case MLX5_METADATA_FDB:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NON;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_FLOW_MARK:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NON;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_1;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_0;
                }
                break;
        case MLX5_MTR_SFX:
                /*
                 * If meter color and flow match share one register, flow match
                 * should use the meter color register for match.
                 */
                if (priv->mtr_reg_share)
                        return priv->mtr_color_reg;
                else
                        return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                               REG_C_3;
        case MLX5_MTR_COLOR:
                MLX5_ASSERT(priv->mtr_color_reg != REG_NON);
                return priv->mtr_color_reg;
        case MLX5_COPY_MARK:
                /*
                 * The metadata COPY_MARK register is used in the meter suffix
                 * sub-flow only while a meter is in use, so it is safe to
                 * share the same register.
                 */
                return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
        case MLX5_APP_TAG:
                /*
                 * If the meter is enabled, it engages a register for color
                 * match and flow match. If the meter color match does not use
                 * REG_C_2, the REG_C_x used by the meter color match must be
                 * skipped.
                 * If the meter is disabled, all available registers can be
                 * used.
                 */
                start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                            (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
                skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
                if (id > (REG_C_7 - start_reg))
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "invalid tag id");
                if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NON)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                /*
                 * This case means the meter is using a REG_C_x greater than
                 * REG_C_2. Take care not to conflict with the meter color
                 * REG_C_x.
                 * If the available index REG_C_y >= REG_C_x, skip the
                 * color register.
                 */
                if (skip_mtr_reg && config->flow_mreg_c
                    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
                        if (id >= (REG_C_7 - start_reg))
                                return rte_flow_error_set(error, EINVAL,
                                                       RTE_FLOW_ERROR_TYPE_ITEM,
                                                        NULL, "invalid tag id");
                        if (config->flow_mreg_c
                            [id + 1 + start_reg - REG_C_0] != REG_NON)
                                return config->flow_mreg_c
                                               [id + 1 + start_reg - REG_C_0];
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                }
                return config->flow_mreg_c[id + start_reg - REG_C_0];
        }
        MLX5_ASSERT(false);
        return rte_flow_error_set(error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                  NULL, "invalid feature name");
}
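
/*
 * Illustrative lookups (results depend on the device configuration): with
 * dv_xmeta_en == MLX5_XMETA_MODE_META16, MLX5_METADATA_RX maps to REG_C_0
 * and MLX5_FLOW_MARK to REG_C_1, while in MLX5_XMETA_MODE_LEGACY the mark
 * feature has no register and REG_NON is returned. A hedged call sketch:
 *
 *   int reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, &error);
 *   if (reg < 0)
 *           return reg; // rte_errno was set by the helper
 */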

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;

        /*
         * Having an available reg_c can be regarded as supporting extensive
         * flow metadata registers, which means:
         * - the metadata register copy action by modify header is available.
         * - 16 modify header actions are supported.
         * - reg_c's are preserved across different domains (FDB and NIC) on
         *   packet loopback by flow lookup miss.
         */
        return config->flow_mreg_c[2] != REG_NON;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[in] range_accepted
 *   True if range of values is accepted for specific fields, false otherwise.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
                          bool range_accepted,
                          struct rte_flow_error *error)
{
        unsigned int i;

        MLX5_ASSERT(nic_mask);
        for (i = 0; i < size; ++i)
                if ((nic_mask[i] | mask[i]) != nic_mask[i])
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "mask enables non supported"
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "mask/last without a spec is not"
                                          " supported");
        if (item->spec && item->last && !range_accepted) {
                uint8_t spec[size];
                uint8_t last[size];
                unsigned int i;
                int ret;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
                        last[i] = ((const uint8_t *)item->last)[i] & mask[i];
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "range is not valid");
        }
        return 0;
}
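
/*
 * Usage sketch (hedged; the UDP nic_mask here is a hypothetical
 * driver-supported mask, not taken from this file):
 *
 *   static const struct rte_flow_item_udp nic_mask = {
 *           .hdr = {
 *                   .src_port = RTE_BE16(0xffff),
 *                   .dst_port = RTE_BE16(0xffff),
 *           },
 *   };
 *   const uint8_t *mask = item->mask ? (const uint8_t *)item->mask :
 *                         (const uint8_t *)&rte_flow_item_udp_mask;
 *   ret = mlx5_flow_item_acceptable(item, mask,
 *                                   (const uint8_t *)&nic_mask,
 *                                   sizeof(struct rte_flow_item_udp),
 *                                   false, error); // ranges not accepted
 */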

/**
 * Adjust the hash fields according to the @p rss_desc information.
 *
 * @param[in] rss_desc
 *   Pointer to the RSS descriptor.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
                            int tunnel __rte_unused, uint64_t layer_types,
                            uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        int rss_request_inner = rss_desc->level >= 2;

        /* Check RSS hash level for tunnel. */
        if (tunnel && rss_request_inner)
                hash_fields |= IBV_RX_HASH_INNER;
        else if (tunnel || rss_request_inner)
                return 0;
#endif
        /* Check if requested layer matches RSS hash fields. */
        if (!(rss_desc->types & layer_types))
                return 0;
        return hash_fields;
}
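
/*
 * Illustrative call (hedged): for an inner UDP hash on a tunnel flow with
 * rss_desc->level == 2 and ETH_RSS_NONFRAG_IPV4_UDP present in
 * rss_desc->types, the helper keeps the requested fields and adds
 * IBV_RX_HASH_INNER when tunnel support is compiled in:
 *
 *   fields = mlx5_flow_hashfields_adjust(rss_desc, 1,
 *                                        ETH_RSS_NONFRAG_IPV4_UDP,
 *                                        IBV_RX_HASH_SRC_PORT_UDP |
 *                                        IBV_RX_HASH_DST_PORT_UDP);
 */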

/**
 * Look up and set the ptype in the Rx queue data. Only a single tunnel
 * ptype can be used; if several tunnel rules are applied to this queue,
 * the tunnel ptype is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
        unsigned int i;
        uint32_t tunnel_ptype = 0;

        /* Look up the ptype to use. */
        for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
                if (!rxq_ctrl->flow_tunnels_n[i])
                        continue;
                if (!tunnel_ptype) {
                        tunnel_ptype = tunnels_info[i].ptype;
                } else {
                        tunnel_ptype = 0;
                        break;
                }
        }
        rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the
 * device flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
                       struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct mlx5_hrxq *hrxq;
        unsigned int i;

        if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
                return;
        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
        if (!hrxq)
                return;
        for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
                int idx = hrxq->ind_table->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                /*
                 * To support metadata register copy on Tx loopback,
                 * this must always be enabled (metadata may arrive
                 * from another port, not only from local flows).
                 */
                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n++;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Increase the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]++;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint32_t handle_idx;
        struct mlx5_flow_handle *dev_handle;

        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       handle_idx, dev_handle, next)
                flow_drv_rxq_flags_set(dev, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
                        struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = dev_handle->mark;
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct mlx5_hrxq *hrxq;
        unsigned int i;

        if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
                return;
        hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
                              dev_handle->rix_hrxq);
        if (!hrxq)
                return;
        MLX5_ASSERT(dev->data->dev_started);
        for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
                int idx = hrxq->ind_table->queues[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->flow_mark_n--;
                        rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Decrease the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]--;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint32_t handle_idx;
        struct mlx5_flow_handle *dev_handle;

        SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
                       handle_idx, dev_handle, next)
                flow_drv_rxq_flags_trim(dev, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                unsigned int j;

                if (!(*priv->rxqs)[i])
                        continue;
                rxq_ctrl = container_of((*priv->rxqs)[i],
                                        struct mlx5_rxq_ctrl, rxq);
                rxq_ctrl->flow_mark_n = 0;
                rxq_ctrl->rxq.mark = 0;
                for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
                        rxq_ctrl->flow_tunnels_n[j] = 0;
                rxq_ctrl->rxq.tunnel = 0;
        }
}

/**
 * Set the Rx queue dynamic metadata (mask and offset) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 */
void
mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_rxq_data *data;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
                if (!(*priv->rxqs)[i])
                        continue;
                data = (*priv->rxqs)[i];
                if (!rte_flow_dynf_metadata_avail()) {
                        data->dynf_meta = 0;
                        data->flow_meta_mask = 0;
                        data->flow_meta_offset = -1;
                } else {
                        data->dynf_meta = 1;
                        data->flow_meta_mask = rte_flow_dynf_metadata_mask;
                        data->flow_meta_offset = rte_flow_dynf_metadata_offs;
                }
        }
}
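
/*
 * Note (hedged): the dynamic metadata field and flag only exist after the
 * application registers them, e.g.:
 *
 *   if (rte_flow_dynf_metadata_register() < 0)
 *           return -rte_errno; // dynamic metadata cannot be enabled
 *
 * Only then does rte_flow_dynf_metadata_avail() return non-zero, and the
 * Rx queues above receive a valid mask/offset.
 */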

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
                      enum rte_flow_action_type action)
{
        if (actions == NULL)
                return NULL;
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
                if (actions->type == action)
                        return actions;
        return NULL;
}
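
/*
 * Usage sketch (illustrative):
 *
 *   const struct rte_flow_action *rss_act =
 *           mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *   if (rss_act != NULL) {
 *           const struct rte_flow_action_rss *rss = rss_act->conf;
 *           // Inspect rss->types, rss->queue_num, etc.
 *   }
 */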

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        if (action_flags & MLX5_FLOW_ACTION_MARK)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't mark and flag in same flow");
        if (action_flags & MLX5_FLOW_ACTION_FLAG)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 flag"
                                          " actions in same flow");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "flag action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
                               uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        const struct rte_flow_action_mark *mark = action->conf;

        if (!mark)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "configuration cannot be null");
        if (mark->id >= MLX5_FLOW_MARK_MAX)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &mark->id,
                                          "mark id must be in 0 <= id < "
1226                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1227         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1228                 return rte_flow_error_set(error, EINVAL,
1229                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1230                                           "can't flag and mark in same flow");
1231         if (action_flags & MLX5_FLOW_ACTION_MARK)
1232                 return rte_flow_error_set(error, EINVAL,
1233                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1234                                           "can't have 2 mark actions in same"
1235                                           " flow");
1236         if (attr->egress)
1237                 return rte_flow_error_set(error, ENOTSUP,
1238                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1239                                           "mark action not supported for "
1240                                           "egress");
1241         return 0;
1242 }
1243
1244 /*
1245  * Validate the drop action.
1246  *
1247  * @param[in] action_flags
1248  *   Bit-fields that holds the actions detected until now.
1249  * @param[in] attr
1250  *   Attributes of flow that includes this action.
1251  * @param[out] error
1252  *   Pointer to error structure.
1253  *
1254  * @return
1255  *   0 on success, a negative errno value otherwise and rte_errno is set.
1256  */
1257 int
1258 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1259                                const struct rte_flow_attr *attr,
1260                                struct rte_flow_error *error)
1261 {
1262         if (attr->egress)
1263                 return rte_flow_error_set(error, ENOTSUP,
1264                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1265                                           "drop action not supported for "
1266                                           "egress");
1267         return 0;
1268 }
1269
1270 /*
1271  * Validate the queue action.
1272  *
1273  * @param[in] action
1274  *   Pointer to the queue action.
1275  * @param[in] action_flags
1276  *   Bit-fields that holds the actions detected until now.
1277  * @param[in] dev
1278  *   Pointer to the Ethernet device structure.
1279  * @param[in] attr
1280  *   Attributes of flow that includes this action.
1281  * @param[out] error
1282  *   Pointer to error structure.
1283  *
1284  * @return
1285  *   0 on success, a negative errno value otherwise and rte_errno is set.
1286  */
1287 int
1288 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1289                                 uint64_t action_flags,
1290                                 struct rte_eth_dev *dev,
1291                                 const struct rte_flow_attr *attr,
1292                                 struct rte_flow_error *error)
1293 {
1294         struct mlx5_priv *priv = dev->data->dev_private;
1295         const struct rte_flow_action_queue *queue = action->conf;
1296
1297         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1298                 return rte_flow_error_set(error, EINVAL,
1299                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1300                                           "can't have 2 fate actions in"
1301                                           " same flow");
1302         if (!priv->rxqs_n)
1303                 return rte_flow_error_set(error, EINVAL,
1304                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1305                                           NULL, "No Rx queues configured");
1306         if (queue->index >= priv->rxqs_n)
1307                 return rte_flow_error_set(error, EINVAL,
1308                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1309                                           &queue->index,
1310                                           "queue index out of range");
1311         if (!(*priv->rxqs)[queue->index])
1312                 return rte_flow_error_set(error, EINVAL,
1313                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1314                                           &queue->index,
1315                                           "queue is not configured");
1316         if (attr->egress)
1317                 return rte_flow_error_set(error, ENOTSUP,
1318                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1319                                           "queue action not supported for "
1320                                           "egress");
1321         return 0;
1322 }
1323
/**
 * Validate the RSS action.
 *
 * @param[in] action
 *   Pointer to the RSS action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
1331  * @param[in] dev
1332  *   Pointer to the Ethernet device structure.
1333  * @param[in] attr
1334  *   Attributes of flow that includes this action.
1335  * @param[in] item_flags
1336  *   Items that were detected.
1337  * @param[out] error
1338  *   Pointer to error structure.
1339  *
1340  * @return
1341  *   0 on success, a negative errno value otherwise and rte_errno is set.
1342  */
1343 int
1344 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1345                               uint64_t action_flags,
1346                               struct rte_eth_dev *dev,
1347                               const struct rte_flow_attr *attr,
1348                               uint64_t item_flags,
1349                               struct rte_flow_error *error)
1350 {
1351         struct mlx5_priv *priv = dev->data->dev_private;
1352         const struct rte_flow_action_rss *rss = action->conf;
1353         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1354         unsigned int i;
1355
1356         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1357                 return rte_flow_error_set(error, EINVAL,
1358                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1359                                           "can't have 2 fate actions"
1360                                           " in same flow");
1361         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1362             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1363                 return rte_flow_error_set(error, ENOTSUP,
1364                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1365                                           &rss->func,
1366                                           "RSS hash function not supported");
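        /*
         * RSS levels 0 and 1 select the outermost headers; level 2 selects
         * the inner headers and needs tunnel offload support in rdma-core.
         */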
1367 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1368         if (rss->level > 2)
1369 #else
1370         if (rss->level > 1)
1371 #endif
1372                 return rte_flow_error_set(error, ENOTSUP,
1373                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1374                                           &rss->level,
1375                                           "tunnel RSS is not supported");
1376         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1377         if (rss->key_len == 0 && rss->key != NULL)
1378                 return rte_flow_error_set(error, ENOTSUP,
1379                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1380                                           &rss->key_len,
1381                                           "RSS hash key length 0");
1382         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1383                 return rte_flow_error_set(error, ENOTSUP,
1384                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1385                                           &rss->key_len,
1386                                           "RSS hash key too small");
1387         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1388                 return rte_flow_error_set(error, ENOTSUP,
1389                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1390                                           &rss->key_len,
1391                                           "RSS hash key too large");
1392         if (rss->queue_num > priv->config.ind_table_max_size)
1393                 return rte_flow_error_set(error, ENOTSUP,
1394                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1395                                           &rss->queue_num,
1396                                           "number of queues too large");
1397         if (rss->types & MLX5_RSS_HF_MASK)
1398                 return rte_flow_error_set(error, ENOTSUP,
1399                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1400                                           &rss->types,
1401                                           "some RSS protocols are not"
1402                                           " supported");
1403         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1404             !(rss->types & ETH_RSS_IP))
1405                 return rte_flow_error_set(error, EINVAL,
1406                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1407                                           "L3 partial RSS requested but L3 RSS"
1408                                           " type not specified");
1409         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1410             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1411                 return rte_flow_error_set(error, EINVAL,
1412                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1413                                           "L4 partial RSS requested but L4 RSS"
1414                                           " type not specified");
1415         if (!priv->rxqs_n)
1416                 return rte_flow_error_set(error, EINVAL,
1417                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1418                                           NULL, "No Rx queues configured");
1419         if (!rss->queue_num)
1420                 return rte_flow_error_set(error, EINVAL,
1421                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1422                                           NULL, "No queues configured");
1423         for (i = 0; i != rss->queue_num; ++i) {
1424                 if (rss->queue[i] >= priv->rxqs_n)
1425                         return rte_flow_error_set
1426                                 (error, EINVAL,
1427                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1428                                  &rss->queue[i], "queue index out of range");
1429                 if (!(*priv->rxqs)[rss->queue[i]])
1430                         return rte_flow_error_set
1431                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1432                                  &rss->queue[i], "queue is not configured");
1433         }
1434         if (attr->egress)
1435                 return rte_flow_error_set(error, ENOTSUP,
1436                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1437                                           "rss action not supported for "
1438                                           "egress");
1439         if (rss->level > 1 && !tunnel)
1440                 return rte_flow_error_set(error, EINVAL,
1441                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1442                                           "inner RSS is not supported for "
1443                                           "non-tunnel flows");
1444         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1445             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1446                 return rte_flow_error_set(error, EINVAL,
1447                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
                                          "RSS on eCPRI is not supported yet");
1449         }
1450         return 0;
1451 }
1452
/**
 * Validate the default miss action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
1460  *
1461  * @return
1462  *   0 on success, a negative errno value otherwise and rte_errno is set.
1463  */
1464 int
1465 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1466                                 const struct rte_flow_attr *attr,
1467                                 struct rte_flow_error *error)
1468 {
1469         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1470                 return rte_flow_error_set(error, EINVAL,
1471                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1472                                           "can't have 2 fate actions in"
1473                                           " same flow");
1474         if (attr->egress)
1475                 return rte_flow_error_set(error, ENOTSUP,
1476                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1477                                           "default miss action not supported "
1478                                           "for egress");
1479         if (attr->group)
1480                 return rte_flow_error_set(error, ENOTSUP,
1481                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1482                                           "only group 0 is supported");
1483         if (attr->transfer)
1484                 return rte_flow_error_set(error, ENOTSUP,
1485                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1486                                           NULL, "transfer is not supported");
1487         return 0;
1488 }
1489
/**
1491  * Validate the count action.
1492  *
1493  * @param[in] dev
1494  *   Pointer to the Ethernet device structure.
1495  * @param[in] attr
1496  *   Attributes of flow that includes this action.
1497  * @param[out] error
1498  *   Pointer to error structure.
1499  *
1500  * @return
1501  *   0 on success, a negative errno value otherwise and rte_errno is set.
1502  */
1503 int
1504 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1505                                 const struct rte_flow_attr *attr,
1506                                 struct rte_flow_error *error)
1507 {
1508         if (attr->egress)
1509                 return rte_flow_error_set(error, ENOTSUP,
1510                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1511                                           "count action not supported for "
1512                                           "egress");
1513         return 0;
1514 }
1515
1516 /**
 * Verify the @p attributes will be correctly understood by the NIC; the
 * function only validates them, nothing is stored.
1519  *
1520  * @param[in] dev
1521  *   Pointer to the Ethernet device structure.
1522  * @param[in] attributes
 *   Pointer to flow attributes.
1524  * @param[out] error
1525  *   Pointer to error structure.
1526  *
1527  * @return
1528  *   0 on success, a negative errno value otherwise and rte_errno is set.
1529  */
1530 int
1531 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1532                               const struct rte_flow_attr *attributes,
1533                               struct rte_flow_error *error)
1534 {
1535         struct mlx5_priv *priv = dev->data->dev_private;
1536         uint32_t priority_max = priv->config.flow_prio - 1;
1537
1538         if (attributes->group)
1539                 return rte_flow_error_set(error, ENOTSUP,
1540                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL, "groups are not supported");
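        /*
         * MLX5_FLOW_PRIO_RSVD is reserved for internal use and is
         * exempt from the range check below.
         */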
1542         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1543             attributes->priority >= priority_max)
1544                 return rte_flow_error_set(error, ENOTSUP,
1545                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1546                                           NULL, "priority out of range");
1547         if (attributes->egress)
1548                 return rte_flow_error_set(error, ENOTSUP,
1549                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1550                                           "egress is not supported");
1551         if (attributes->transfer && !priv->config.dv_esw_en)
1552                 return rte_flow_error_set(error, ENOTSUP,
1553                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1554                                           NULL, "transfer is not supported");
1555         if (!attributes->ingress)
1556                 return rte_flow_error_set(error, EINVAL,
1557                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1558                                           NULL,
1559                                           "ingress attribute is mandatory");
1560         return 0;
1561 }
1562
1563 /**
1564  * Validate ICMP6 item.
1565  *
1566  * @param[in] item
1567  *   Item specification.
1568  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
1572  *
1573  * @return
1574  *   0 on success, a negative errno value otherwise and rte_errno is set.
1575  */
1576 int
1577 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1578                                uint64_t item_flags,
1579                                uint8_t target_protocol,
1580                                struct rte_flow_error *error)
1581 {
1582         const struct rte_flow_item_icmp6 *mask = item->mask;
1583         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1584         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1585                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1586         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1587                                       MLX5_FLOW_LAYER_OUTER_L4;
1588         int ret;
1589
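        /* 0xFF means the previous item put no constraint on the protocol. */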
1590         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1591                 return rte_flow_error_set(error, EINVAL,
1592                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1593                                           "protocol filtering not compatible"
1594                                           " with ICMP6 layer");
1595         if (!(item_flags & l3m))
1596                 return rte_flow_error_set(error, EINVAL,
1597                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1598                                           "IPv6 is mandatory to filter on"
1599                                           " ICMP6");
1600         if (item_flags & l4m)
1601                 return rte_flow_error_set(error, EINVAL,
1602                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1603                                           "multiple L4 layers not supported");
1604         if (!mask)
1605                 mask = &rte_flow_item_icmp6_mask;
1606         ret = mlx5_flow_item_acceptable
1607                 (item, (const uint8_t *)mask,
1608                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1609                  sizeof(struct rte_flow_item_icmp6),
1610                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1611         if (ret < 0)
1612                 return ret;
1613         return 0;
1614 }
1615
1616 /**
1617  * Validate ICMP item.
1618  *
1619  * @param[in] item
1620  *   Item specification.
1621  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[out] error
 *   Pointer to error structure.
1625  *
1626  * @return
1627  *   0 on success, a negative errno value otherwise and rte_errno is set.
1628  */
1629 int
1630 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1631                              uint64_t item_flags,
1632                              uint8_t target_protocol,
1633                              struct rte_flow_error *error)
1634 {
1635         const struct rte_flow_item_icmp *mask = item->mask;
1636         const struct rte_flow_item_icmp nic_mask = {
1637                 .hdr.icmp_type = 0xff,
1638                 .hdr.icmp_code = 0xff,
1639                 .hdr.icmp_ident = RTE_BE16(0xffff),
1640                 .hdr.icmp_seq_nb = RTE_BE16(0xffff),
1641         };
1642         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1643         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1644                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1645         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1646                                       MLX5_FLOW_LAYER_OUTER_L4;
1647         int ret;
1648
1649         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1650                 return rte_flow_error_set(error, EINVAL,
1651                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1652                                           "protocol filtering not compatible"
1653                                           " with ICMP layer");
1654         if (!(item_flags & l3m))
1655                 return rte_flow_error_set(error, EINVAL,
1656                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1657                                           "IPv4 is mandatory to filter"
1658                                           " on ICMP");
1659         if (item_flags & l4m)
1660                 return rte_flow_error_set(error, EINVAL,
1661                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1662                                           "multiple L4 layers not supported");
1663         if (!mask)
1664                 mask = &nic_mask;
1665         ret = mlx5_flow_item_acceptable
1666                 (item, (const uint8_t *)mask,
1667                  (const uint8_t *)&nic_mask,
1668                  sizeof(struct rte_flow_item_icmp),
1669                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1670         if (ret < 0)
1671                 return ret;
1672         return 0;
1673 }
1674
1675 /**
1676  * Validate Ethernet item.
1677  *
1678  * @param[in] item
1679  *   Item specification.
1680  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
1682  * @param[out] error
1683  *   Pointer to error structure.
1684  *
1685  * @return
1686  *   0 on success, a negative errno value otherwise and rte_errno is set.
1687  */
1688 int
1689 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1690                             uint64_t item_flags,
1691                             struct rte_flow_error *error)
1692 {
1693         const struct rte_flow_item_eth *mask = item->mask;
1694         const struct rte_flow_item_eth nic_mask = {
1695                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1696                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1697                 .type = RTE_BE16(0xffff),
1698         };
1699         int ret;
1700         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1701         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1702                                        MLX5_FLOW_LAYER_OUTER_L2;
1703
1704         if (item_flags & ethm)
1705                 return rte_flow_error_set(error, ENOTSUP,
1706                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1707                                           "multiple L2 layers not supported");
1708         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1709             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1710                 return rte_flow_error_set(error, EINVAL,
1711                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1712                                           "L2 layer should not follow "
1713                                           "L3 layers");
1714         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1715             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1716                 return rte_flow_error_set(error, EINVAL,
1717                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1718                                           "L2 layer should not follow VLAN");
1719         if (!mask)
1720                 mask = &rte_flow_item_eth_mask;
1721         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1722                                         (const uint8_t *)&nic_mask,
1723                                         sizeof(struct rte_flow_item_eth),
1724                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1725         return ret;
1726 }
1727
1728 /**
1729  * Validate VLAN item.
1730  *
1731  * @param[in] item
1732  *   Item specification.
1733  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
1735  * @param[in] dev
1736  *   Ethernet device flow is being created on.
1737  * @param[out] error
1738  *   Pointer to error structure.
1739  *
1740  * @return
1741  *   0 on success, a negative errno value otherwise and rte_errno is set.
1742  */
1743 int
1744 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1745                              uint64_t item_flags,
1746                              struct rte_eth_dev *dev,
1747                              struct rte_flow_error *error)
1748 {
1749         const struct rte_flow_item_vlan *spec = item->spec;
1750         const struct rte_flow_item_vlan *mask = item->mask;
1751         const struct rte_flow_item_vlan nic_mask = {
1752                 .tci = RTE_BE16(UINT16_MAX),
1753                 .inner_type = RTE_BE16(UINT16_MAX),
1754         };
1755         uint16_t vlan_tag = 0;
1756         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1757         int ret;
1758         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1759                                         MLX5_FLOW_LAYER_INNER_L4) :
1760                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1761                                         MLX5_FLOW_LAYER_OUTER_L4);
1762         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1763                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1764
1765         if (item_flags & vlanm)
1766                 return rte_flow_error_set(error, EINVAL,
1767                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1768                                           "multiple VLAN layers not supported");
1769         else if ((item_flags & l34m) != 0)
1770                 return rte_flow_error_set(error, EINVAL,
1771                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1772                                           "VLAN cannot follow L3/L4 layer");
1773         if (!mask)
1774                 mask = &rte_flow_item_vlan_mask;
1775         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1776                                         (const uint8_t *)&nic_mask,
1777                                         sizeof(struct rte_flow_item_vlan),
1778                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
1779         if (ret)
1780                 return ret;
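        /* RTE_BE16(0x0fff) covers exactly the 12 VID bits of the TCI. */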
1781         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1782                 struct mlx5_priv *priv = dev->data->dev_private;
1783
1784                 if (priv->vmwa_context) {
1785                         /*
                         * A non-NULL context means we have a virtual machine
                         * with SR-IOV enabled, and we have to create a VLAN
                         * interface so the hypervisor sets up the E-Switch
                         * vport context correctly. We avoid creating multiple
                         * VLAN interfaces, so we cannot support a VLAN tag
                         * mask.
1791                          */
1792                         return rte_flow_error_set(error, EINVAL,
1793                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1794                                                   item,
1795                                                   "VLAN tag mask is not"
1796                                                   " supported in virtual"
1797                                                   " environment");
1798                 }
1799         }
1800         if (spec) {
1801                 vlan_tag = spec->tci;
1802                 vlan_tag &= mask->tci;
1803         }
1804         /*
         * From the Verbs perspective an empty VLAN is equivalent
         * to a packet without a VLAN layer.
1807          */
1808         if (!vlan_tag)
1809                 return rte_flow_error_set(error, EINVAL,
1810                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1811                                           item->spec,
1812                                           "VLAN cannot be empty");
1813         return 0;
1814 }
1815
1816 /**
1817  * Validate IPV4 item.
1818  *
1819  * @param[in] item
1820  *   Item specification.
1821  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
1823  * @param[in] last_item
1824  *   Previous validated item in the pattern items.
1825  * @param[in] ether_type
1826  *   Type in the ethernet layer header (including dot1q).
1827  * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
1829  *   will be used to check whether item fields are supported.
1830  * @param[in] range_accepted
1831  *   True if range of values is accepted for specific fields, false otherwise.
1832  * @param[out] error
1833  *   Pointer to error structure.
1834  *
1835  * @return
1836  *   0 on success, a negative errno value otherwise and rte_errno is set.
1837  */
1838 int
1839 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1840                              uint64_t item_flags,
1841                              uint64_t last_item,
1842                              uint16_t ether_type,
1843                              const struct rte_flow_item_ipv4 *acc_mask,
1844                              bool range_accepted,
1845                              struct rte_flow_error *error)
1846 {
1847         const struct rte_flow_item_ipv4 *mask = item->mask;
1848         const struct rte_flow_item_ipv4 *spec = item->spec;
1849         const struct rte_flow_item_ipv4 nic_mask = {
1850                 .hdr = {
1851                         .src_addr = RTE_BE32(0xffffffff),
1852                         .dst_addr = RTE_BE32(0xffffffff),
1853                         .type_of_service = 0xff,
1854                         .next_proto_id = 0xff,
1855                 },
1856         };
1857         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1858         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1859                                       MLX5_FLOW_LAYER_OUTER_L3;
1860         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1861                                       MLX5_FLOW_LAYER_OUTER_L4;
1862         int ret;
1863         uint8_t next_proto = 0xFF;
1864         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1865                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1866                                   MLX5_FLOW_LAYER_INNER_VLAN);
1867
1868         if ((last_item & l2_vlan) && ether_type &&
1869             ether_type != RTE_ETHER_TYPE_IPV4)
1870                 return rte_flow_error_set(error, EINVAL,
1871                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1872                                           "IPv4 cannot follow L2/VLAN layer "
                                          "whose ether type is not IPv4");
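        /* Inside an IP-in-IP tunnel, this header must not start another one. */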
1874         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1875                 if (mask && spec)
1876                         next_proto = mask->hdr.next_proto_id &
1877                                      spec->hdr.next_proto_id;
1878                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1879                         return rte_flow_error_set(error, EINVAL,
1880                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1881                                                   item,
1882                                                   "multiple tunnel "
1883                                                   "not supported");
1884         }
1885         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1886                 return rte_flow_error_set(error, EINVAL,
1887                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1888                                           "wrong tunnel type - IPv6 specified "
1889                                           "but IPv4 item provided");
1890         if (item_flags & l3m)
1891                 return rte_flow_error_set(error, ENOTSUP,
1892                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1893                                           "multiple L3 layers not supported");
1894         else if (item_flags & l4m)
1895                 return rte_flow_error_set(error, EINVAL,
1896                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1897                                           "L3 cannot follow an L4 layer.");
1898         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1899                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1900                 return rte_flow_error_set(error, EINVAL,
1901                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1902                                           "L3 cannot follow an NVGRE layer.");
1903         if (!mask)
1904                 mask = &rte_flow_item_ipv4_mask;
1905         else if (mask->hdr.next_proto_id != 0 &&
1906                  mask->hdr.next_proto_id != 0xff)
1907                 return rte_flow_error_set(error, EINVAL,
1908                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1909                                           "partial mask is not supported"
1910                                           " for protocol");
1911         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1912                                         acc_mask ? (const uint8_t *)acc_mask
1913                                                  : (const uint8_t *)&nic_mask,
1914                                         sizeof(struct rte_flow_item_ipv4),
1915                                         range_accepted, error);
1916         if (ret < 0)
1917                 return ret;
1918         return 0;
1919 }
1920
1921 /**
1922  * Validate IPV6 item.
1923  *
1924  * @param[in] item
1925  *   Item specification.
1926  * @param[in] item_flags
1927  *   Bit-fields that holds the items detected until now.
1928  * @param[in] last_item
1929  *   Previous validated item in the pattern items.
1930  * @param[in] ether_type
1931  *   Type in the ethernet layer header (including dot1q).
1932  * @param[in] acc_mask
 *   Acceptable mask, if NULL the default internal mask
1934  *   will be used to check whether item fields are supported.
1935  * @param[out] error
1936  *   Pointer to error structure.
1937  *
1938  * @return
1939  *   0 on success, a negative errno value otherwise and rte_errno is set.
1940  */
1941 int
1942 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1943                              uint64_t item_flags,
1944                              uint64_t last_item,
1945                              uint16_t ether_type,
1946                              const struct rte_flow_item_ipv6 *acc_mask,
1947                              struct rte_flow_error *error)
1948 {
1949         const struct rte_flow_item_ipv6 *mask = item->mask;
1950         const struct rte_flow_item_ipv6 *spec = item->spec;
1951         const struct rte_flow_item_ipv6 nic_mask = {
1952                 .hdr = {
1953                         .src_addr =
1954                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1955                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1956                         .dst_addr =
1957                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1958                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1959                         .vtc_flow = RTE_BE32(0xffffffff),
1960                         .proto = 0xff,
1961                 },
1962         };
1963         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1964         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1965                                       MLX5_FLOW_LAYER_OUTER_L3;
1966         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1967                                       MLX5_FLOW_LAYER_OUTER_L4;
1968         int ret;
1969         uint8_t next_proto = 0xFF;
1970         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1971                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1972                                   MLX5_FLOW_LAYER_INNER_VLAN);
1973
1974         if ((last_item & l2_vlan) && ether_type &&
1975             ether_type != RTE_ETHER_TYPE_IPV6)
1976                 return rte_flow_error_set(error, EINVAL,
1977                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1978                                           "IPv6 cannot follow L2/VLAN layer "
                                          "whose ether type is not IPv6");
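        /* Take the next header only when it is matched exactly (full mask). */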
1980         if (mask && mask->hdr.proto == UINT8_MAX && spec)
1981                 next_proto = spec->hdr.proto;
1982         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1983                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1984                         return rte_flow_error_set(error, EINVAL,
1985                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1986                                                   item,
1987                                                   "multiple tunnel "
1988                                                   "not supported");
1989         }
1990         if (next_proto == IPPROTO_HOPOPTS  ||
1991             next_proto == IPPROTO_ROUTING  ||
1992             next_proto == IPPROTO_FRAGMENT ||
1993             next_proto == IPPROTO_ESP      ||
1994             next_proto == IPPROTO_AH       ||
1995             next_proto == IPPROTO_DSTOPTS)
1996                 return rte_flow_error_set(error, EINVAL,
1997                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1998                                           "IPv6 proto (next header) should "
1999                                           "not be set as extension header");
2000         if (item_flags & MLX5_FLOW_LAYER_IPIP)
2001                 return rte_flow_error_set(error, EINVAL,
2002                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2003                                           "wrong tunnel type - IPv4 specified "
2004                                           "but IPv6 item provided");
2005         if (item_flags & l3m)
2006                 return rte_flow_error_set(error, ENOTSUP,
2007                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2008                                           "multiple L3 layers not supported");
2009         else if (item_flags & l4m)
2010                 return rte_flow_error_set(error, EINVAL,
2011                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2012                                           "L3 cannot follow an L4 layer.");
2013         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
2014                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
2015                 return rte_flow_error_set(error, EINVAL,
2016                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2017                                           "L3 cannot follow an NVGRE layer.");
2018         if (!mask)
2019                 mask = &rte_flow_item_ipv6_mask;
2020         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2021                                         acc_mask ? (const uint8_t *)acc_mask
2022                                                  : (const uint8_t *)&nic_mask,
2023                                         sizeof(struct rte_flow_item_ipv6),
2024                                         MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2025         if (ret < 0)
2026                 return ret;
2027         return 0;
2028 }
2029
2030 /**
2031  * Validate UDP item.
2032  *
2033  * @param[in] item
2034  *   Item specification.
2035  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2037  * @param[in] target_protocol
2038  *   The next protocol in the previous item.
2041  * @param[out] error
2042  *   Pointer to error structure.
2043  *
2044  * @return
2045  *   0 on success, a negative errno value otherwise and rte_errno is set.
2046  */
2047 int
2048 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2049                             uint64_t item_flags,
2050                             uint8_t target_protocol,
2051                             struct rte_flow_error *error)
2052 {
2053         const struct rte_flow_item_udp *mask = item->mask;
2054         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2055         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2056                                       MLX5_FLOW_LAYER_OUTER_L3;
2057         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2058                                       MLX5_FLOW_LAYER_OUTER_L4;
2059         int ret;
2060
2061         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
2062                 return rte_flow_error_set(error, EINVAL,
2063                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2064                                           "protocol filtering not compatible"
2065                                           " with UDP layer");
2066         if (!(item_flags & l3m))
2067                 return rte_flow_error_set(error, EINVAL,
2068                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2069                                           "L3 is mandatory to filter on L4");
2070         if (item_flags & l4m)
2071                 return rte_flow_error_set(error, EINVAL,
2072                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2073                                           "multiple L4 layers not supported");
2074         if (!mask)
2075                 mask = &rte_flow_item_udp_mask;
2076         ret = mlx5_flow_item_acceptable
2077                 (item, (const uint8_t *)mask,
2078                  (const uint8_t *)&rte_flow_item_udp_mask,
2079                  sizeof(struct rte_flow_item_udp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2080                  error);
2081         if (ret < 0)
2082                 return ret;
2083         return 0;
2084 }
2085
2086 /**
2087  * Validate TCP item.
2088  *
2089  * @param[in] item
2090  *   Item specification.
2091  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
2095  * @param[out] error
2096  *   Pointer to error structure.
2097  *
2098  * @return
2099  *   0 on success, a negative errno value otherwise and rte_errno is set.
2100  */
2101 int
2102 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2103                             uint64_t item_flags,
2104                             uint8_t target_protocol,
2105                             const struct rte_flow_item_tcp *flow_mask,
2106                             struct rte_flow_error *error)
2107 {
2108         const struct rte_flow_item_tcp *mask = item->mask;
2109         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2110         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2111                                       MLX5_FLOW_LAYER_OUTER_L3;
2112         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2113                                       MLX5_FLOW_LAYER_OUTER_L4;
2114         int ret;
2115
2116         MLX5_ASSERT(flow_mask);
2117         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
2118                 return rte_flow_error_set(error, EINVAL,
2119                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2120                                           "protocol filtering not compatible"
2121                                           " with TCP layer");
2122         if (!(item_flags & l3m))
2123                 return rte_flow_error_set(error, EINVAL,
2124                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2125                                           "L3 is mandatory to filter on L4");
2126         if (item_flags & l4m)
2127                 return rte_flow_error_set(error, EINVAL,
2128                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2129                                           "multiple L4 layers not supported");
2130         if (!mask)
2131                 mask = &rte_flow_item_tcp_mask;
2132         ret = mlx5_flow_item_acceptable
2133                 (item, (const uint8_t *)mask,
2134                  (const uint8_t *)flow_mask,
2135                  sizeof(struct rte_flow_item_tcp), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2136                  error);
2137         if (ret < 0)
2138                 return ret;
2139         return 0;
2140 }
2141
2142 /**
2143  * Validate VXLAN item.
2144  *
2145  * @param[in] item
2146  *   Item specification.
2147  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
2151  * @param[out] error
2152  *   Pointer to error structure.
2153  *
2154  * @return
2155  *   0 on success, a negative errno value otherwise and rte_errno is set.
2156  */
2157 int
2158 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2159                               uint64_t item_flags,
2160                               struct rte_flow_error *error)
2161 {
2162         const struct rte_flow_item_vxlan *spec = item->spec;
2163         const struct rte_flow_item_vxlan *mask = item->mask;
2164         int ret;
2165         union vni {
2166                 uint32_t vlan_id;
2167                 uint8_t vni[4];
        } id = { .vlan_id = 0, };

2171         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2172                 return rte_flow_error_set(error, ENOTSUP,
2173                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2174                                           "multiple tunnel layers not"
2175                                           " supported");
2176         /*
2177          * Verify only UDPv4 is present as defined in
2178          * https://tools.ietf.org/html/rfc7348
2179          */
2180         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2181                 return rte_flow_error_set(error, EINVAL,
2182                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2183                                           "no outer UDP layer found");
2184         if (!mask)
2185                 mask = &rte_flow_item_vxlan_mask;
2186         ret = mlx5_flow_item_acceptable
2187                 (item, (const uint8_t *)mask,
2188                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2189                  sizeof(struct rte_flow_item_vxlan),
2190                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2191         if (ret < 0)
2192                 return ret;
        if (spec) {
                int i;

                memcpy(&id.vni[1], spec->vni, 3);
                /* Apply the mask so only the relevant VNI bits are kept. */
                for (i = 0; i < 3; ++i)
                        id.vni[i + 1] &= mask->vni[i];
        }
2197         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2198                 return rte_flow_error_set(error, ENOTSUP,
2199                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2200                                           "VXLAN tunnel must be fully defined");
2201         return 0;
2202 }
2203
2204 /**
2205  * Validate VXLAN_GPE item.
2206  *
2207  * @param[in] item
2208  *   Item specification.
2209  * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
2215  * @param[out] error
2216  *   Pointer to error structure.
2217  *
2218  * @return
2219  *   0 on success, a negative errno value otherwise and rte_errno is set.
2220  */
2221 int
2222 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
2223                                   uint64_t item_flags,
2224                                   struct rte_eth_dev *dev,
2225                                   struct rte_flow_error *error)
2226 {
2227         struct mlx5_priv *priv = dev->data->dev_private;
2228         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
2229         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
2230         int ret;
2231         union vni {
2232                 uint32_t vlan_id;
2233                 uint8_t vni[4];
2234         } id = { .vlan_id = 0, };
2235
2236         if (!priv->config.l3_vxlan_en)
2237                 return rte_flow_error_set(error, ENOTSUP,
2238                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2239                                           "L3 VXLAN is not enabled by device"
2240                                           " parameter and/or not configured in"
2241                                           " firmware");
2242         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2243                 return rte_flow_error_set(error, ENOTSUP,
2244                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2245                                           "multiple tunnel layers not"
2246                                           " supported");
2247         /*
2248          * Verify only UDPv4 is present as defined in
         * the VXLAN-GPE draft (draft-ietf-nvo3-vxlan-gpe).
2250          */
2251         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2252                 return rte_flow_error_set(error, EINVAL,
2253                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2254                                           "no outer UDP layer found");
2255         if (!mask)
2256                 mask = &rte_flow_item_vxlan_gpe_mask;
2257         ret = mlx5_flow_item_acceptable
2258                 (item, (const uint8_t *)mask,
2259                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2260                  sizeof(struct rte_flow_item_vxlan_gpe),
2261                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2262         if (ret < 0)
2263                 return ret;
        if (spec) {
                int i;

                if (spec->protocol)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "VXLAN-GPE protocol"
                                                  " not supported");
                memcpy(&id.vni[1], spec->vni, 3);
                /* Apply the mask so only the relevant VNI bits are kept. */
                for (i = 0; i < 3; ++i)
                        id.vni[i + 1] &= mask->vni[i];
        }
2274         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2275                 return rte_flow_error_set(error, ENOTSUP,
2276                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2277                                           "VXLAN-GPE tunnel must be fully"
2278                                           " defined");
2279         return 0;
2280 }

/**
2282  * Validate GRE Key item.
2283  *
2284  * @param[in] item
2285  *   Item specification.
2286  * @param[in] item_flags
2287  *   Bit flags to mark detected items.
2288  * @param[in] gre_item
2289  *   Pointer to gre_item
 *   Pointer to the GRE item.
2291  *   Pointer to error structure.
2292  *
2293  * @return
2294  *   0 on success, a negative errno value otherwise and rte_errno is set.
2295  */
2296 int
2297 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2298                                 uint64_t item_flags,
2299                                 const struct rte_flow_item *gre_item,
2300                                 struct rte_flow_error *error)
2301 {
2302         const rte_be32_t *mask = item->mask;
2303         int ret = 0;
2304         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2305         const struct rte_flow_item_gre *gre_spec;
2306         const struct rte_flow_item_gre *gre_mask;
2307
2308         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2309                 return rte_flow_error_set(error, ENOTSUP,
2310                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "Multiple GRE keys are not"
                                          " supported");
2312         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2313                 return rte_flow_error_set(error, ENOTSUP,
2314                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2315                                           "No preceding GRE header");
2316         if (item_flags & MLX5_FLOW_LAYER_INNER)
2317                 return rte_flow_error_set(error, ENOTSUP,
2318                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2319                                           "GRE key following a wrong item");
2320         gre_mask = gre_item->mask;
2321         if (!gre_mask)
2322                 gre_mask = &rte_flow_item_gre_mask;
2323         gre_spec = gre_item->spec;
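        /* RTE_BE16(0x2000) is the K (key present) bit of the GRE header. */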
2324         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2325                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2326                 return rte_flow_error_set(error, EINVAL,
2327                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2328                                           "Key bit must be on");
2329
2330         if (!mask)
2331                 mask = &gre_key_default_mask;
2332         ret = mlx5_flow_item_acceptable
2333                 (item, (const uint8_t *)mask,
2334                  (const uint8_t *)&gre_key_default_mask,
2335                  sizeof(rte_be32_t), MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2336         return ret;
2337 }
2338
2339 /**
2340  * Validate GRE item.
2341  *
2342  * @param[in] item
2343  *   Item specification.
2344  * @param[in] item_flags
2345  *   Bit flags to mark detected items.
2346  * @param[in] target_protocol
2347  *   The next protocol in the previous item.
2348  * @param[out] error
2349  *   Pointer to error structure.
2350  *
2351  * @return
2352  *   0 on success, a negative errno value otherwise and rte_errno is set.
2353  */
2354 int
2355 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2356                             uint64_t item_flags,
2357                             uint8_t target_protocol,
2358                             struct rte_flow_error *error)
2359 {
2360         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2361         const struct rte_flow_item_gre *mask = item->mask;
2362         int ret;
2363         const struct rte_flow_item_gre nic_mask = {
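                /* 0xB000 covers the C, K and S flag bits of the GRE header. */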
2364                 .c_rsvd0_ver = RTE_BE16(0xB000),
2365                 .protocol = RTE_BE16(UINT16_MAX),
2366         };
2367
2368         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2369                 return rte_flow_error_set(error, EINVAL,
2370                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2371                                           "protocol filtering not compatible"
2372                                           " with this GRE layer");
2373         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2374                 return rte_flow_error_set(error, ENOTSUP,
2375                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2376                                           "multiple tunnel layers not"
2377                                           " supported");
2378         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2379                 return rte_flow_error_set(error, ENOTSUP,
2380                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2381                                           "L3 Layer is missing");
2382         if (!mask)
2383                 mask = &rte_flow_item_gre_mask;
2384         ret = mlx5_flow_item_acceptable
2385                 (item, (const uint8_t *)mask,
2386                  (const uint8_t *)&nic_mask,
2387                  sizeof(struct rte_flow_item_gre), MLX5_ITEM_RANGE_NOT_ACCEPTED,
2388                  error);
2389         if (ret < 0)
2390                 return ret;
2391 #ifndef HAVE_MLX5DV_DR
2392 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2393         if (spec && (spec->protocol & mask->protocol))
2394                 return rte_flow_error_set(error, ENOTSUP,
2395                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2396                                           "without MPLS support the"
2397                                           " specification cannot be used for"
2398                                           " filtering");
2399 #endif
2400 #endif
2401         return 0;
2402 }
2403
2404 /**
2405  * Validate Geneve item.
2406  *
2407  * @param[in] item
2408  *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
2413  * @param[out] error
2414  *   Pointer to error structure.
2415  *
2416  * @return
2417  *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
2421 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2422                                uint64_t item_flags,
2423                                struct rte_eth_dev *dev,
2424                                struct rte_flow_error *error)
2425 {
2426         struct mlx5_priv *priv = dev->data->dev_private;
2427         const struct rte_flow_item_geneve *spec = item->spec;
2428         const struct rte_flow_item_geneve *mask = item->mask;
2429         int ret;
2430         uint16_t gbhdr;
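        /* Maximum Geneve options length supported by the HCA. */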
2431         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2432                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2433         const struct rte_flow_item_geneve nic_mask = {
2434                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2435                 .vni = "\xff\xff\xff",
2436                 .protocol = RTE_BE16(UINT16_MAX),
2437         };
2438
2439         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2440                 return rte_flow_error_set(error, ENOTSUP,
2441                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2442                                           "L3 Geneve is not enabled by device"
2443                                           " parameter and/or not configured in"
2444                                           " firmware");
2445         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2446                 return rte_flow_error_set(error, ENOTSUP,
2447                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2448                                           "multiple tunnel layers not"
2449                                           " supported");
2450         /*
2451          * Verify only UDPv4 is present as defined in
         * the Geneve specification (RFC 8926).
2453          */
2454         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2455                 return rte_flow_error_set(error, EINVAL,
2456                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2457                                           "no outer UDP layer found");
2458         if (!mask)
2459                 mask = &rte_flow_item_geneve_mask;
2460         ret = mlx5_flow_item_acceptable
2461                                   (item, (const uint8_t *)mask,
2462                                    (const uint8_t *)&nic_mask,
2463                                    sizeof(struct rte_flow_item_geneve),
2464                                    MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2465         if (ret)
2466                 return ret;
2467         if (spec) {
2468                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2469                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2470                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2471                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2472                         return rte_flow_error_set(error, ENOTSUP,
2473                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2474                                                   item,
2475                                                   "Geneve protocol unsupported"
2476                                                   " fields are being used");
2477                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2478                         return rte_flow_error_set
2479                                         (error, ENOTSUP,
2480                                          RTE_FLOW_ERROR_TYPE_ITEM,
2481                                          item,
2482                                          "Unsupported Geneve options length");
2483         }
2484         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2485                 return rte_flow_error_set
2486                                     (error, ENOTSUP,
2487                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2488                                      "Geneve tunnel must be fully defined");
2489         return 0;
2490 }
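
/*
 * Illustrative sketch only (not part of the driver): how a caller's
 * pattern walk is expected to invoke the Geneve validator above and
 * then accumulate the tunnel layer bit, mirroring what the real
 * parsers do. The function name is hypothetical.
 */
static __rte_unused int
flow_validate_geneve_usage_sketch(struct rte_eth_dev *dev,
				  const struct rte_flow_item *item,
				  uint64_t *item_flags,
				  struct rte_flow_error *error)
{
	int ret;

	ret = mlx5_flow_validate_item_geneve(item, *item_flags, dev, error);
	if (ret < 0)
		return ret;
	/* Record the tunnel layer for the items that follow. */
	*item_flags |= MLX5_FLOW_LAYER_GENEVE;
	return 0;
}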
2491
2492 /**
2493  * Validate MPLS item.
2494  *
2495  * @param[in] dev
2496  *   Pointer to the rte_eth_dev structure.
2497  * @param[in] item
2498  *   Item specification.
2499  * @param[in] item_flags
2500  *   Bit-fields that hold the items detected until now.
2501  * @param[in] prev_layer
2502  *   The protocol layer indicated in previous item.
2503  * @param[out] error
2504  *   Pointer to error structure.
2505  *
2506  * @return
2507  *   0 on success, a negative errno value otherwise and rte_errno is set.
2508  */
2509 int
2510 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2511                              const struct rte_flow_item *item __rte_unused,
2512                              uint64_t item_flags __rte_unused,
2513                              uint64_t prev_layer __rte_unused,
2514                              struct rte_flow_error *error)
2515 {
2516 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2517         const struct rte_flow_item_mpls *mask = item->mask;
2518         struct mlx5_priv *priv = dev->data->dev_private;
2519         int ret;
2520
2521         if (!priv->config.mpls_en)
2522                 return rte_flow_error_set(error, ENOTSUP,
2523                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2524                                           "MPLS not supported or"
2525                                           " disabled in firmware"
2526                                           " configuration.");
2527         /* MPLS over IP, UDP, GRE is allowed */
2528         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2529                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2530                             MLX5_FLOW_LAYER_GRE)))
2531                 return rte_flow_error_set(error, EINVAL,
2532                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2533                                           "protocol filtering not compatible"
2534                                           " with MPLS layer");
2535         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2536         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2537             !(item_flags & MLX5_FLOW_LAYER_GRE))
2538                 return rte_flow_error_set(error, ENOTSUP,
2539                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2540                                           "multiple tunnel layers not"
2541                                           " supported");
2542         if (!mask)
2543                 mask = &rte_flow_item_mpls_mask;
2544         ret = mlx5_flow_item_acceptable
2545                 (item, (const uint8_t *)mask,
2546                  (const uint8_t *)&rte_flow_item_mpls_mask,
2547                  sizeof(struct rte_flow_item_mpls),
2548                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2549         if (ret < 0)
2550                 return ret;
2551         return 0;
2552 #else
2553         return rte_flow_error_set(error, ENOTSUP,
2554                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2555                                   "MPLS is not supported by Verbs, please"
2556                                   " update.");
2557 #endif
2558 }
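
/*
 * Illustrative sketch only (hypothetical helper): MPLS is accepted over
 * L3, outer UDP or GRE, so a GRE tunnel already present in "item_flags"
 * does not fail the multi-tunnel check above as long as prev_layer
 * carries the GRE bit.
 */
static __rte_unused int
flow_validate_mpls_over_gre_sketch(struct rte_eth_dev *dev,
				   const struct rte_flow_item *item,
				   struct rte_flow_error *error)
{
	uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
			      MLX5_FLOW_LAYER_GRE;

	return mlx5_flow_validate_item_mpls(dev, item, item_flags,
					    MLX5_FLOW_LAYER_GRE, error);
}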
2559
2560 /**
2561  * Validate NVGRE item.
2562  *
2563  * @param[in] item
2564  *   Item specification.
2565  * @param[in] item_flags
2566  *   Bit flags to mark detected items.
2567  * @param[in] target_protocol
2568  *   The next protocol in the previous item.
2569  * @param[out] error
2570  *   Pointer to error structure.
2571  *
2572  * @return
2573  *   0 on success, a negative errno value otherwise and rte_errno is set.
2574  */
2575 int
2576 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2577                               uint64_t item_flags,
2578                               uint8_t target_protocol,
2579                               struct rte_flow_error *error)
2580 {
2581         const struct rte_flow_item_nvgre *mask = item->mask;
2582         int ret;
2583
2584         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2585                 return rte_flow_error_set(error, EINVAL,
2586                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2587                                           "protocol filtering not compatible"
2588                                           " with this GRE layer");
2589         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2590                 return rte_flow_error_set(error, ENOTSUP,
2591                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2592                                           "multiple tunnel layers not"
2593                                           " supported");
2594         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2595                 return rte_flow_error_set(error, ENOTSUP,
2596                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2597                                           "L3 Layer is missing");
2598         if (!mask)
2599                 mask = &rte_flow_item_nvgre_mask;
2600         ret = mlx5_flow_item_acceptable
2601                 (item, (const uint8_t *)mask,
2602                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2603                  sizeof(struct rte_flow_item_nvgre),
2604                  MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2605         if (ret < 0)
2606                 return ret;
2607         return 0;
2608 }
2609
2610 /**
2611  * Validate eCPRI item.
2612  *
2613  * @param[in] item
2614  *   Item specification.
2615  * @param[in] item_flags
2616  *   Bit-fields that hold the items detected until now.
2617  * @param[in] last_item
2618  *   Previously validated item in the pattern items.
2619  * @param[in] ether_type
2620  *   Type in the ethernet layer header (including dot1q).
2621  * @param[in] acc_mask
2622  *   Acceptable mask; if NULL, the internal default mask
2623  *   will be used to check whether item fields are supported.
2624  * @param[out] error
2625  *   Pointer to error structure.
2626  *
2627  * @return
2628  *   0 on success, a negative errno value otherwise and rte_errno is set.
2629  */
2630 int
2631 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2632                               uint64_t item_flags,
2633                               uint64_t last_item,
2634                               uint16_t ether_type,
2635                               const struct rte_flow_item_ecpri *acc_mask,
2636                               struct rte_flow_error *error)
2637 {
2638         const struct rte_flow_item_ecpri *mask = item->mask;
2639         const struct rte_flow_item_ecpri nic_mask = {
2640                 .hdr = {
2641                         .common = {
2642                                 .u32 =
2643                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2644                                         .type = 0xFF,
2645                                         }).u32),
2646                         },
2647                         .dummy[0] = 0xFFFFFFFF,
2648                 },
2649         };
2650         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2651                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2652         struct rte_flow_item_ecpri mask_lo;
2653
2654         if ((last_item & outer_l2_vlan) && ether_type &&
2655             ether_type != RTE_ETHER_TYPE_ECPRI)
2656                 return rte_flow_error_set(error, EINVAL,
2657                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2658                                           "eCPRI cannot follow an L2/VLAN layer "
2659                                           "whose ether type is not 0xAEFE.");
2660         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2661                 return rte_flow_error_set(error, EINVAL,
2662                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2663                                           "eCPRI with tunnel is not supported "
2664                                           "right now.");
2665         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2666                 return rte_flow_error_set(error, ENOTSUP,
2667                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2668                                           "multiple L3 layers not supported");
2669         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
2670                 return rte_flow_error_set(error, EINVAL,
2671                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2672                                           "eCPRI cannot follow a TCP layer.");
2673         /* In the specification, eCPRI can be over the UDP layer. */
2674         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
2675                 return rte_flow_error_set(error, EINVAL,
2676                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2677                                           "eCPRI over UDP layer is not yet "
2678                                           "supported.");
2679         /* Mask for type field in common header could be zero. */
2680         if (!mask)
2681                 mask = &rte_flow_item_ecpri_mask;
2682         /* Input mask is in big-endian format. */
2683         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
2684         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
2685                 return rte_flow_error_set(error, EINVAL,
2686                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2687                                           "partial mask is not supported "
2688                                           "for protocol");
2689         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
2690                 return rte_flow_error_set(error, EINVAL,
2691                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2692                                           "message header mask must be after "
2693                                           "a type mask");
2694         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2695                                          acc_mask ? (const uint8_t *)acc_mask
2696                                                   : (const uint8_t *)&nic_mask,
2697                                          sizeof(struct rte_flow_item_ecpri),
2698                                          MLX5_ITEM_RANGE_NOT_ACCEPTED, error);
2699 }
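
/*
 * Illustrative sketch only (hypothetical helper): building an eCPRI item
 * that passes the validation above. The type field of the common header
 * is fully masked (0xFF), the only granularity accepted besides an empty
 * mask; RTE_ECPRI_MSG_TYPE_IQ_DATA is just a sample message type. The
 * construction mirrors the nic_mask initialization in the validator.
 */
static __rte_unused void
flow_build_ecpri_item_sketch(struct rte_flow_item *item,
			     struct rte_flow_item_ecpri *spec,
			     struct rte_flow_item_ecpri *mask)
{
	memset(spec, 0, sizeof(*spec));
	memset(mask, 0, sizeof(*mask));
	/* Keep spec and mask in big-endian as expected on the wire. */
	spec->hdr.common.u32 =
		rte_cpu_to_be_32(((const struct rte_ecpri_common_hdr){
			.type = RTE_ECPRI_MSG_TYPE_IQ_DATA,
		}).u32);
	mask->hdr.common.u32 =
		rte_cpu_to_be_32(((const struct rte_ecpri_common_hdr){
			.type = 0xFF,
		}).u32);
	item->type = RTE_FLOW_ITEM_TYPE_ECPRI;
	item->spec = spec;
	item->mask = mask;
	item->last = NULL;
}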
2700
2701 /* Allocate unique ID for the split Q/RSS subflows. */
2702 static uint32_t
2703 flow_qrss_get_id(struct rte_eth_dev *dev)
2704 {
2705         struct mlx5_priv *priv = dev->data->dev_private;
2706         uint32_t qrss_id, ret;
2707
2708         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2709         if (ret)
2710                 return 0;
2711         MLX5_ASSERT(qrss_id);
2712         return qrss_id;
2713 }
2714
2715 /* Free unique ID for the split Q/RSS subflows. */
2716 static void
2717 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2718 {
2719         struct mlx5_priv *priv = dev->data->dev_private;
2720
2721         if (qrss_id)
2722                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2723 }
2724
2725 /**
2726  * Release resources related to the QUEUE/RSS action split.
2727  *
2728  * @param dev
2729  *   Pointer to Ethernet device.
2730  * @param flow
2731  *   Flow to release IDs from.
2732  */
2733 static void
2734 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2735                              struct rte_flow *flow)
2736 {
2737         struct mlx5_priv *priv = dev->data->dev_private;
2738         uint32_t handle_idx;
2739         struct mlx5_flow_handle *dev_handle;
2740
2741         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2742                        handle_idx, dev_handle, next)
2743                 if (dev_handle->split_flow_id)
2744                         flow_qrss_free_id(dev, dev_handle->split_flow_id);
2745 }
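
/*
 * Illustrative sketch only (hypothetical helper): a Q/RSS split ID comes
 * from the per-device pool and must be returned exactly once; the
 * release helper above does this for every device handle of a flow.
 */
static __rte_unused void
flow_qrss_id_lifecycle_sketch(struct rte_eth_dev *dev)
{
	uint32_t qrss_id = flow_qrss_get_id(dev);

	if (qrss_id)
		flow_qrss_free_id(dev, qrss_id);
}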
2746
2747 static int
2748 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2749                    const struct rte_flow_attr *attr __rte_unused,
2750                    const struct rte_flow_item items[] __rte_unused,
2751                    const struct rte_flow_action actions[] __rte_unused,
2752                    bool external __rte_unused,
2753                    int hairpin __rte_unused,
2754                    struct rte_flow_error *error)
2755 {
2756         return rte_flow_error_set(error, ENOTSUP,
2757                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2758 }
2759
2760 static struct mlx5_flow *
2761 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2762                   const struct rte_flow_attr *attr __rte_unused,
2763                   const struct rte_flow_item items[] __rte_unused,
2764                   const struct rte_flow_action actions[] __rte_unused,
2765                   struct rte_flow_error *error)
2766 {
2767         rte_flow_error_set(error, ENOTSUP,
2768                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2769         return NULL;
2770 }
2771
2772 static int
2773 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2774                     struct mlx5_flow *dev_flow __rte_unused,
2775                     const struct rte_flow_attr *attr __rte_unused,
2776                     const struct rte_flow_item items[] __rte_unused,
2777                     const struct rte_flow_action actions[] __rte_unused,
2778                     struct rte_flow_error *error)
2779 {
2780         return rte_flow_error_set(error, ENOTSUP,
2781                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2782 }
2783
2784 static int
2785 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2786                 struct rte_flow *flow __rte_unused,
2787                 struct rte_flow_error *error)
2788 {
2789         return rte_flow_error_set(error, ENOTSUP,
2790                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2791 }
2792
2793 static void
2794 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2795                  struct rte_flow *flow __rte_unused)
2796 {
2797 }
2798
2799 static void
2800 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2801                   struct rte_flow *flow __rte_unused)
2802 {
2803 }
2804
2805 static int
2806 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2807                 struct rte_flow *flow __rte_unused,
2808                 const struct rte_flow_action *actions __rte_unused,
2809                 void *data __rte_unused,
2810                 struct rte_flow_error *error)
2811 {
2812         return rte_flow_error_set(error, ENOTSUP,
2813                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2814 }
2815
2816 /* Void driver to protect from null pointer dereference. */
2817 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2818         .validate = flow_null_validate,
2819         .prepare = flow_null_prepare,
2820         .translate = flow_null_translate,
2821         .apply = flow_null_apply,
2822         .remove = flow_null_remove,
2823         .destroy = flow_null_destroy,
2824         .query = flow_null_query,
2825 };
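
/*
 * Illustrative sketch only (hypothetical helper): resolving an
 * unsupported driver type lands on the void driver above, so the caller
 * gets -ENOTSUP through the rte_flow error path instead of dereferencing
 * a NULL ops table.
 */
static __rte_unused int
flow_null_ops_sketch(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
	const struct mlx5_flow_driver_ops *fops =
		flow_drv_ops[MLX5_FLOW_TYPE_MAX];

	return fops->validate(dev, NULL, NULL, NULL, false, 0, error);
}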
2826
2827 /**
2828  * Select flow driver type according to flow attributes and device
2829  * configuration.
2830  *
2831  * @param[in] dev
2832  *   Pointer to the dev structure.
2833  * @param[in] attr
2834  *   Pointer to the flow attributes.
2835  *
2836  * @return
2837  *   Flow driver type if one can be selected, MLX5_FLOW_TYPE_MAX otherwise.
2838  */
2839 static enum mlx5_flow_drv_type
2840 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2841 {
2842         struct mlx5_priv *priv = dev->data->dev_private;
2843         /* The OS layer may determine a specific flow type (DV, VERBS) first. */
2844         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
2845
2846         if (type != MLX5_FLOW_TYPE_MAX)
2847                 return type;
2848         /* If there is no OS-specific type, continue with DV/VERBS selection. */
2849         if (attr->transfer && priv->config.dv_esw_en)
2850                 type = MLX5_FLOW_TYPE_DV;
2851         if (!attr->transfer)
2852                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2853                                                  MLX5_FLOW_TYPE_VERBS;
2854         return type;
2855 }
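
/*
 * Illustrative sketch only (hypothetical helper, assuming the OS layer
 * reports no specific type): a plain ingress attribute resolves to the
 * DV driver when dv_flow_en is set and to Verbs otherwise.
 */
static __rte_unused enum mlx5_flow_drv_type
flow_drv_type_selection_sketch(struct rte_eth_dev *dev)
{
	const struct rte_flow_attr attr = { .ingress = 1 };

	return flow_get_drv_type(dev, &attr);
}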
2856
2857 #define flow_get_drv_ops(type) flow_drv_ops[type]
2858
2859 /**
2860  * Flow driver validation API. This abstracts calling driver specific functions.
2861  * The type of flow driver is determined according to flow attributes.
2862  *
2863  * @param[in] dev
2864  *   Pointer to the dev structure.
2865  * @param[in] attr
2866  *   Pointer to the flow attributes.
2867  * @param[in] items
2868  *   Pointer to the list of items.
2869  * @param[in] actions
2870  *   Pointer to the list of actions.
2871  * @param[in] external
2872  *   This flow rule is created by a request external to the PMD.
2873  * @param[in] hairpin
2874  *   Number of hairpin TX actions, 0 means classic flow.
2875  * @param[out] error
2876  *   Pointer to the error structure.
2877  *
2878  * @return
2879  *   0 on success, a negative errno value otherwise and rte_errno is set.
2880  */
2881 static inline int
2882 flow_drv_validate(struct rte_eth_dev *dev,
2883                   const struct rte_flow_attr *attr,
2884                   const struct rte_flow_item items[],
2885                   const struct rte_flow_action actions[],
2886                   bool external, int hairpin, struct rte_flow_error *error)
2887 {
2888         const struct mlx5_flow_driver_ops *fops;
2889         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2890
2891         fops = flow_get_drv_ops(type);
2892         return fops->validate(dev, attr, items, actions, external,
2893                               hairpin, error);
2894 }
2895
2896 /**
2897  * Flow driver preparation API. This abstracts calling driver specific
2898  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2899  * calculates the size of memory required for device flow, allocates the memory,
2900  * initializes the device flow and returns the pointer.
2901  *
2902  * @note
2903  *   This function initializes the device flow structure such as dv or verbs
2904  *   in struct mlx5_flow. However, it is the caller's responsibility to
2905  *   initialize the rest. For example, adding the returned device flow to the
2906  *   flow->dev_flow list and setting the backward reference to the flow should
2907  *   be done outside of this function. The layers field is not filled either.
2908  *
2909  * @param[in] dev
2910  *   Pointer to the dev structure.
2911  * @param[in] attr
2912  *   Pointer to the flow attributes.
2913  * @param[in] items
2914  *   Pointer to the list of items.
2915  * @param[in] actions
2916  *   Pointer to the list of actions.
2917  * @param[in] flow_idx
2918  *   The memory pool index of the flow.
2919  * @param[out] error
2920  *   Pointer to the error structure.
2921  *
2922  * @return
2923  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2924  */
2925 static inline struct mlx5_flow *
2926 flow_drv_prepare(struct rte_eth_dev *dev,
2927                  const struct rte_flow *flow,
2928                  const struct rte_flow_attr *attr,
2929                  const struct rte_flow_item items[],
2930                  const struct rte_flow_action actions[],
2931                  uint32_t flow_idx,
2932                  struct rte_flow_error *error)
2933 {
2934         const struct mlx5_flow_driver_ops *fops;
2935         enum mlx5_flow_drv_type type = flow->drv_type;
2936         struct mlx5_flow *mlx5_flow = NULL;
2937
2938         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2939         fops = flow_get_drv_ops(type);
2940         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
2941         if (mlx5_flow)
2942                 mlx5_flow->flow_idx = flow_idx;
2943         return mlx5_flow;
2944 }
2945
2946 /**
2947  * Flow driver translation API. This abstracts calling driver specific
2948  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2949  * translates a generic flow into a driver flow. flow_drv_prepare() must
2950  * precede.
2951  *
2952  * @note
2953  *   dev_flow->layers could be filled as a result of parsing during translation
2954  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2955  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2956  *   flow->actions could be overwritten even though all the expanded dev_flows
2957  *   have the same actions.
2958  *
2959  * @param[in] dev
2960  *   Pointer to the rte dev structure.
2961  * @param[in, out] dev_flow
2962  *   Pointer to the mlx5 flow.
2963  * @param[in] attr
2964  *   Pointer to the flow attributes.
2965  * @param[in] items
2966  *   Pointer to the list of items.
2967  * @param[in] actions
2968  *   Pointer to the list of actions.
2969  * @param[out] error
2970  *   Pointer to the error structure.
2971  *
2972  * @return
2973  *   0 on success, a negative errno value otherwise and rte_errno is set.
2974  */
2975 static inline int
2976 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2977                    const struct rte_flow_attr *attr,
2978                    const struct rte_flow_item items[],
2979                    const struct rte_flow_action actions[],
2980                    struct rte_flow_error *error)
2981 {
2982         const struct mlx5_flow_driver_ops *fops;
2983         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2984
2985         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2986         fops = flow_get_drv_ops(type);
2987         return fops->translate(dev, dev_flow, attr, items, actions, error);
2988 }
2989
2990 /**
2991  * Flow driver apply API. This abstracts calling driver specific functions.
2992  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2993  * translated driver flows on to device. flow_drv_translate() must precede.
2994  *
2995  * @param[in] dev
2996  *   Pointer to Ethernet device structure.
2997  * @param[in, out] flow
2998  *   Pointer to flow structure.
2999  * @param[out] error
3000  *   Pointer to error structure.
3001  *
3002  * @return
3003  *   0 on success, a negative errno value otherwise and rte_errno is set.
3004  */
3005 static inline int
3006 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3007                struct rte_flow_error *error)
3008 {
3009         const struct mlx5_flow_driver_ops *fops;
3010         enum mlx5_flow_drv_type type = flow->drv_type;
3011
3012         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3013         fops = flow_get_drv_ops(type);
3014         return fops->apply(dev, flow, error);
3015 }
3016
3017 /**
3018  * Flow driver remove API. This abstracts calling driver specific functions.
3019  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3020  * on device. All the resources of the flow should be freed by calling
3021  * flow_drv_destroy().
3022  *
3023  * @param[in] dev
3024  *   Pointer to Ethernet device.
3025  * @param[in, out] flow
3026  *   Pointer to flow structure.
3027  */
3028 static inline void
3029 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3030 {
3031         const struct mlx5_flow_driver_ops *fops;
3032         enum mlx5_flow_drv_type type = flow->drv_type;
3033
3034         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3035         fops = flow_get_drv_ops(type);
3036         fops->remove(dev, flow);
3037 }
3038
3039 /**
3040  * Flow driver destroy API. This abstracts calling driver specific functions.
3041  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
3042  * on device and releases resources of the flow.
3043  *
3044  * @param[in] dev
3045  *   Pointer to Ethernet device.
3046  * @param[in, out] flow
3047  *   Pointer to flow structure.
3048  */
3049 static inline void
3050 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
3051 {
3052         const struct mlx5_flow_driver_ops *fops;
3053         enum mlx5_flow_drv_type type = flow->drv_type;
3054
3055         flow_mreg_split_qrss_release(dev, flow);
3056         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
3057         fops = flow_get_drv_ops(type);
3058         fops->destroy(dev, flow);
3059 }
3060
3061 /**
3062  * Get RSS action from the action list.
3063  *
3064  * @param[in] actions
3065  *   Pointer to the list of actions.
3066  *
3067  * @return
3068  *   Pointer to the RSS action if it exists, NULL otherwise.
3069  */
3070 static const struct rte_flow_action_rss*
3071 flow_get_rss_action(const struct rte_flow_action actions[])
3072 {
3073         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3074                 switch (actions->type) {
3075                 case RTE_FLOW_ACTION_TYPE_RSS:
3076                         return (const struct rte_flow_action_rss *)
3077                                actions->conf;
3078                 default:
3079                         break;
3080                 }
3081         }
3082         return NULL;
3083 }
3084
3085 static unsigned int
3086 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
3087 {
3088         const struct rte_flow_item *item;
3089         unsigned int has_vlan = 0;
3090
3091         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
3092                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
3093                         has_vlan = 1;
3094                         break;
3095                 }
3096         }
3097         if (has_vlan)
3098                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
3099                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
3100         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
3101                                MLX5_EXPANSION_ROOT_OUTER;
3102 }
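
/*
 * Illustrative sketch only (hypothetical helper): a pattern containing
 * a VLAN item with an RSS level below 2 selects the
 * MLX5_EXPANSION_ROOT_ETH_VLAN graph root; level 2 or higher would
 * select the outer variant instead.
 */
static __rte_unused unsigned int
find_graph_root_vlan_sketch(void)
{
	const struct rte_flow_item pattern[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_VLAN },
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};

	return find_graph_root(pattern, 1);
}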
3103
3104 /**
3105  *  Get layer flags from the prefix flow.
3106  *
3107  *  Some flows may be split into several subflows; the prefix subflow gets the
3108  *  match items and the suffix subflow gets the actions.
3109  *  Some actions need the user-defined match item flags to get the details for
3110  *  the action.
3111  *  This function helps the suffix flow to get the item layer flags from the
3112  *  prefix subflow.
3113  *
3114  * @param[in] dev_flow
3115  *   Pointer to the created prefix subflow.
3116  *
3117  * @return
3118  *   The layers obtained from the prefix subflow.
3119  */
3120 static inline uint64_t
3121 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
3122 {
3123         uint64_t layers = 0;
3124
3125         /*
3126          * The layer bits could be cached in a local variable, but the
3127          * compiler usually performs this optimization on its own.
3128          * If there is no decap action, use the layers directly.
3129          */
3130         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
3131                 return dev_flow->handle->layers;
3132         /* Convert L3 layers with decap action. */
3133         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
3134                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3135         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
3136                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3137         /* Convert L4 layers with decap action.  */
3138         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
3139                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
3140         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
3141                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
3142         return layers;
3143 }
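
/*
 * Illustrative sketch only (hypothetical helper): with a decap action
 * present, inner layer bits recorded by the prefix subflow are reported
 * as the matching outer bits, e.g. inner IPv4/UDP becomes outer
 * IPv4/UDP for the suffix subflow.
 */
static __rte_unused uint64_t
flow_prefix_layer_decap_sketch(struct mlx5_flow *dev_flow)
{
	MLX5_ASSERT(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP);
	return flow_get_prefix_layer_flags(dev_flow);
}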
3144
3145 /**
3146  * Get metadata split action information.
3147  *
3148  * @param[in] actions
3149  *   Pointer to the list of actions.
3150  * @param[out] qrss
3151  *   Pointer to the return pointer, set to the QUEUE/RSS action if one is
3152  *   found, left untouched otherwise.
3155  * @param[out] encap_idx
3156  *   Pointer to the index of the encap action if exists, otherwise the last
3157  *   action index.
3158  *
3159  * @return
3160  *   Total number of actions.
3161  */
3162 static int
3163 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
3164                                        const struct rte_flow_action **qrss,
3165                                        int *encap_idx)
3166 {
3167         const struct rte_flow_action_raw_encap *raw_encap;
3168         int actions_n = 0;
3169         int raw_decap_idx = -1;
3170
3171         *encap_idx = -1;
3172         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3173                 switch (actions->type) {
3174                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3175                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3176                         *encap_idx = actions_n;
3177                         break;
3178                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3179                         raw_decap_idx = actions_n;
3180                         break;
3181                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3182                         raw_encap = actions->conf;
3183                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3184                                 *encap_idx = raw_decap_idx != -1 ?
3185                                                       raw_decap_idx : actions_n;
3186                         break;
3187                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3188                 case RTE_FLOW_ACTION_TYPE_RSS:
3189                         *qrss = actions;
3190                         break;
3191                 default:
3192                         break;
3193                 }
3194                 actions_n++;
3195         }
3196         if (*encap_idx == -1)
3197                 *encap_idx = actions_n;
3198         /* Count RTE_FLOW_ACTION_TYPE_END. */
3199         return actions_n + 1;
3200 }
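
/*
 * Illustrative sketch only (hypothetical helper): for MARK / QUEUE / END
 * the parser above returns 3 (the END action included), points *qrss at
 * the QUEUE action and, without any encap, reports encap_idx as the
 * action count.
 */
static __rte_unused int
flow_parse_metadata_split_sketch(void)
{
	const struct rte_flow_action_mark mark = { .id = 1 };
	const struct rte_flow_action_queue queue = { .index = 0 };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	const struct rte_flow_action *qrss = NULL;
	int encap_idx;

	return flow_parse_metadata_split_actions_info(actions, &qrss,
						      &encap_idx);
}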
3201
3202 /**
3203  * Check meter action from the action list.
3204  *
3205  * @param[in] actions
3206  *   Pointer to the list of actions.
3207  * @param[out] mtr
3208  *   Pointer to the meter existence flag.
3209  *
3210  * @return
3211  *   Total number of actions.
3212  */
3213 static int
3214 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
3215 {
3216         int actions_n = 0;
3217
3218         MLX5_ASSERT(mtr);
3219         *mtr = 0;
3220         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3221                 switch (actions->type) {
3222                 case RTE_FLOW_ACTION_TYPE_METER:
3223                         *mtr = 1;
3224                         break;
3225                 default:
3226                         break;
3227                 }
3228                 actions_n++;
3229         }
3230         /* Count RTE_FLOW_ACTION_TYPE_END. */
3231         return actions_n + 1;
3232 }
3233
3234 /**
3235  * Check if the flow should be split due to hairpin.
3236  * The reason for the split is that the current HW can't
3237  * support encap and push-vlan on Rx, so if a flow contains
3238  * these actions we move them to Tx.
3239  *
3240  * @param dev
3241  *   Pointer to Ethernet device.
3242  * @param[in] attr
3243  *   Flow rule attributes.
3244  * @param[in] actions
3245  *   Associated actions (list terminated by the END action).
3246  *
3247  * @return
3248  *   > 0 the number of actions and the flow should be split,
3249  *   0 when no split required.
3250  */
3251 static int
3252 flow_check_hairpin_split(struct rte_eth_dev *dev,
3253                          const struct rte_flow_attr *attr,
3254                          const struct rte_flow_action actions[])
3255 {
3256         int queue_action = 0;
3257         int action_n = 0;
3258         int split = 0;
3259         const struct rte_flow_action_queue *queue;
3260         const struct rte_flow_action_rss *rss;
3261         const struct rte_flow_action_raw_encap *raw_encap;
3262
3263         if (!attr->ingress)
3264                 return 0;
3265         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3266                 switch (actions->type) {
3267                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3268                         queue = actions->conf;
3269                         if (queue == NULL)
3270                                 return 0;
3271                         if (mlx5_rxq_get_type(dev, queue->index) !=
3272                             MLX5_RXQ_TYPE_HAIRPIN)
3273                                 return 0;
3274                         queue_action = 1;
3275                         action_n++;
3276                         break;
3277                 case RTE_FLOW_ACTION_TYPE_RSS:
3278                         rss = actions->conf;
3279                         if (rss == NULL || rss->queue_num == 0)
3280                                 return 0;
3281                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
3282                             MLX5_RXQ_TYPE_HAIRPIN)
3283                                 return 0;
3284                         queue_action = 1;
3285                         action_n++;
3286                         break;
3287                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3288                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3289                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3290                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3291                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3292                         split++;
3293                         action_n++;
3294                         break;
3295                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3296                         raw_encap = actions->conf;
3297                         if (raw_encap->size >
3298                             (sizeof(struct rte_flow_item_eth) +
3299                              sizeof(struct rte_flow_item_ipv4)))
3300                                 split++;
3301                         action_n++;
3302                         break;
3303                 default:
3304                         action_n++;
3305                         break;
3306                 }
3307         }
3308         if (split && queue_action)
3309                 return action_n;
3310         return 0;
3311 }
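
/*
 * Illustrative sketch only (hypothetical helper): an ingress flow
 * carrying both a VXLAN encap and a hairpin queue action is reported for
 * splitting. The "hairpin_queue" index is assumed to address a queue
 * previously configured as hairpin.
 */
static __rte_unused int
flow_hairpin_split_check_sketch(struct rte_eth_dev *dev,
				uint16_t hairpin_queue,
				const struct rte_flow_action_vxlan_encap *encap)
{
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_action_queue queue = { .index = hairpin_queue };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP, .conf = encap },
		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	/* Returns the action count (> 0) when the flow must be split. */
	return flow_check_hairpin_split(dev, &attr, actions);
}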
3312
3313 /* Declare flow create/destroy prototype in advance. */
3314 static uint32_t
3315 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3316                  const struct rte_flow_attr *attr,
3317                  const struct rte_flow_item items[],
3318                  const struct rte_flow_action actions[],
3319                  bool external, struct rte_flow_error *error);
3320
3321 static void
3322 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3323                   uint32_t flow_idx);
3324
3325 /**
3326  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3327  *
3328  * As mark_id is unique, if there's already a registered flow for the mark_id,
3329  * return by increasing the reference counter of the resource. Otherwise, create
3330  * the resource (mcp_res) and flow.
3331  *
3332  * Flow looks like,
3333  *   - If ingress port is ANY and reg_c[1] is mark_id,
3334  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3335  *
3336  * For default flow (zero mark_id), flow is like,
3337  *   - If ingress port is ANY,
3338  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3339  *
3340  * @param dev
3341  *   Pointer to Ethernet device.
3342  * @param mark_id
3343  *   ID of MARK action, zero means default flow for META.
3344  * @param[out] error
3345  *   Perform verbose error reporting if not NULL.
3346  *
3347  * @return
3348  *   Associated resource on success, NULL otherwise and rte_errno is set.
3349  */
3350 static struct mlx5_flow_mreg_copy_resource *
3351 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3352                           struct rte_flow_error *error)
3353 {
3354         struct mlx5_priv *priv = dev->data->dev_private;
3355         struct rte_flow_attr attr = {
3356                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3357                 .ingress = 1,
3358         };
3359         struct mlx5_rte_flow_item_tag tag_spec = {
3360                 .data = mark_id,
3361         };
3362         struct rte_flow_item items[] = {
3363                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3364         };
3365         struct rte_flow_action_mark ftag = {
3366                 .id = mark_id,
3367         };
3368         struct mlx5_flow_action_copy_mreg cp_mreg = {
3369                 .dst = REG_B,
3370                 .src = REG_NON,
3371         };
3372         struct rte_flow_action_jump jump = {
3373                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3374         };
3375         struct rte_flow_action actions[] = {
3376                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3377         };
3378         struct mlx5_flow_mreg_copy_resource *mcp_res;
3379         uint32_t idx = 0;
3380         int ret;
3381
3382         /* Fill the register fields in the flow. */
3383         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3384         if (ret < 0)
3385                 return NULL;
3386         tag_spec.id = ret;
3387         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3388         if (ret < 0)
3389                 return NULL;
3390         cp_mreg.src = ret;
3391         /* Check if already registered. */
3392         MLX5_ASSERT(priv->mreg_cp_tbl);
3393         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
3394         if (mcp_res) {
3395                 /* For non-default rule. */
3396                 if (mark_id != MLX5_DEFAULT_COPY_ID)
3397                         mcp_res->refcnt++;
3398                 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
3399                             mcp_res->refcnt == 1);
3400                 return mcp_res;
3401         }
3402         /* Provide the full width of FLAG specific value. */
3403         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3404                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3405         /* Build a new flow. */
3406         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3407                 items[0] = (struct rte_flow_item){
3408                         .type = (enum rte_flow_item_type)
3409                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3410                         .spec = &tag_spec,
3411                 };
3412                 items[1] = (struct rte_flow_item){
3413                         .type = RTE_FLOW_ITEM_TYPE_END,
3414                 };
3415                 actions[0] = (struct rte_flow_action){
3416                         .type = (enum rte_flow_action_type)
3417                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3418                         .conf = &ftag,
3419                 };
3420                 actions[1] = (struct rte_flow_action){
3421                         .type = (enum rte_flow_action_type)
3422                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3423                         .conf = &cp_mreg,
3424                 };
3425                 actions[2] = (struct rte_flow_action){
3426                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3427                         .conf = &jump,
3428                 };
3429                 actions[3] = (struct rte_flow_action){
3430                         .type = RTE_FLOW_ACTION_TYPE_END,
3431                 };
3432         } else {
3433                 /* Default rule, wildcard match. */
3434                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3435                 items[0] = (struct rte_flow_item){
3436                         .type = RTE_FLOW_ITEM_TYPE_END,
3437                 };
3438                 actions[0] = (struct rte_flow_action){
3439                         .type = (enum rte_flow_action_type)
3440                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3441                         .conf = &cp_mreg,
3442                 };
3443                 actions[1] = (struct rte_flow_action){
3444                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3445                         .conf = &jump,
3446                 };
3447                 actions[2] = (struct rte_flow_action){
3448                         .type = RTE_FLOW_ACTION_TYPE_END,
3449                 };
3450         }
3451         /* Build a new entry. */
3452         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3453         if (!mcp_res) {
3454                 rte_errno = ENOMEM;
3455                 return NULL;
3456         }
3457         mcp_res->idx = idx;
3458         /*
3459          * The copy flows are not included in any list. They
3460          * are referenced from other flows and cannot be
3461          * applied, removed or deleted in arbitrary order
3462          * by list traversing.
3463          */
3464         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3465                                          actions, false, error);
3466         if (!mcp_res->rix_flow)
3467                 goto error;
3468         mcp_res->refcnt++;
3469         mcp_res->hlist_ent.key = mark_id;
3470         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3471                                 &mcp_res->hlist_ent);
3472         MLX5_ASSERT(!ret);
3473         if (ret)
3474                 goto error;
3475         return mcp_res;
3476 error:
3477         if (mcp_res->rix_flow)
3478                 flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3479         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3480         return NULL;
3481 }
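
/*
 * Illustrative sketch only (hypothetical helper): resolving the copy
 * resource for a MARK ID. Thanks to the reference counting above,
 * repeated calls for the same ID share one resource and one copy flow.
 */
static __rte_unused uint32_t
flow_mreg_copy_resource_sketch(struct rte_eth_dev *dev, uint32_t mark_id,
			       struct rte_flow_error *error)
{
	struct mlx5_flow_mreg_copy_resource *mcp_res;

	mcp_res = flow_mreg_add_copy_action(dev, mark_id, error);
	return mcp_res ? mcp_res->idx : 0;
}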
3482
3483 /**
3484  * Release flow in RX_CP_TBL.
3485  *
3486  * @param dev
3487  *   Pointer to Ethernet device.
3488  * @param flow
3489  *   Parent flow for which copying is provided.
3490  */
3491 static void
3492 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3493                           struct rte_flow *flow)
3494 {
3495         struct mlx5_flow_mreg_copy_resource *mcp_res;
3496         struct mlx5_priv *priv = dev->data->dev_private;
3497
3498         if (!flow->rix_mreg_copy)
3499                 return;
3500         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3501                                  flow->rix_mreg_copy);
3502         if (!mcp_res || !priv->mreg_cp_tbl)
3503                 return;
3504         if (flow->copy_applied) {
3505                 MLX5_ASSERT(mcp_res->appcnt);
3506                 flow->copy_applied = 0;
3507                 --mcp_res->appcnt;
3508                 if (!mcp_res->appcnt) {
3509                         struct rte_flow *mcp_flow = mlx5_ipool_get
3510                                         (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3511                                         mcp_res->rix_flow);
3512
3513                         if (mcp_flow)
3514                                 flow_drv_remove(dev, mcp_flow);
3515                 }
3516         }
3517         /*
3518          * We do not check availability of metadata registers here,
3519          * because copy resources are not allocated in this case.
3520          */
3521         if (--mcp_res->refcnt)
3522                 return;
3523         MLX5_ASSERT(mcp_res->rix_flow);
3524         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3525         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3526         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3527         flow->rix_mreg_copy = 0;
3528 }
3529
3530 /**
3531  * Start flow in RX_CP_TBL.
3532  *
3533  * @param dev
3534  *   Pointer to Ethernet device.
3535  * @param flow
3536  *   Parent flow for which copying is provided.
3537  *
3538  * @return
3539  *   0 on success, a negative errno value otherwise and rte_errno is set.
3540  */
3541 static int
3542 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3543                             struct rte_flow *flow)
3544 {
3545         struct mlx5_flow_mreg_copy_resource *mcp_res;
3546         struct mlx5_priv *priv = dev->data->dev_private;
3547         int ret;
3548
3549         if (!flow->rix_mreg_copy || flow->copy_applied)
3550                 return 0;
3551         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3552                                  flow->rix_mreg_copy);
3553         if (!mcp_res)
3554                 return 0;
3555         if (!mcp_res->appcnt) {
3556                 struct rte_flow *mcp_flow = mlx5_ipool_get
3557                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3558                                 mcp_res->rix_flow);
3559
3560                 if (mcp_flow) {
3561                         ret = flow_drv_apply(dev, mcp_flow, NULL);
3562                         if (ret)
3563                                 return ret;
3564                 }
3565         }
3566         ++mcp_res->appcnt;
3567         flow->copy_applied = 1;
3568         return 0;
3569 }
3570
3571 /**
3572  * Stop flow in RX_CP_TBL.
3573  *
3574  * @param dev
3575  *   Pointer to Ethernet device.
3576  * @param flow
3577  *   Parent flow for which copying is provided.
3578  */
3579 static void
3580 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3581                            struct rte_flow *flow)
3582 {
3583         struct mlx5_flow_mreg_copy_resource *mcp_res;
3584         struct mlx5_priv *priv = dev->data->dev_private;
3585
3586         if (!flow->rix_mreg_copy || !flow->copy_applied)
3587                 return;
3588         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3589                                  flow->rix_mreg_copy);
3590         if (!mcp_res)
3591                 return;
3592         MLX5_ASSERT(mcp_res->appcnt);
3593         --mcp_res->appcnt;
3594         flow->copy_applied = 0;
3595         if (!mcp_res->appcnt) {
3596                 struct rte_flow *mcp_flow = mlx5_ipool_get
3597                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3598                                 mcp_res->rix_flow);
3599
3600                 if (mcp_flow)
3601                         flow_drv_remove(dev, mcp_flow);
3602         }
3603 }
3604
3605 /**
3606  * Remove the default copy action from RX_CP_TBL.
3607  *
3608  * @param dev
3609  *   Pointer to Ethernet device.
3610  */
3611 static void
3612 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3613 {
3614         struct mlx5_flow_mreg_copy_resource *mcp_res;
3615         struct mlx5_priv *priv = dev->data->dev_private;
3616
3617         /* Check if default flow is registered. */
3618         if (!priv->mreg_cp_tbl)
3619                 return;
3620         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3621                                             MLX5_DEFAULT_COPY_ID);
3622         if (!mcp_res)
3623                 return;
3624         MLX5_ASSERT(mcp_res->rix_flow);
3625         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3626         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3627         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3628 }
3629
3630 /**
3631  * Add the default copy action in RX_CP_TBL.
3632  *
3633  * @param dev
3634  *   Pointer to Ethernet device.
3635  * @param[out] error
3636  *   Perform verbose error reporting if not NULL.
3637  *
3638  * @return
3639  *   0 for success, negative value otherwise and rte_errno is set.
3640  */
3641 static int
3642 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3643                                   struct rte_flow_error *error)
3644 {
3645         struct mlx5_priv *priv = dev->data->dev_private;
3646         struct mlx5_flow_mreg_copy_resource *mcp_res;
3647
3648         /* Check whether extensive metadata feature is engaged. */
3649         if (!priv->config.dv_flow_en ||
3650             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3651             !mlx5_flow_ext_mreg_supported(dev) ||
3652             !priv->sh->dv_regc0_mask)
3653                 return 0;
3654         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3655         if (!mcp_res)
3656                 return -rte_errno;
3657         return 0;
3658 }
3659
3660 /**
3661  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3662  *
3663  * All the flows having a Q/RSS action should be split by
3664  * flow_mreg_split_qrss_prep() to pass through RX_CP_TBL. A flow in the
3665  * RX_CP_TBL performs the following,
3666  *   - CQE->flow_tag := reg_c[1] (MARK)
3667  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3668  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3669  * but there should be a flow for each MARK ID set by the MARK action.
3670  *
3671  * For the aforementioned reason, if there's a MARK action in flow's action
3672  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3673  * the MARK ID to CQE's flow_tag like,
3674  *   - If reg_c[1] is mark_id,
3675  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3676  *
3677  * For SET_META action which stores value in reg_c[0], as the destination is
3678  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3679  * MARK ID means the default flow. The default flow looks like,
3680  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3681  *
3682  * @param dev
3683  *   Pointer to Ethernet device.
3684  * @param flow
3685  *   Pointer to flow structure.
3686  * @param[in] actions
3687  *   Pointer to the list of actions.
3688  * @param[out] error
3689  *   Perform verbose error reporting if not NULL.
3690  *
3691  * @return
3692  *   0 on success, negative value otherwise and rte_errno is set.
3693  */
3694 static int
3695 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3696                             struct rte_flow *flow,
3697                             const struct rte_flow_action *actions,
3698                             struct rte_flow_error *error)
3699 {
3700         struct mlx5_priv *priv = dev->data->dev_private;
3701         struct mlx5_dev_config *config = &priv->config;
3702         struct mlx5_flow_mreg_copy_resource *mcp_res;
3703         const struct rte_flow_action_mark *mark;
3704
3705         /* Check whether extensive metadata feature is engaged. */
3706         if (!config->dv_flow_en ||
3707             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3708             !mlx5_flow_ext_mreg_supported(dev) ||
3709             !priv->sh->dv_regc0_mask)
3710                 return 0;
3711         /* Find MARK action. */
3712         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3713                 switch (actions->type) {
3714                 case RTE_FLOW_ACTION_TYPE_FLAG:
3715                         mcp_res = flow_mreg_add_copy_action
3716                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3717                         if (!mcp_res)
3718                                 return -rte_errno;
3719                         flow->rix_mreg_copy = mcp_res->idx;
3720                         if (dev->data->dev_started) {
3721                                 mcp_res->appcnt++;
3722                                 flow->copy_applied = 1;
3723                         }
3724                         return 0;
3725                 case RTE_FLOW_ACTION_TYPE_MARK:
3726                         mark = (const struct rte_flow_action_mark *)
3727                                 actions->conf;
3728                         mcp_res =
3729                                 flow_mreg_add_copy_action(dev, mark->id, error);
3730                         if (!mcp_res)
3731                                 return -rte_errno;
3732                         flow->rix_mreg_copy = mcp_res->idx;
3733                         if (dev->data->dev_started) {
3734                                 mcp_res->appcnt++;
3735                                 flow->copy_applied = 1;
3736                         }
3737                         return 0;
3738                 default:
3739                         break;
3740                 }
3741         }
3742         return 0;
3743 }
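
/*
 * Illustrative sketch only (hypothetical helper): a MARK action in a
 * flow's action list makes the update above register a copy flow for
 * that ID; a FLAG action uses the default MLX5_FLOW_MARK_DEFAULT ID
 * instead. The mark ID 0xcafe is just a sample value.
 */
static __rte_unused int
flow_mreg_update_sketch(struct rte_eth_dev *dev, struct rte_flow *flow,
			struct rte_flow_error *error)
{
	const struct rte_flow_action_mark mark = { .id = 0xcafe };
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};

	return flow_mreg_update_copy_table(dev, flow, actions, error);
}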
3744
3745 #define MLX5_MAX_SPLIT_ACTIONS 24
3746 #define MLX5_MAX_SPLIT_ITEMS 24
3747
3748 /**
3749  * Split the hairpin flow.
3750  * Since HW can't support encap and push-vlan on Rx, we move these
3751  * actions to Tx.
3752  * If the count action is after the encap then we also
3753  * move the count action. In this case the count will also measure
3754  * the outer bytes.
3755  *
3756  * @param dev
3757  *   Pointer to Ethernet device.
3758  * @param[in] actions
3759  *   Associated actions (list terminated by the END action).
3760  * @param[out] actions_rx
3761  *   Rx flow actions.
3762  * @param[out] actions_tx
3763  *   Tx flow actions.
3764  * @param[out] pattern_tx
3765  *   The pattern items for the Tx flow.
3766  * @param[out] flow_id
3767  *   The flow ID connected to this flow.
3768  *
3769  * @return
3770  *   0 on success.
3771  */
3772 static int
3773 flow_hairpin_split(struct rte_eth_dev *dev,
3774                    const struct rte_flow_action actions[],
3775                    struct rte_flow_action actions_rx[],
3776                    struct rte_flow_action actions_tx[],
3777                    struct rte_flow_item pattern_tx[],
3778                    uint32_t *flow_id)
3779 {
3780         struct mlx5_priv *priv = dev->data->dev_private;
3781         const struct rte_flow_action_raw_encap *raw_encap;
3782         const struct rte_flow_action_raw_decap *raw_decap;
3783         struct mlx5_rte_flow_action_set_tag *set_tag;
3784         struct rte_flow_action *tag_action;
3785         struct mlx5_rte_flow_item_tag *tag_item;
3786         struct rte_flow_item *item;
3787         char *addr;
3788         int encap = 0;
3789
3790         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3791         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3792                 switch (actions->type) {
3793                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3794                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3795                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3796                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3797                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_PCP:
3798                         rte_memcpy(actions_tx, actions,
3799                                sizeof(struct rte_flow_action));
3800                         actions_tx++;
3801                         break;
3802                 case RTE_FLOW_ACTION_TYPE_COUNT:
3803                         if (encap) {
3804                                 rte_memcpy(actions_tx, actions,
3805                                            sizeof(struct rte_flow_action));
3806                                 actions_tx++;
3807                         } else {
3808                                 rte_memcpy(actions_rx, actions,
3809                                            sizeof(struct rte_flow_action));
3810                                 actions_rx++;
3811                         }
3812                         break;
3813                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3814                         raw_encap = actions->conf;
3815                         if (raw_encap->size >
3816                             (sizeof(struct rte_flow_item_eth) +
3817                              sizeof(struct rte_flow_item_ipv4))) {
3818                                 rte_memcpy(actions_tx, actions,
3819                                            sizeof(struct rte_flow_action));
3820                                 actions_tx++;
3821                                 encap = 1;
3822                         } else {
3823                                 rte_memcpy(actions_rx, actions,
3824                                            sizeof(struct rte_flow_action));
3825                                 actions_rx++;
3826                         }
3827                         break;
3828                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3829                         raw_decap = actions->conf;
3830                         if (raw_decap->size <
3831                             (sizeof(struct rte_flow_item_eth) +
3832                              sizeof(struct rte_flow_item_ipv4))) {
3833                                 rte_memcpy(actions_tx, actions,
3834                                            sizeof(struct rte_flow_action));
3835                                 actions_tx++;
3836                         } else {
3837                                 rte_memcpy(actions_rx, actions,
3838                                            sizeof(struct rte_flow_action));
3839                                 actions_rx++;
3840                         }
3841                         break;
3842                 default:
3843                         rte_memcpy(actions_rx, actions,
3844                                    sizeof(struct rte_flow_action));
3845                         actions_rx++;
3846                         break;
3847                 }
3848         }
3849         /* Add the set tag action and the end action for the Rx flow. */
3850         tag_action = actions_rx;
3851         tag_action->type = (enum rte_flow_action_type)
3852                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3853         actions_rx++;
3854         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3855         actions_rx++;
3856         set_tag = (void *)actions_rx;
3857         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3858         MLX5_ASSERT(set_tag->id > REG_NON);
3859         set_tag->data = *flow_id;
3860         tag_action->conf = set_tag;
3861         /* Create Tx item list. */
3862         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3863         addr = (void *)&pattern_tx[2];
3864         item = pattern_tx;
3865         item->type = (enum rte_flow_item_type)
3866                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3867         tag_item = (void *)addr;
3868         tag_item->data = *flow_id;
3869         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3870         MLX5_ASSERT(tag_item->id > REG_NON);
3871         item->spec = tag_item;
3872         addr += sizeof(struct mlx5_rte_flow_item_tag);
3873         tag_item = (void *)addr;
3874         tag_item->data = UINT32_MAX;
3875         tag_item->id = UINT16_MAX;
3876         item->mask = tag_item;
3877         item->last = NULL;
3878         item++;
3879         item->type = RTE_FLOW_ITEM_TYPE_END;
3880         return 0;
3881 }
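
/*
 * Illustrative example of the split above: for an application flow with
 * actions "raw_encap / count / queue", where the encap data is larger
 * than the plain L2/L3 headers, the resulting subflows are:
 *   Rx: the original pattern,
 *       actions "queue / tag (hairpin Rx reg := flow_id) / end";
 *   Tx: pattern "tag (hairpin Tx reg == flow_id) / end",
 *       actions "raw_encap / count / end".
 * Since the count follows the encap to Tx, it also measures outer bytes.
 */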
3882
3883 /**
3884  * The last stage of splitting chain, just creates the subflow
3885  * without any modification.
3886  *
3887  * @param[in] dev
3888  *   Pointer to Ethernet device.
3889  * @param[in] flow
3890  *   Parent flow structure pointer.
3891  * @param[in, out] sub_flow
3892  *   Pointer to return the created subflow, may be NULL.
3893  * @param[in] prefix_layers
3894  *   Prefix subflow layers, may be 0.
3895  * @param[in] prefix_mark
3896  *   Prefix subflow mark flag, may be 0.
3897  * @param[in] attr
3898  *   Flow rule attributes.
3899  * @param[in] items
3900  *   Pattern specification (list terminated by the END pattern item).
3901  * @param[in] actions
3902  *   Associated actions (list terminated by the END action).
3903  * @param[in] external
3904  *   This flow rule is created by a request external to the PMD.
3905  * @param[in] flow_idx
3906  *   The memory pool index of the flow.
3907  * @param[out] error
3908  *   Perform verbose error reporting if not NULL.
3909  * @return
3910  *   0 on success, negative value otherwise
3911  */
3912 static int
3913 flow_create_split_inner(struct rte_eth_dev *dev,
3914                         struct rte_flow *flow,
3915                         struct mlx5_flow **sub_flow,
3916                         uint64_t prefix_layers,
3917                         uint32_t prefix_mark,
3918                         const struct rte_flow_attr *attr,
3919                         const struct rte_flow_item items[],
3920                         const struct rte_flow_action actions[],
3921                         bool external, uint32_t flow_idx,
3922                         struct rte_flow_error *error)
3923 {
3924         struct mlx5_flow *dev_flow;
3925
3926         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
3927                 flow_idx, error);
3928         if (!dev_flow)
3929                 return -rte_errno;
3930         dev_flow->flow = flow;
3931         dev_flow->external = external;
3932         /* Subflow object was created, we must include it in the list. */
3933         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
3934                       dev_flow->handle, next);
3935         /*
3936          * If dev_flow is one of the suffix flows, some actions in the
3937          * suffix flow may need user-defined item layer flags; pass the
3938          * metadata rxq mark flag to the suffix flow as well.
3939          */
3940         if (prefix_layers)
3941                 dev_flow->handle->layers = prefix_layers;
3942         if (prefix_mark)
3943                 dev_flow->handle->mark = 1;
3944         if (sub_flow)
3945                 *sub_flow = dev_flow;
3946         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3947 }
3948
3949 /**
3950  * Split the meter flow.
3951  *
3952  * As the meter flow will be split into three sub flows, the actions
3953  * other than the meter action only make sense once the meter
3954  * accepts the packet. If the packet is to be dropped, no other
3955  * additional actions should be taken.
3956  *
3957  * Actions that decapsulate the L3 tunnel header are placed in the
3958  * prefix sub flow, so as not to take the L3 tunnel header into
3959  * account.
3960  *
3961  * @param dev
3962  *   Pointer to Ethernet device.
3963  * @param[in] items
3964  *   Pattern specification (list terminated by the END pattern item).
3965  * @param[out] sfx_items
3966  *   Suffix flow match items (list terminated by the END pattern item).
3967  * @param[in] actions
3968  *   Associated actions (list terminated by the END action).
3969  * @param[out] actions_sfx
3970  *   Suffix flow actions.
3971  * @param[out] actions_pre
3972  *   Prefix flow actions.
3973  *
3974  * @return
3975  *   The tag ID used to match the suffix subflow on success
3976  *   (non-zero), 0 on failure to allocate a tag ID.
3980  */
3981 static int
3982 flow_meter_split_prep(struct rte_eth_dev *dev,
3983                  const struct rte_flow_item items[],
3984                  struct rte_flow_item sfx_items[],
3985                  const struct rte_flow_action actions[],
3986                  struct rte_flow_action actions_sfx[],
3987                  struct rte_flow_action actions_pre[])
3988 {
3989         struct rte_flow_action *tag_action = NULL;
3990         struct rte_flow_item *tag_item;
3991         struct mlx5_rte_flow_action_set_tag *set_tag;
3992         struct rte_flow_error error;
3993         const struct rte_flow_action_raw_encap *raw_encap;
3994         const struct rte_flow_action_raw_decap *raw_decap;
3995         struct mlx5_rte_flow_item_tag *tag_spec;
3996         struct mlx5_rte_flow_item_tag *tag_mask;
3997         uint32_t tag_id;
3998         bool copy_vlan = false;
3999
4000         /* Prepare the actions for prefix and suffix flow. */
4001         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4002                 struct rte_flow_action **action_cur = NULL;
4003
4004                 switch (actions->type) {
4005                 case RTE_FLOW_ACTION_TYPE_METER:
4006                         /* Add the extra tag action first. */
4007                         tag_action = actions_pre;
4008                         tag_action->type = (enum rte_flow_action_type)
4009                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4010                         actions_pre++;
4011                         action_cur = &actions_pre;
4012                         break;
4013                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
4014                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
4015                         action_cur = &actions_pre;
4016                         break;
4017                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
4018                         raw_encap = actions->conf;
4019                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
4020                                 action_cur = &actions_pre;
4021                         break;
4022                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
4023                         raw_decap = actions->conf;
4024                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
4025                                 action_cur = &actions_pre;
4026                         break;
4027                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
4028                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
4029                         copy_vlan = true;
4030                         break;
4031                 default:
4032                         break;
4033                 }
4034                 if (!action_cur)
4035                         action_cur = &actions_sfx;
4036                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
4037                 (*action_cur)++;
4038         }
4039         /* Add end action to the actions. */
4040         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
4041         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
4042         actions_pre++;
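        /*
         * The tag configuration is stored in the spare room past the
         * prefix END action, inside the same sfx_actions allocation
         * (see act_size in the caller).
         */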
4043         /* Set the tag. */
4044         set_tag = (void *)actions_pre;
4045         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4046         /*
4047          * Get the ID from the qrss_pool so that Q/RSS shares it with the meter.
4048          */
4049         tag_id = flow_qrss_get_id(dev);
4050         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
4051         MLX5_ASSERT(tag_action);
4052         tag_action->conf = set_tag;
4053         /* Prepare the suffix subflow items. */
4054         tag_item = sfx_items++;
4055         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
4056                 int item_type = items->type;
4057
4058                 switch (item_type) {
4059                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
4060                         memcpy(sfx_items, items, sizeof(*sfx_items));
4061                         sfx_items++;
4062                         break;
4063                 case RTE_FLOW_ITEM_TYPE_VLAN:
4064                         if (copy_vlan) {
4065                                 memcpy(sfx_items, items, sizeof(*sfx_items));
4066                                 /*
4067                                  * Convert to internal match item, it is used
4068                                  * for vlan push and set vid.
4069                                  */
4070                                 sfx_items->type = (enum rte_flow_item_type)
4071                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
4072                                 sfx_items++;
4073                         }
4074                         break;
4075                 default:
4076                         break;
4077                 }
4078         }
4079         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4080         sfx_items++;
4081         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
4082         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
4083         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
4084         tag_mask = tag_spec + 1;
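        /*
         * Match only the tag bits above MLX5_MTR_COLOR_BITS; the low
         * bits carry the meter color and must not affect the match.
         */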
4085         tag_mask->data = 0xffffff00;
4086         tag_item->type = (enum rte_flow_item_type)
4087                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4088         tag_item->spec = tag_spec;
4089         tag_item->last = NULL;
4090         tag_item->mask = tag_mask;
4091         return tag_id;
4092 }
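
/*
 * Illustrative example of the split above: for a flow with actions
 * "meter / queue", the prefix flow keeps the original pattern with
 * actions "tag (mtr reg := tag_id << color bits) / meter / end", while
 * the suffix flow matches the tag item (mtr reg) and carries the
 * actions "queue / end".
 */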
4093
4094 /**
4095  * Split action list having QUEUE/RSS for metadata register copy.
4096  *
4097  * Once Q/RSS action is detected in user's action list, the flow action
4098  * should be split in order to copy metadata registers, which will happen in
4099  * RX_CP_TBL like,
4100  *   - CQE->flow_tag := reg_c[1] (MARK)
4101  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
4102  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
4103  * This is because the last action of each flow must be a terminal action
4104  * (QUEUE, RSS or DROP).
4105  *
4106  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
4107  * stored and kept in the mlx5_flow structure for each sub_flow.
4108  *
4109  * The Q/RSS action is replaced with,
4110  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
4111  * And the following JUMP action is added at the end,
4112  *   - JUMP, to RX_CP_TBL.
4113  *
4114  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
4115  * flow_create_split_metadata() routine. The flow will look like,
4116  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
4117  *
4118  * @param dev
4119  *   Pointer to Ethernet device.
4120  * @param[out] split_actions
4121  *   Pointer to store split actions to jump to CP_TBL.
4122  * @param[in] actions
4123  *   Pointer to the list of original flow actions.
4124  * @param[in] qrss
4125  *   Pointer to the Q/RSS action.
4126  * @param[in] actions_n
4127  *   Number of original actions.
4128  * @param[out] error
4129  *   Perform verbose error reporting if not NULL.
4130  *
4131  * @return
4132  *   non-zero unique flow_id on success, otherwise 0 and
4133  *   error/rte_errno are set.
4134  */
4135 static uint32_t
4136 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
4137                           struct rte_flow_action *split_actions,
4138                           const struct rte_flow_action *actions,
4139                           const struct rte_flow_action *qrss,
4140                           int actions_n, struct rte_flow_error *error)
4141 {
4142         struct mlx5_rte_flow_action_set_tag *set_tag;
4143         struct rte_flow_action_jump *jump;
4144         const int qrss_idx = qrss - actions;
4145         uint32_t flow_id = 0;
4146         int ret = 0;
4147
4148         /*
4149          * Given actions will be split
4150          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
4151          * - Add jump to mreg CP_TBL.
4152          * As a result, there will be one more action.
4153          */
4154         ++actions_n;
4155         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
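        /*
         * The SET_TAG and JUMP configurations are stored in the spare
         * room past the copied actions (see act_size in the caller).
         */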
4156         set_tag = (void *)(split_actions + actions_n);
4157         /*
4158          * If the tag action is not set to void (meaning this is not the
4159          * meter suffix flow), add the tag action; the meter suffix flow
4160          * already has the tag added.
4161          */
4162         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
4163                 /*
4164                  * Allocate the new subflow ID. This one is unique within
4165                  * device and not shared with representors. Otherwise,
4166                  * we would have to resolve multi-thread access synch
4167                  * issue. Each flow on the shared device is appended
4168                  * with source vport identifier, so the resulting
4169                  * flows will be unique in the shared (by master and
4170                  * representors) domain even if they have coinciding
4171                  * IDs.
4172                  */
4173                 flow_id = flow_qrss_get_id(dev);
4174                 if (!flow_id)
4175                         return rte_flow_error_set(error, ENOMEM,
4176                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4177                                                   NULL, "can't allocate id "
4178                                                   "for split Q/RSS subflow");
4179                 /* Internal SET_TAG action to set flow ID. */
4180                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
4181                         .data = flow_id,
4182                 };
4183                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
4184                 if (ret < 0)
4185                         return ret;
4186                 set_tag->id = ret;
4187                 /* Construct new actions array. */
4188                 /* Replace QUEUE/RSS action. */
4189                 split_actions[qrss_idx] = (struct rte_flow_action){
4190                         .type = (enum rte_flow_action_type)
4191                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4192                         .conf = set_tag,
4193                 };
4194         }
4195         /* JUMP action to jump to mreg copy table (CP_TBL). */
4196         jump = (void *)(set_tag + 1);
4197         *jump = (struct rte_flow_action_jump){
4198                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
4199         };
4200         split_actions[actions_n - 2] = (struct rte_flow_action){
4201                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
4202                 .conf = jump,
4203         };
4204         split_actions[actions_n - 1] = (struct rte_flow_action){
4205                 .type = RTE_FLOW_ACTION_TYPE_END,
4206         };
4207         return flow_id;
4208 }
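
/*
 * Illustrative example of the split above: with original actions
 * "mark / queue / end", the resulting split_actions are
 * "mark / tag (reg_c[2] := flow_id) / jump (CP_TBL) / end", while the
 * Q/RSS action itself is re-created later in RX_ACT_TBL, matching on
 * reg_c[2] == flow_id.
 */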
4209
4210 /**
4211  * Extend the given action list for Tx metadata copy.
4212  *
4213  * Copy the given action list to the ext_actions and add flow metadata register
4214  * copy action in order to copy reg_a set by WQE to reg_c[0].
4215  *
4216  * @param[out] ext_actions
4217  *   Pointer to the extended action list.
4218  * @param[in] actions
4219  *   Pointer to the list of actions.
4220  * @param[in] actions_n
4221  *   Number of actions in the list.
4222  * @param[out] error
4223  *   Perform verbose error reporting if not NULL.
4224  * @param[in] encap_idx
4225  *   The encap action index.
4226  *
4227  * @return
4228  *   0 on success, negative value otherwise
4229  */
4230 static int
4231 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
4232                        struct rte_flow_action *ext_actions,
4233                        const struct rte_flow_action *actions,
4234                        int actions_n, struct rte_flow_error *error,
4235                        int encap_idx)
4236 {
4237         struct mlx5_flow_action_copy_mreg *cp_mreg =
4238                 (struct mlx5_flow_action_copy_mreg *)
4239                         (ext_actions + actions_n + 1);
4240         int ret;
4241
4242         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
4243         if (ret < 0)
4244                 return ret;
4245         cp_mreg->dst = ret;
4246         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
4247         if (ret < 0)
4248                 return ret;
4249         cp_mreg->src = ret;
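        /*
         * Insert the copy action before the encap action when present,
         * otherwise append it just before END, so that the register
         * copy (a modify-header action) precedes any packet reformat.
         */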
4250         if (encap_idx != 0)
4251                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4252         if (encap_idx == actions_n - 1) {
4253                 ext_actions[actions_n - 1] = (struct rte_flow_action){
4254                         .type = (enum rte_flow_action_type)
4255                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4256                         .conf = cp_mreg,
4257                 };
4258                 ext_actions[actions_n] = (struct rte_flow_action){
4259                         .type = RTE_FLOW_ACTION_TYPE_END,
4260                 };
4261         } else {
4262                 ext_actions[encap_idx] = (struct rte_flow_action){
4263                         .type = (enum rte_flow_action_type)
4264                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4265                         .conf = cp_mreg,
4266                 };
4267                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4268                                 sizeof(*ext_actions) * (actions_n - encap_idx));
4269         }
4270         return 0;
4271 }
4272
4273 /**
4274  * Check the match action from the action list.
4275  *
4276  * @param[in] actions
4277  *   Pointer to the list of actions.
4278  * @param[in] attr
4279  *   Flow rule attributes.
4280  * @param[in] action
4281  *   The action to check the existence of.
4282  * @param[out] match_action_pos
4283  *   Pointer to the position of the matched action if it exists, -1 otherwise.
4284  * @param[out] qrss_action_pos
4285  *   Pointer to the position of the Queue/RSS action if it exists, -1 otherwise.
4286  *
4287  * @return
4288  *   > 0 the total number of actions (including the END action).
4289  *   0 if the match action is not found in the action list.
4290  */
4291 static int
4292 flow_check_match_action(const struct rte_flow_action actions[],
4293                         const struct rte_flow_attr *attr,
4294                         enum rte_flow_action_type action,
4295                         int *match_action_pos, int *qrss_action_pos)
4296 {
4297         const struct rte_flow_action_sample *sample;
4298         int actions_n = 0;
4299         int jump_flag = 0;
4300         uint32_t ratio = 0;
4301         int sub_type = 0;
4302         int flag = 0;
4303
4304         *match_action_pos = -1;
4305         *qrss_action_pos = -1;
4306         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4307                 if (actions->type == action) {
4308                         flag = 1;
4309                         *match_action_pos = actions_n;
4310                 }
4311                 if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE ||
4312                     actions->type == RTE_FLOW_ACTION_TYPE_RSS)
4313                         *qrss_action_pos = actions_n;
4314                 if (actions->type == RTE_FLOW_ACTION_TYPE_JUMP)
4315                         jump_flag = 1;
4316                 if (actions->type == RTE_FLOW_ACTION_TYPE_SAMPLE) {
4317                         sample = actions->conf;
4318                         ratio = sample->ratio;
4319                         sub_type = ((const struct rte_flow_action *)
4320                                         (sample->actions))->type;
4321                 }
4322                 actions_n++;
4323         }
4324         if (flag && action == RTE_FLOW_ACTION_TYPE_SAMPLE && attr->transfer) {
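                /* A sample ratio of 1 covers every packet, i.e. mirroring. */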
4325                 if (ratio == 1) {
4326                         /* The JUMP action is not supported for mirroring;
4327                          * mirroring supports multi-destination.
4328                          */
4329                         if (!jump_flag && sub_type != RTE_FLOW_ACTION_TYPE_END)
4330                                 flag = 0;
4331                 }
4332         }
4333         /* Count RTE_FLOW_ACTION_TYPE_END. */
4334         return flag ? actions_n + 1 : 0;
4335 }
4336
4337 #define SAMPLE_SUFFIX_ITEM 2
4338
4339 /**
4340  * Split the sample flow.
4341  *
4342  * As the sample flow will be split into two sub flows, the sample
4343  * flow keeps the sample action, while the other actions move to a
4344  * new suffix flow.
4345  * A unique tag ID is also set by a tag action in the sample flow;
4346  * the same tag ID is used as the match in the suffix flow.
4347  *
4348  * @param dev
4349  *   Pointer to Ethernet device.
4350  * @param[in] fdb_tx
4351  *   FDB egress flow flag.
4352  * @param[out] sfx_items
4353  *   Suffix flow match items (list terminated by the END pattern item).
4354  * @param[in] actions
4355  *   Associated actions (list terminated by the END action).
4356  * @param[out] actions_sfx
4357  *   Suffix flow actions.
4358  * @param[out] actions_pre
4359  *   Prefix flow actions.
4360  * @param[in] actions_n
4361  *  The total number of actions.
4362  * @param[in] sample_action_pos
4363  *   The sample action position.
4364  * @param[in] qrss_action_pos
4365  *   The Queue/RSS action position.
4366  * @param[out] error
4367  *   Perform verbose error reporting if not NULL.
4368  *
4369  * @return
4370  *   0 or a unique tag ID on success, a negative errno value
4371  *   otherwise and rte_errno is set.
4372  */
4373 static int
4374 flow_sample_split_prep(struct rte_eth_dev *dev,
4375                        uint32_t fdb_tx,
4376                        struct rte_flow_item sfx_items[],
4377                        const struct rte_flow_action actions[],
4378                        struct rte_flow_action actions_sfx[],
4379                        struct rte_flow_action actions_pre[],
4380                        int actions_n,
4381                        int sample_action_pos,
4382                        int qrss_action_pos,
4383                        struct rte_flow_error *error)
4384 {
4385         struct mlx5_rte_flow_action_set_tag *set_tag;
4386         struct mlx5_rte_flow_item_tag *tag_spec;
4387         struct mlx5_rte_flow_item_tag *tag_mask;
4388         uint32_t tag_id = 0;
4389         int index;
4390         int ret;
4391
4392         if (sample_action_pos < 0)
4393                 return rte_flow_error_set(error, EINVAL,
4394                                           RTE_FLOW_ERROR_TYPE_ACTION,
4395                                           NULL, "invalid position of sample "
4396                                           "action in list");
4397         if (!fdb_tx) {
4398                 /* Prepare the prefix tag action. */
4399                 set_tag = (void *)(actions_pre + actions_n + 1);
4400                 ret = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, error);
4401                 if (ret < 0)
4402                         return ret;
4403                 set_tag->id = ret;
4404                 tag_id = flow_qrss_get_id(dev);
4405                 set_tag->data = tag_id;
4406                 /* Prepare the suffix subflow items. */
4407                 tag_spec = (void *)(sfx_items + SAMPLE_SUFFIX_ITEM);
4408                 tag_spec->data = tag_id;
4409                 tag_spec->id = set_tag->id;
4410                 tag_mask = tag_spec + 1;
4411                 tag_mask->data = UINT32_MAX;
4412                 sfx_items[0] = (struct rte_flow_item){
4413                         .type = (enum rte_flow_item_type)
4414                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4415                         .spec = tag_spec,
4416                         .last = NULL,
4417                         .mask = tag_mask,
4418                 };
4419                 sfx_items[1] = (struct rte_flow_item){
4420                         .type = (enum rte_flow_item_type)
4421                                 RTE_FLOW_ITEM_TYPE_END,
4422                 };
4423         }
4424         /* Prepare the actions for prefix and suffix flow. */
4425         if (qrss_action_pos >= 0 && qrss_action_pos < sample_action_pos) {
4426                 index = qrss_action_pos;
4427                 /* Put the actions preceding Queue/RSS into the prefix flow. */
4428                 if (index != 0)
4429                         memcpy(actions_pre, actions,
4430                                sizeof(struct rte_flow_action) * index);
4431                 /* Put other actions preceding sample into the prefix flow. */
4432                 if (sample_action_pos > index + 1)
4433                         memcpy(actions_pre + index, actions + index + 1,
4434                                sizeof(struct rte_flow_action) *
4435                                (sample_action_pos - index - 1));
4436                 index = sample_action_pos - 1;
4437                 /* Put Queue/RSS action into Suffix flow. */
4438                 memcpy(actions_sfx, actions + qrss_action_pos,
4439                        sizeof(struct rte_flow_action));
4440                 actions_sfx++;
4441         } else {
4442                 index = sample_action_pos;
4443                 if (index != 0)
4444                         memcpy(actions_pre, actions,
4445                                sizeof(struct rte_flow_action) * index);
4446         }
4447         /* Add the extra tag action for NIC-RX and E-Switch ingress. */
4448         if (!fdb_tx) {
4449                 actions_pre[index++] =
4450                         (struct rte_flow_action){
4451                         .type = (enum rte_flow_action_type)
4452                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
4453                         .conf = set_tag,
4454                 };
4455         }
4456         memcpy(actions_pre + index, actions + sample_action_pos,
4457                sizeof(struct rte_flow_action));
4458         index += 1;
4459         actions_pre[index] = (struct rte_flow_action){
4460                 .type = (enum rte_flow_action_type)
4461                         RTE_FLOW_ACTION_TYPE_END,
4462         };
4463         /* Put the actions after sample into Suffix flow. */
4464         memcpy(actions_sfx, actions + sample_action_pos + 1,
4465                sizeof(struct rte_flow_action) *
4466                (actions_n - sample_action_pos - 1));
4467         return tag_id;
4468 }
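
/*
 * Illustrative example of the split above for NIC-Rx (fdb_tx == 0):
 * with original actions "count / sample / queue / end", the prefix
 * flow becomes "count / tag (reg := tag_id) / sample / end" and the
 * suffix flow matches the tag item (reg == tag_id) with the actions
 * "queue / end".
 */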
4469
4470 /**
4471  * The splitting for metadata feature.
4472  *
4473  * - Q/RSS action on NIC Rx should be split in order to pass by
4474  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
4475  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
4476  *
4477  * - All the actions on NIC Tx should have a mreg copy action to
4478  *   copy reg_a from WQE to reg_c[0].
4479  *
4480  * @param dev
4481  *   Pointer to Ethernet device.
4482  * @param[in] flow
4483  *   Parent flow structure pointer.
4484  * @param[in] prefix_layers
4485  *   Prefix flow layer flags.
4486  * @param[in] prefix_mark
4487  *   Prefix subflow mark flag, may be 0.
4488  * @param[in] attr
4489  *   Flow rule attributes.
4490  * @param[in] items
4491  *   Pattern specification (list terminated by the END pattern item).
4492  * @param[in] actions
4493  *   Associated actions (list terminated by the END action).
4494  * @param[in] external
4495  *   This flow rule is created by a request external to the PMD.
4496  * @param[in] flow_idx
4497  *   The memory pool index of the flow.
4498  * @param[out] error
4499  *   Perform verbose error reporting if not NULL.
4500  * @return
4501  *   0 on success, negative value otherwise
4502  */
4503 static int
4504 flow_create_split_metadata(struct rte_eth_dev *dev,
4505                            struct rte_flow *flow,
4506                            uint64_t prefix_layers,
4507                            uint32_t prefix_mark,
4508                            const struct rte_flow_attr *attr,
4509                            const struct rte_flow_item items[],
4510                            const struct rte_flow_action actions[],
4511                            bool external, uint32_t flow_idx,
4512                            struct rte_flow_error *error)
4513 {
4514         struct mlx5_priv *priv = dev->data->dev_private;
4515         struct mlx5_dev_config *config = &priv->config;
4516         const struct rte_flow_action *qrss = NULL;
4517         struct rte_flow_action *ext_actions = NULL;
4518         struct mlx5_flow *dev_flow = NULL;
4519         uint32_t qrss_id = 0;
4520         int mtr_sfx = 0;
4521         size_t act_size;
4522         int actions_n;
4523         int encap_idx;
4524         int ret;
4525
4526         /* Check whether extensive metadata feature is engaged. */
4527         if (!config->dv_flow_en ||
4528             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4529             !mlx5_flow_ext_mreg_supported(dev))
4530                 return flow_create_split_inner(dev, flow, NULL, prefix_layers,
4531                                                prefix_mark, attr, items,
4532                                                actions, external, flow_idx,
4533                                                error);
4534         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4535                                                            &encap_idx);
4536         if (qrss) {
4537                 /* Exclude hairpin flows from splitting. */
4538                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4539                         const struct rte_flow_action_queue *queue;
4540
4541                         queue = qrss->conf;
4542                         if (mlx5_rxq_get_type(dev, queue->index) ==
4543                             MLX5_RXQ_TYPE_HAIRPIN)
4544                                 qrss = NULL;
4545                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
4546                         const struct rte_flow_action_rss *rss;
4547
4548                         rss = qrss->conf;
4549                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
4550                             MLX5_RXQ_TYPE_HAIRPIN)
4551                                 qrss = NULL;
4552                 }
4553         }
4554         if (qrss) {
4555                 /* Check if it is in meter suffix table. */
4556                 mtr_sfx = attr->group == (attr->transfer ?
4557                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4558                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
4559                 /*
4560                  * Q/RSS action on NIC Rx should be split in order to pass by
4561                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
4562                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
4563                  */
4564                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4565                            sizeof(struct rte_flow_action_set_tag) +
4566                            sizeof(struct rte_flow_action_jump);
4567                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4568                                           SOCKET_ID_ANY);
4569                 if (!ext_actions)
4570                         return rte_flow_error_set(error, ENOMEM,
4571                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4572                                                   NULL, "no memory to split "
4573                                                   "metadata flow");
4574                 /*
4575                  * If we are the meter suffix flow, the tag already exists.
4576                  * Set the tag action to void.
4577                  */
4578                 if (mtr_sfx)
4579                         ext_actions[qrss - actions].type =
4580                                                 RTE_FLOW_ACTION_TYPE_VOID;
4581                 else
4582                         ext_actions[qrss - actions].type =
4583                                                 (enum rte_flow_action_type)
4584                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4585                 /*
4586                  * Create the new actions list with removed Q/RSS action
4587                  * and appended set tag and jump to register copy table
4588                  * (RX_CP_TBL). We should preallocate unique tag ID here
4589                  * in advance, because it is needed for set tag action.
4590                  */
4591                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
4592                                                     qrss, actions_n, error);
4593                 if (!mtr_sfx && !qrss_id) {
4594                         ret = -rte_errno;
4595                         goto exit;
4596                 }
4597         } else if (attr->egress && !attr->transfer) {
4598                 /*
4599                  * All the actions on NIC Tx should have a metadata register
4600                  * copy action to copy reg_a from WQE to reg_c[meta]
4601                  */
4602                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4603                            sizeof(struct mlx5_flow_action_copy_mreg);
4604                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4605                                           SOCKET_ID_ANY);
4606                 if (!ext_actions)
4607                         return rte_flow_error_set(error, ENOMEM,
4608                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4609                                                   NULL, "no memory to split "
4610                                                   "metadata flow");
4611                 /* Create the action list appended with copy register. */
4612                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
4613                                              actions_n, error, encap_idx);
4614                 if (ret < 0)
4615                         goto exit;
4616         }
4617         /* Add the unmodified original or prefix subflow. */
4618         ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers,
4619                                       prefix_mark, attr,
4620                                       items, ext_actions ? ext_actions :
4621                                       actions, external, flow_idx, error);
4622         if (ret < 0)
4623                 goto exit;
4624         MLX5_ASSERT(dev_flow);
4625         if (qrss) {
4626                 const struct rte_flow_attr q_attr = {
4627                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4628                         .ingress = 1,
4629                 };
4630                 /* Internal PMD action to set register. */
4631                 struct mlx5_rte_flow_item_tag q_tag_spec = {
4632                         .data = qrss_id,
4633                         .id = REG_NON,
4634                 };
4635                 struct rte_flow_item q_items[] = {
4636                         {
4637                                 .type = (enum rte_flow_item_type)
4638                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4639                                 .spec = &q_tag_spec,
4640                                 .last = NULL,
4641                                 .mask = NULL,
4642                         },
4643                         {
4644                                 .type = RTE_FLOW_ITEM_TYPE_END,
4645                         },
4646                 };
4647                 struct rte_flow_action q_actions[] = {
4648                         {
4649                                 .type = qrss->type,
4650                                 .conf = qrss->conf,
4651                         },
4652                         {
4653                                 .type = RTE_FLOW_ACTION_TYPE_END,
4654                         },
4655                 };
4656                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4657
4658                 /*
4659                  * Configure the tag item only if there is no meter subflow.
4660                  * Since the tag is already set in the meter suffix subflow,
4661                  * we can just use the meter suffix items as they are.
4662                  */
4663                 if (qrss_id) {
4664                         /* Not meter subflow. */
4665                         MLX5_ASSERT(!mtr_sfx);
4666                         /*
4667                          * Put the unique ID in the prefix flow, as the prefix
4668                          * is destroyed after the suffix flow. The ID is freed
4669                          * once no actual flows use it any more, and only then
4670                          * can it be reallocated (for example, for other flows
4671                          * in other threads).
4672                          */
4673                         dev_flow->handle->split_flow_id = qrss_id;
4674                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4675                                                    error);
4676                         if (ret < 0)
4677                                 goto exit;
4678                         q_tag_spec.id = ret;
4679                 }
4680                 dev_flow = NULL;
4681                 /* Add suffix subflow to execute Q/RSS. */
4682                 ret = flow_create_split_inner(dev, flow, &dev_flow, layers, 0,
4683                                               &q_attr, mtr_sfx ? items :
4684                                               q_items, q_actions,
4685                                               external, flow_idx, error);
4686                 if (ret < 0)
4687                         goto exit;
4688                 /* Clear qrss_id, it is freed at exit only on failure. */
4689                 qrss_id = 0;
4690                 MLX5_ASSERT(dev_flow);
4691         }
4692
4693 exit:
4694         /*
4695          * We do not destroy the partially created sub_flows in case of error.
4696          * These ones are included into parent flow list and will be destroyed
4697          * by flow_drv_destroy.
4698          */
4699         flow_qrss_free_id(dev, qrss_id);
4700         mlx5_free(ext_actions);
4701         return ret;
4702 }
4703
4704 /**
4705  * The splitting for meter feature.
4706  *
4707  * - The meter flow will be split into two flows: a prefix flow and
4708  *   a suffix flow. The packets are only meaningful if they pass the
4709  *   prefix meter action.
4710  *
4711  * - Reg_C_5 is used to match the packet between the prefix and
4712  *   suffix flows.
4713  *
4714  * @param dev
4715  *   Pointer to Ethernet device.
4716  * @param[in] flow
4717  *   Parent flow structure pointer.
4718  * @param[in] prefix_layers
4719  *   Prefix subflow layers, may be 0.
4720  * @param[in] prefix_mark
4721  *   Prefix subflow mark flag, may be 0.
4722  * @param[in] attr
4723  *   Flow rule attributes.
4724  * @param[in] items
4725  *   Pattern specification (list terminated by the END pattern item).
4726  * @param[in] actions
4727  *   Associated actions (list terminated by the END action).
4728  * @param[in] external
4729  *   This flow rule is created by a request external to the PMD.
4730  * @param[in] flow_idx
4731  *   The memory pool index of the flow.
4732  * @param[out] error
4733  *   Perform verbose error reporting if not NULL.
4734  * @return
4735  *   0 on success, negative value otherwise
4736  */
4737 static int
4738 flow_create_split_meter(struct rte_eth_dev *dev,
4739                         struct rte_flow *flow,
4740                         uint64_t prefix_layers,
4741                         uint32_t prefix_mark,
4742                         const struct rte_flow_attr *attr,
4743                         const struct rte_flow_item items[],
4744                         const struct rte_flow_action actions[],
4745                         bool external, uint32_t flow_idx,
4746                         struct rte_flow_error *error)
4747 {
4748         struct mlx5_priv *priv = dev->data->dev_private;
4749         struct rte_flow_action *sfx_actions = NULL;
4750         struct rte_flow_action *pre_actions = NULL;
4751         struct rte_flow_item *sfx_items = NULL;
4752         struct mlx5_flow *dev_flow = NULL;
4753         struct rte_flow_attr sfx_attr = *attr;
4754         uint32_t mtr = 0;
4755         uint32_t mtr_tag_id = 0;
4756         size_t act_size;
4757         size_t item_size;
4758         int actions_n = 0;
4759         int ret;
4760
4761         if (priv->mtr_en)
4762                 actions_n = flow_check_meter_action(actions, &mtr);
4763         if (mtr) {
4764                 /* The five prefix actions: meter, decap, encap, tag, end. */
4765                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4766                            sizeof(struct mlx5_rte_flow_action_set_tag);
4767                 /* tag, vlan, port id, end. */
4768 #define METER_SUFFIX_ITEM 4
4769                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4770                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4771                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
4772                                           0, SOCKET_ID_ANY);
4773                 if (!sfx_actions)
4774                         return rte_flow_error_set(error, ENOMEM,
4775                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4776                                                   NULL, "no memory to split "
4777                                                   "meter flow");
4778                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4779                              act_size);
4780                 pre_actions = sfx_actions + actions_n;
4781                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4782                                                    actions, sfx_actions,
4783                                                    pre_actions);
4784                 if (!mtr_tag_id) {
4785                         ret = -rte_errno;
4786                         goto exit;
4787                 }
4788                 /* Add the prefix subflow. */
4789                 ret = flow_create_split_inner(dev, flow, &dev_flow,
4790                                               prefix_layers, 0,
4791                                               attr, items,
4792                                               pre_actions, external,
4793                                               flow_idx, error);
4794                 if (ret) {
4795                         ret = -rte_errno;
4796                         goto exit;
4797                 }
4798                 dev_flow->handle->split_flow_id = mtr_tag_id;
4799                 /* Setting the sfx group attr. */
4800                 sfx_attr.group = sfx_attr.transfer ?
4801                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4802                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4803         }
4804         /* Add the suffix subflow, or the original flow if no meter. */
4805         ret = flow_create_split_metadata(dev, flow, dev_flow ?
4806                                          flow_get_prefix_layer_flags(dev_flow) :
4807                                          prefix_layers, dev_flow ?
4808                                          dev_flow->handle->mark : prefix_mark,
4809                                          &sfx_attr, sfx_items ?
4810                                          sfx_items : items,
4811                                          sfx_actions ? sfx_actions : actions,
4812                                          external, flow_idx, error);
4813 exit:
4814         if (sfx_actions)
4815                 mlx5_free(sfx_actions);
4816         return ret;
4817 }
4818
4819 /**
4820  * The splitting for sample feature.
4821  *
4822  * Once Sample action is detected in the action list, the flow actions should
4823  * be split into prefix sub flow and suffix sub flow.
4824  *
4825  * The original items remain in the prefix sub flow. All actions preceding
4826  * the sample action, and the sample action itself, are copied to the prefix
4827  * sub flow, while the actions following the sample action are copied to the
4828  * suffix sub flow. The Queue action is always located in the suffix sub flow.
4829  *
4830  * In order to match the packets from the prefix sub flow in the suffix sub
4831  * flow, an extra tag action is added to the prefix sub flow, and the suffix
4832  * sub flow uses a tag item with the unique flow ID.
4833  *
4834  * @param dev
4835  *   Pointer to Ethernet device.
4836  * @param[in] flow
4837  *   Parent flow structure pointer.
4838  * @param[in] attr
4839  *   Flow rule attributes.
4840  * @param[in] items
4841  *   Pattern specification (list terminated by the END pattern item).
4842  * @param[in] actions
4843  *   Associated actions (list terminated by the END action).
4844  * @param[in] external
4845  *   This flow rule is created by a request external to the PMD.
4846  * @param[in] flow_idx
4847  *   The memory pool index of the flow.
4848  * @param[out] error
4849  *   Perform verbose error reporting if not NULL.
4850  * @return
4851  *   0 on success, negative value otherwise
4852  */
4853 static int
4854 flow_create_split_sample(struct rte_eth_dev *dev,
4855                          struct rte_flow *flow,
4856                          const struct rte_flow_attr *attr,
4857                          const struct rte_flow_item items[],
4858                          const struct rte_flow_action actions[],
4859                          bool external, uint32_t flow_idx,
4860                          struct rte_flow_error *error)
4861 {
4862         struct mlx5_priv *priv = dev->data->dev_private;
4863         struct rte_flow_action *sfx_actions = NULL;
4864         struct rte_flow_action *pre_actions = NULL;
4865         struct rte_flow_item *sfx_items = NULL;
4866         struct mlx5_flow *dev_flow = NULL;
4867         struct rte_flow_attr sfx_attr = *attr;
4868 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4869         struct mlx5_flow_dv_sample_resource *sample_res;
4870         struct mlx5_flow_tbl_data_entry *sfx_tbl_data;
4871         struct mlx5_flow_tbl_resource *sfx_tbl;
4872         union mlx5_flow_tbl_key sfx_table_key;
4873 #endif
4874         size_t act_size;
4875         size_t item_size;
4876         uint32_t fdb_tx = 0;
4877         int32_t tag_id = 0;
4878         int actions_n = 0;
4879         int sample_action_pos;
4880         int qrss_action_pos;
4881         int ret = 0;
4882
4883         if (priv->sampler_en)
4884                 actions_n = flow_check_match_action(actions, attr,
4885                                         RTE_FLOW_ACTION_TYPE_SAMPLE,
4886                                         &sample_action_pos, &qrss_action_pos);
4887         if (actions_n) {
4888                 /* The prefix actions must include sample, tag, end. */
4889                 act_size = sizeof(struct rte_flow_action) * (actions_n * 2 + 1)
4890                            + sizeof(struct mlx5_rte_flow_action_set_tag);
4891                 item_size = sizeof(struct rte_flow_item) * SAMPLE_SUFFIX_ITEM +
4892                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4893                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size +
4894                                           item_size), 0, SOCKET_ID_ANY);
4895                 if (!sfx_actions)
4896                         return rte_flow_error_set(error, ENOMEM,
4897                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4898                                                   NULL, "no memory to split "
4899                                                   "sample flow");
4900                 /* The representor_id is -1 for uplink. */
4901                 fdb_tx = (attr->transfer && priv->representor_id != -1);
4902                 if (!fdb_tx)
4903                         sfx_items = (struct rte_flow_item *)((char *)sfx_actions
4904                                         + act_size);
4905                 pre_actions = sfx_actions + actions_n;
4906                 tag_id = flow_sample_split_prep(dev, fdb_tx, sfx_items,
4907                                                 actions, sfx_actions,
4908                                                 pre_actions, actions_n,
4909                                                 sample_action_pos,
4910                                                 qrss_action_pos, error);
4911                 if (tag_id < 0 || (!fdb_tx && !tag_id)) {
4912                         ret = -rte_errno;
4913                         goto exit;
4914                 }
4915                 /* Add the prefix subflow. */
4916                 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, 0, attr,
4917                                               items, pre_actions, external,
4918                                               flow_idx, error);
4919                 if (ret) {
4920                         ret = -rte_errno;
4921                         goto exit;
4922                 }
4923                 dev_flow->handle->split_flow_id = tag_id;
4924 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
4925                 /* Set the sfx group attr. */
4926                 sample_res = (struct mlx5_flow_dv_sample_resource *)
4927                                         dev_flow->dv.sample_res;
4928                 sfx_tbl = (struct mlx5_flow_tbl_resource *)
4929                                         sample_res->normal_path_tbl;
4930                 sfx_tbl_data = container_of(sfx_tbl,
4931                                         struct mlx5_flow_tbl_data_entry, tbl);
4932                 sfx_table_key.v64 = sfx_tbl_data->entry.key;
4933                 sfx_attr.group = sfx_attr.transfer ?
4934                                         (sfx_table_key.table_id - 1) :
4935                                          sfx_table_key.table_id;
4936 #endif
4937         }
4938         /* Add the suffix subflow. */
4939         ret = flow_create_split_meter(dev, flow, dev_flow ?
4940                                  flow_get_prefix_layer_flags(dev_flow) : 0,
4941                                  dev_flow ? dev_flow->handle->mark : 0,
4942                                  &sfx_attr, sfx_items ? sfx_items : items,
4943                                  sfx_actions ? sfx_actions : actions,
4944                                  external, flow_idx, error);
4945 exit:
4946         if (sfx_actions)
4947                 mlx5_free(sfx_actions);
4948         return ret;
4949 }
4950
4951 /**
4952  * Split the flow into a subflow set. The splitters might be chained
4953  * like this:
4954  * flow_create_split_outer() calls:
4955  *   flow_create_split_meter() calls:
4956  *     flow_create_split_metadata(meter_subflow_0) calls:
4957  *       flow_create_split_inner(metadata_subflow_0)
4958  *       flow_create_split_inner(metadata_subflow_1)
4959  *       flow_create_split_inner(metadata_subflow_2)
4960  *     flow_create_split_metadata(meter_subflow_1) calls:
4961  *       flow_create_split_inner(metadata_subflow_0)
4962  *       flow_create_split_inner(metadata_subflow_1)
4963  *       flow_create_split_inner(metadata_subflow_2)
4964  *
4965  * This provides a flexible way to add new levels of flow splitting.
4966  * All of the successfully created subflows are included in the
4967  * parent flow dev_flow list.
4968  *
4969  * @param dev
4970  *   Pointer to Ethernet device.
4971  * @param[in] flow
4972  *   Parent flow structure pointer.
4973  * @param[in] attr
4974  *   Flow rule attributes.
4975  * @param[in] items
4976  *   Pattern specification (list terminated by the END pattern item).
4977  * @param[in] actions
4978  *   Associated actions (list terminated by the END action).
4979  * @param[in] external
4980  *   This flow rule is created by a request external to the PMD.
4981  * @param[in] flow_idx
4982  *   The memory pool index of the flow.
4983  * @param[out] error
4984  *   Perform verbose error reporting if not NULL.
4985  * @return
4986  *   0 on success, negative value otherwise
4987  */
4988 static int
4989 flow_create_split_outer(struct rte_eth_dev *dev,
4990                         struct rte_flow *flow,
4991                         const struct rte_flow_attr *attr,
4992                         const struct rte_flow_item items[],
4993                         const struct rte_flow_action actions[],
4994                         bool external, uint32_t flow_idx,
4995                         struct rte_flow_error *error)
4996 {
4997         int ret;
4998
4999         ret = flow_create_split_sample(dev, flow, attr, items,
5000                                        actions, external, flow_idx, error);
5001         MLX5_ASSERT(ret <= 0);
5002         return ret;
5003 }
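
/*
 * A minimal sketch (not driver code) of how another splitter level could be
 * hooked into the chain described above. The function name and the
 * MLX5_FLOW_SPLIT_EXAMPLE guard are hypothetical; a real splitter would
 * build its own prefix/suffix items and actions before delegating, as
 * flow_create_split_meter() does.
 */
#ifdef MLX5_FLOW_SPLIT_EXAMPLE
static int
flow_create_split_example(struct rte_eth_dev *dev,
                          struct rte_flow *flow,
                          const struct rte_flow_attr *attr,
                          const struct rte_flow_item items[],
                          const struct rte_flow_action actions[],
                          bool external, uint32_t flow_idx,
                          struct rte_flow_error *error)
{
        /* This level never splits; it simply delegates to the next one. */
        return flow_create_split_sample(dev, flow, attr, items, actions,
                                        external, flow_idx, error);
}
#endif /* MLX5_FLOW_SPLIT_EXAMPLE */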
5004
5005 /**
5006  * Create a flow and add it to @p list.
5007  *
5008  * @param dev
5009  *   Pointer to Ethernet device.
5010  * @param list
5011  *   Pointer to the indexed flow list. If this parameter is NULL,
5012  *   no list insertion occurs; the flow is just created and it is
5013  *   the caller's responsibility to track the created flow.
5015  * @param[in] attr
5016  *   Flow rule attributes.
5017  * @param[in] items
5018  *   Pattern specification (list terminated by the END pattern item).
5019  * @param[in] actions
5020  *   Associated actions (list terminated by the END action).
5021  * @param[in] external
5022  *   This flow rule is created by a request external to the PMD.
5023  * @param[out] error
5024  *   Perform verbose error reporting if not NULL.
5025  *
5026  * @return
5027  *   A flow index on success, 0 otherwise and rte_errno is set.
5028  */
5029 static uint32_t
5030 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
5031                  const struct rte_flow_attr *attr,
5032                  const struct rte_flow_item items[],
5033                  const struct rte_flow_action actions[],
5034                  bool external, struct rte_flow_error *error)
5035 {
5036         struct mlx5_priv *priv = dev->data->dev_private;
5037         struct rte_flow *flow = NULL;
5038         struct mlx5_flow *dev_flow;
5039         const struct rte_flow_action_rss *rss;
5040         union {
5041                 struct mlx5_flow_expand_rss buf;
5042                 uint8_t buffer[2048];
5043         } expand_buffer;
5044         union {
5045                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5046                 uint8_t buffer[2048];
5047         } actions_rx;
5048         union {
5049                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
5050                 uint8_t buffer[2048];
5051         } actions_hairpin_tx;
5052         union {
5053                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
5054                 uint8_t buffer[2048];
5055         } items_tx;
5056         struct mlx5_flow_expand_rss *buf = &expand_buffer.buf;
5057         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
5058                                               priv->rss_desc)[!!priv->flow_idx];
5059         const struct rte_flow_action *p_actions_rx = actions;
5060         uint32_t i;
5061         uint32_t idx = 0;
5062         int hairpin_flow;
5063         uint32_t hairpin_id = 0;
5064         struct rte_flow_attr attr_tx = { .priority = 0 };
5065         struct rte_flow_attr attr_factor = {0};
5066         int ret;
5067
5068         memcpy((void *)&attr_factor, (const void *)attr, sizeof(*attr));
5069         if (external)
5070                 attr_factor.group *= MLX5_FLOW_TABLE_FACTOR;
5071         hairpin_flow = flow_check_hairpin_split(dev, &attr_factor, actions);
5072         ret = flow_drv_validate(dev, &attr_factor, items, p_actions_rx,
5073                                 external, hairpin_flow, error);
5074         if (ret < 0)
5075                 return 0;
5076         if (hairpin_flow > 0) {
5077                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
5078                         rte_errno = EINVAL;
5079                         return 0;
5080                 }
5081                 flow_hairpin_split(dev, actions, actions_rx.actions,
5082                                    actions_hairpin_tx.actions, items_tx.items,
5083                                    &hairpin_id);
5084                 p_actions_rx = actions_rx.actions;
5085         }
5086         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
5087         if (!flow) {
5088                 rte_errno = ENOMEM;
5089                 goto error_before_flow;
5090         }
5091         flow->drv_type = flow_get_drv_type(dev, &attr_factor);
5092         if (hairpin_id != 0)
5093                 flow->hairpin_flow_id = hairpin_id;
5094         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
5095                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
5096         memset(rss_desc, 0, sizeof(*rss_desc));
5097         rss = flow_get_rss_action(p_actions_rx);
5098         if (rss) {
5099                 /*
5100                  * The following information is required by
5101                  * mlx5_flow_hashfields_adjust() in advance.
5102                  */
5103                 rss_desc->level = rss->level;
5104                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
5105                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
5106         }
5107         flow->dev_handles = 0;
5108         if (rss && rss->types) {
5109                 unsigned int graph_root;
5110
5111                 graph_root = find_graph_root(items, rss->level);
5112                 ret = mlx5_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
5113                                            items, rss->types,
5114                                            mlx5_support_expansion, graph_root);
5115                 MLX5_ASSERT(ret > 0 &&
5116                        (unsigned int)ret < sizeof(expand_buffer.buffer));
5117         } else {
5118                 buf->entries = 1;
5119                 buf->entry[0].pattern = (void *)(uintptr_t)items;
5120         }
5121         /*
5122          * Record the start index when there is a nested call. All sub-flows
5123          * need to be translated before another call is made.
5124          * A ping-pong buffer is not used here, to save memory.
5125          */
5126         if (priv->flow_idx) {
5127                 MLX5_ASSERT(!priv->flow_nested_idx);
5128                 priv->flow_nested_idx = priv->flow_idx;
5129         }
5130         for (i = 0; i < buf->entries; ++i) {
5131                 /*
5132                  * The splitter may create multiple dev_flows,
5133                  * depending on configuration. In the simplest
5134                  * case it just creates the unmodified original flow.
5135                  */
5136                 ret = flow_create_split_outer(dev, flow, &attr_factor,
5137                                               buf->entry[i].pattern,
5138                                               p_actions_rx, external, idx,
5139                                               error);
5140                 if (ret < 0)
5141                         goto error;
5142         }
5143         /* Create the tx flow. */
5144         if (hairpin_flow) {
5145                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
5146                 attr_tx.ingress = 0;
5147                 attr_tx.egress = 1;
5148                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
5149                                          actions_hairpin_tx.actions,
5150                                          idx, error);
5151                 if (!dev_flow)
5152                         goto error;
5153                 dev_flow->flow = flow;
5154                 dev_flow->external = 0;
5155                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
5156                               dev_flow->handle, next);
5157                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
5158                                          items_tx.items,
5159                                          actions_hairpin_tx.actions, error);
5160                 if (ret < 0)
5161                         goto error;
5162         }
5163         /*
5164          * Update the metadata register copy table. If the extensive
5165          * metadata feature is enabled and registers are supported,
5166          * we might create an extra rte_flow for each unique
5167          * MARK/FLAG action ID.
5168          *
5169          * The table is updated for ingress flows only, because
5170          * egress flows belong to a different device and their
5171          * copy table should be updated in the peer NIC Rx domain.
5172          */
5173         if (attr_factor.ingress &&
5174             (external || attr_factor.group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
5175                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
5176                 if (ret)
5177                         goto error;
5178         }
5179         /*
5180          * If the flow is external (from the application) or the device is
5181          * started, the flow is applied immediately.
5182          */
5183         if (external || dev->data->dev_started) {
5184                 ret = flow_drv_apply(dev, flow, error);
5185                 if (ret < 0)
5186                         goto error;
5187         }
5188         if (list)
5189                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
5190                              flow, next);
5191         flow_rxq_flags_set(dev, flow);
5192         /* Nested flow creation index recovery. */
5193         priv->flow_idx = priv->flow_nested_idx;
5194         if (priv->flow_nested_idx)
5195                 priv->flow_nested_idx = 0;
5196         return idx;
5197 error:
5198         MLX5_ASSERT(flow);
5199         ret = rte_errno; /* Save rte_errno before cleanup. */
5200         flow_mreg_del_copy_action(dev, flow);
5201         flow_drv_destroy(dev, flow);
5202         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
5203         rte_errno = ret; /* Restore rte_errno. */
5204 error_before_flow:
5205         ret = rte_errno;
5206         if (hairpin_id)
5207                 mlx5_flow_id_release(priv->sh->flow_id_pool,
5208                                      hairpin_id);
5209         rte_errno = ret;
5210         priv->flow_idx = priv->flow_nested_idx;
5211         if (priv->flow_nested_idx)
5212                 priv->flow_nested_idx = 0;
5213         return 0;
5214 }
5215
5216 /**
5217  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
5218  * incoming packets to table 1.
5219  *
5220  * Other flow rules, requested for group n, will be created in
5221  * e-switch table n+1.
5222  * A jump action targeting e-switch group n will jump to table n+1.
5223  *
5224  * Used when working in switchdev mode, to take advantage of table 1
5225  * and above.
5226  *
5227  * @param dev
5228  *   Pointer to Ethernet device.
5229  *
5230  * @return
5231  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
5232  */
5233 struct rte_flow *
5234 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
5235 {
5236         const struct rte_flow_attr attr = {
5237                 .group = 0,
5238                 .priority = 0,
5239                 .ingress = 1,
5240                 .egress = 0,
5241                 .transfer = 1,
5242         };
5243         const struct rte_flow_item pattern = {
5244                 .type = RTE_FLOW_ITEM_TYPE_END,
5245         };
5246         struct rte_flow_action_jump jump = {
5247                 .group = 1,
5248         };
5249         const struct rte_flow_action actions[] = {
5250                 {
5251                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
5252                         .conf = &jump,
5253                 },
5254                 {
5255                         .type = RTE_FLOW_ACTION_TYPE_END,
5256                 },
5257         };
5258         struct mlx5_priv *priv = dev->data->dev_private;
5259         struct rte_flow_error error;
5260
5261         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
5262                                                    &attr, &pattern,
5263                                                    actions, false, &error);
5264 }
5265
5266 /**
5267  * Validate a flow supported by the NIC.
5268  *
5269  * @see rte_flow_validate()
5270  * @see rte_flow_ops
5271  */
5272 int
5273 mlx5_flow_validate(struct rte_eth_dev *dev,
5274                    const struct rte_flow_attr *attr,
5275                    const struct rte_flow_item items[],
5276                    const struct rte_flow_action actions[],
5277                    struct rte_flow_error *error)
5278 {
5279         int hairpin_flow;
5280
5281         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
5282         return flow_drv_validate(dev, attr, items, actions,
5283                                 true, hairpin_flow, error);
5284 }
5285
5286 /**
5287  * Create a flow.
5288  *
5289  * @see rte_flow_create()
5290  * @see rte_flow_ops
5291  */
5292 struct rte_flow *
5293 mlx5_flow_create(struct rte_eth_dev *dev,
5294                  const struct rte_flow_attr *attr,
5295                  const struct rte_flow_item items[],
5296                  const struct rte_flow_action actions[],
5297                  struct rte_flow_error *error)
5298 {
5299         struct mlx5_priv *priv = dev->data->dev_private;
5300
5301         /*
5302          * If the device is not started yet, the application is not
5303          * allowed to create a flow. PMD default flows and traffic control flows
5304          * are not affected.
5305          */
5306         if (unlikely(!dev->data->dev_started)) {
5307                 DRV_LOG(DEBUG, "port %u is not started when "
5308                         "inserting a flow", dev->data->port_id);
5309                 rte_flow_error_set(error, ENODEV,
5310                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5311                                    NULL,
5312                                    "port not started");
5313                 return NULL;
5314         }
5315         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
5316                                   attr, items, actions, true, error);
5317 }
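
/*
 * Usage sketch from the application side (an assumed example, not part of
 * the driver): a flow created through the generic rte_flow API reaches
 * mlx5_flow_create() above once the port is started. The function name,
 * the MLX5_FLOW_USAGE_EXAMPLE guard and the pattern below are hypothetical;
 * this one drops all ingress IPv4 traffic.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLE
static int
example_drop_ipv4(uint16_t port_id)
{
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item items[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_DROP },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;
        struct rte_flow *f;

        /* Validation takes exactly the same arguments as creation. */
        if (rte_flow_validate(port_id, &attr, items, actions, &error))
                return -rte_errno;
        f = rte_flow_create(port_id, &attr, items, actions, &error);
        /* The handle must be kept to destroy the flow later. */
        return f == NULL ? -rte_errno : 0;
}
#endif /* MLX5_FLOW_USAGE_EXAMPLE */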
5318
5319 /**
5320  * Destroy a flow in a list.
5321  *
5322  * @param dev
5323  *   Pointer to Ethernet device.
5324  * @param list
5325  *   Pointer to the indexed flow list. If this parameter is NULL,
5326  *   the flow is not removed from any list. Note that since flows
5327  *   are added to an indexed list, the memory the list points to
5328  *   may change as flows are destroyed.
5329  * @param[in] flow_idx
5330  *   Index of flow to destroy.
5331  */
5332 static void
5333 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
5334                   uint32_t flow_idx)
5335 {
5336         struct mlx5_priv *priv = dev->data->dev_private;
5337         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5338         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5339                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
5340
5341         if (!flow)
5342                 return;
5343         /*
5344          * Update RX queue flags only if port is started, otherwise it is
5345          * already clean.
5346          */
5347         if (dev->data->dev_started)
5348                 flow_rxq_flags_trim(dev, flow);
5349         if (flow->hairpin_flow_id)
5350                 mlx5_flow_id_release(priv->sh->flow_id_pool,
5351                                      flow->hairpin_flow_id);
5352         flow_drv_destroy(dev, flow);
5353         if (list)
5354                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
5355                              flow_idx, flow, next);
5356         flow_mreg_del_copy_action(dev, flow);
5357         if (flow->fdir) {
5358                 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5359                         if (priv_fdir_flow->rix_flow == flow_idx)
5360                                 break;
5361                 }
5362                 if (priv_fdir_flow) {
5363                         LIST_REMOVE(priv_fdir_flow, next);
5364                         mlx5_free(priv_fdir_flow->fdir);
5365                         mlx5_free(priv_fdir_flow);
5366                 }
5367         }
5368         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5369 }
5370
5371 /**
5372  * Destroy all flows.
5373  *
5374  * @param dev
5375  *   Pointer to Ethernet device.
5376  * @param list
5377  *   Pointer to the Indexed flow list.
5378  * @param active
5379  *   If flushing is called actively.
5380  */
5381 void
5382 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
5383 {
5384         uint32_t num_flushed = 0;
5385
5386         while (*list) {
5387                 flow_list_destroy(dev, list, *list);
5388                 num_flushed++;
5389         }
5390         if (active) {
5391                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
5392                         dev->data->port_id, num_flushed);
5393         }
5394 }
5395
5396 /**
5397  * Remove all flows.
5398  *
5399  * @param dev
5400  *   Pointer to Ethernet device.
5401  * @param list
5402  *   Pointer to the Indexed flow list.
5403  */
5404 void
5405 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
5406 {
5407         struct mlx5_priv *priv = dev->data->dev_private;
5408         struct rte_flow *flow = NULL;
5409         uint32_t idx;
5410
5411         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
5412                       flow, next) {
5413                 flow_drv_remove(dev, flow);
5414                 flow_mreg_stop_copy_action(dev, flow);
5415         }
5416         flow_mreg_del_default_copy_action(dev);
5417         flow_rxq_flags_clear(dev);
5418 }
5419
5420 /**
5421  * Add all flows.
5422  *
5423  * @param dev
5424  *   Pointer to Ethernet device.
5425  * @param list
5426  *   Pointer to the Indexed flow list.
5427  *
5428  * @return
5429  *   0 on success, a negative errno value otherwise and rte_errno is set.
5430  */
5431 int
5432 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
5433 {
5434         struct mlx5_priv *priv = dev->data->dev_private;
5435         struct rte_flow *flow = NULL;
5436         struct rte_flow_error error;
5437         uint32_t idx;
5438         int ret = 0;
5439
5440         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
5441         ret = flow_mreg_add_default_copy_action(dev, &error);
5442         if (ret < 0)
5443                 return -rte_errno;
5444         /* Apply Flows created by application. */
5445         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
5446                       flow, next) {
5447                 ret = flow_mreg_start_copy_action(dev, flow);
5448                 if (ret < 0)
5449                         goto error;
5450                 ret = flow_drv_apply(dev, flow, &error);
5451                 if (ret < 0)
5452                         goto error;
5453                 flow_rxq_flags_set(dev, flow);
5454         }
5455         return 0;
5456 error:
5457         ret = rte_errno; /* Save rte_errno before cleanup. */
5458         mlx5_flow_stop(dev, list);
5459         rte_errno = ret; /* Restore rte_errno. */
5460         return -rte_errno;
5461 }
5462
5463 /**
5464  * Stop all default actions for flows.
5465  *
5466  * @param dev
5467  *   Pointer to Ethernet device.
5468  */
5469 void
5470 mlx5_flow_stop_default(struct rte_eth_dev *dev)
5471 {
5472         flow_mreg_del_default_copy_action(dev);
5473         flow_rxq_flags_clear(dev);
5474 }
5475
5476 /**
5477  * Start all default actions for flows.
5478  *
5479  * @param dev
5480  *   Pointer to Ethernet device.
5481  * @return
5482  *   0 on success, a negative errno value otherwise and rte_errno is set.
5483  */
5484 int
5485 mlx5_flow_start_default(struct rte_eth_dev *dev)
5486 {
5487         struct rte_flow_error error;
5488
5489         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
5490         return flow_mreg_add_default_copy_action(dev, &error);
5491 }
5492
5493 /**
5494  * Allocate intermediate resources for flow creation.
5495  *
5496  * @param dev
5497  *   Pointer to Ethernet device.
5498  */
5499 void
5500 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
5501 {
5502         struct mlx5_priv *priv = dev->data->dev_private;
5503
5504         if (!priv->inter_flows) {
5505                 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO,
5506                                     MLX5_NUM_MAX_DEV_FLOWS *
5507                                     sizeof(struct mlx5_flow) +
5508                                     (sizeof(struct mlx5_flow_rss_desc) +
5509                                     sizeof(uint16_t) * UINT16_MAX) * 2, 0,
5510                                     SOCKET_ID_ANY);
5511                 if (!priv->inter_flows) {
5512                         DRV_LOG(ERR, "can't allocate intermediate memory.");
5513                         return;
5514                 }
5515         }
5516         priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
5517                          [MLX5_NUM_MAX_DEV_FLOWS];
5518         /* Reset the index. */
5519         priv->flow_idx = 0;
5520         priv->flow_nested_idx = 0;
5521 }
5522
5523 /**
5524  * Free intermediate resources for flows.
5525  *
5526  * @param dev
5527  *   Pointer to Ethernet device.
5528  */
5529 void
5530 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
5531 {
5532         struct mlx5_priv *priv = dev->data->dev_private;
5533
5534         mlx5_free(priv->inter_flows);
5535         priv->inter_flows = NULL;
5536 }
5537
5538 /**
5539  * Verify the flow list is empty.
5540  *
5541  * @param dev
5542  *   Pointer to Ethernet device.
5543  *
5544  * @return The number of flows not released.
5545  */
5546 int
5547 mlx5_flow_verify(struct rte_eth_dev *dev)
5548 {
5549         struct mlx5_priv *priv = dev->data->dev_private;
5550         struct rte_flow *flow;
5551         uint32_t idx;
5552         int ret = 0;
5553
5554         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
5555                       flow, next) {
5556                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
5557                         dev->data->port_id, (void *)flow);
5558                 ++ret;
5559         }
5560         return ret;
5561 }
5562
5563 /**
5564  * Enable default hairpin egress flow.
5565  *
5566  * @param dev
5567  *   Pointer to Ethernet device.
5568  * @param queue
5569  *   The queue index.
5570  *
5571  * @return
5572  *   0 on success, a negative errno value otherwise and rte_errno is set.
5573  */
5574 int
5575 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
5576                             uint32_t queue)
5577 {
5578         struct mlx5_priv *priv = dev->data->dev_private;
5579         const struct rte_flow_attr attr = {
5580                 .egress = 1,
5581                 .priority = 0,
5582         };
5583         struct mlx5_rte_flow_item_tx_queue queue_spec = {
5584                 .queue = queue,
5585         };
5586         struct mlx5_rte_flow_item_tx_queue queue_mask = {
5587                 .queue = UINT32_MAX,
5588         };
5589         struct rte_flow_item items[] = {
5590                 {
5591                         .type = (enum rte_flow_item_type)
5592                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
5593                         .spec = &queue_spec,
5594                         .last = NULL,
5595                         .mask = &queue_mask,
5596                 },
5597                 {
5598                         .type = RTE_FLOW_ITEM_TYPE_END,
5599                 },
5600         };
5601         struct rte_flow_action_jump jump = {
5602                 .group = MLX5_HAIRPIN_TX_TABLE,
5603         };
5604         struct rte_flow_action actions[2];
5605         uint32_t flow_idx;
5606         struct rte_flow_error error;
5607
5608         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
5609         actions[0].conf = &jump;
5610         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
5611         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5612                                 &attr, items, actions, false, &error);
5613         if (!flow_idx) {
5614                 DRV_LOG(DEBUG,
5615                         "Failed to create ctrl flow: rte_errno(%d),"
5616                         " type(%d), message(%s)",
5617                         rte_errno, error.type,
5618                         error.message ? error.message : " (no stated reason)");
5619                 return -rte_errno;
5620         }
5621         return 0;
5622 }
5623
5624 /**
5625  * Enable a control flow configured from the control plane.
5626  *
5627  * @param dev
5628  *   Pointer to Ethernet device.
5629  * @param eth_spec
5630  *   An Ethernet flow spec to apply.
5631  * @param eth_mask
5632  *   An Ethernet flow mask to apply.
5633  * @param vlan_spec
5634  *   A VLAN flow spec to apply.
5635  * @param vlan_mask
5636  *   A VLAN flow mask to apply.
5637  *
5638  * @return
5639  *   0 on success, a negative errno value otherwise and rte_errno is set.
5640  */
5641 int
5642 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
5643                     struct rte_flow_item_eth *eth_spec,
5644                     struct rte_flow_item_eth *eth_mask,
5645                     struct rte_flow_item_vlan *vlan_spec,
5646                     struct rte_flow_item_vlan *vlan_mask)
5647 {
5648         struct mlx5_priv *priv = dev->data->dev_private;
5649         const struct rte_flow_attr attr = {
5650                 .ingress = 1,
5651                 .priority = MLX5_FLOW_PRIO_RSVD,
5652         };
5653         struct rte_flow_item items[] = {
5654                 {
5655                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5656                         .spec = eth_spec,
5657                         .last = NULL,
5658                         .mask = eth_mask,
5659                 },
5660                 {
5661                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
5662                                               RTE_FLOW_ITEM_TYPE_END,
5663                         .spec = vlan_spec,
5664                         .last = NULL,
5665                         .mask = vlan_mask,
5666                 },
5667                 {
5668                         .type = RTE_FLOW_ITEM_TYPE_END,
5669                 },
5670         };
5671         uint16_t queue[priv->reta_idx_n];
5672         struct rte_flow_action_rss action_rss = {
5673                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
5674                 .level = 0,
5675                 .types = priv->rss_conf.rss_hf,
5676                 .key_len = priv->rss_conf.rss_key_len,
5677                 .queue_num = priv->reta_idx_n,
5678                 .key = priv->rss_conf.rss_key,
5679                 .queue = queue,
5680         };
5681         struct rte_flow_action actions[] = {
5682                 {
5683                         .type = RTE_FLOW_ACTION_TYPE_RSS,
5684                         .conf = &action_rss,
5685                 },
5686                 {
5687                         .type = RTE_FLOW_ACTION_TYPE_END,
5688                 },
5689         };
5690         uint32_t flow_idx;
5691         struct rte_flow_error error;
5692         unsigned int i;
5693
5694         if (!priv->reta_idx_n || !priv->rxqs_n)
5695                 return 0;
5697         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
5698                 action_rss.types = 0;
5699         for (i = 0; i != priv->reta_idx_n; ++i)
5700                 queue[i] = (*priv->reta_idx)[i];
5701         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5702                                 &attr, items, actions, false, &error);
5703         if (!flow_idx)
5704                 return -rte_errno;
5705         return 0;
5706 }
5707
5708 /**
5709  * Enable a control flow configured from the control plane.
5710  *
5711  * @param dev
5712  *   Pointer to Ethernet device.
5713  * @param eth_spec
5714  *   An Ethernet flow spec to apply.
5715  * @param eth_mask
5716  *   An Ethernet flow mask to apply.
5717  *
5718  * @return
5719  *   0 on success, a negative errno value otherwise and rte_errno is set.
5720  */
5721 int
5722 mlx5_ctrl_flow(struct rte_eth_dev *dev,
5723                struct rte_flow_item_eth *eth_spec,
5724                struct rte_flow_item_eth *eth_mask)
5725 {
5726         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
5727 }
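
/*
 * A sketch of the typical caller pattern (an assumption, not driver code):
 * the traffic-enable path builds a unicast destination MAC spec/mask pair
 * and hands it to mlx5_ctrl_flow(). "example_enable_unicast" and the guard
 * macro are hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLE
static int
example_enable_unicast(struct rte_eth_dev *dev,
                       const struct rte_ether_addr *mac)
{
        struct rte_flow_item_eth spec = { .type = 0 };
        struct rte_flow_item_eth mask = { .type = 0 };

        /* Match the full destination MAC; any source, any ether type. */
        spec.dst = *mac;
        memset(&mask.dst, 0xff, sizeof(mask.dst));
        return mlx5_ctrl_flow(dev, &spec, &mask);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLE */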
5728
5729 /**
5730  * Create the default miss flow rule matching LACP traffic.
5731  *
5732  * @param dev
5733  *   Pointer to Ethernet device.
5736  *
5737  * @return
5738  *   0 on success, a negative errno value otherwise and rte_errno is set.
5739  */
5740 int
5741 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
5742 {
5743         struct mlx5_priv *priv = dev->data->dev_private;
5744         /*
5745          * LACP matching is done using only the ether type, since matching
5746          * on a multicast destination MAC causes the kernel to give this
5747          * flow low priority.
5747          */
5748         static const struct rte_flow_item_eth lacp_spec = {
5749                 .type = RTE_BE16(0x8809),
5750         };
5751         static const struct rte_flow_item_eth lacp_mask = {
5752                 .type = 0xffff,
5753         };
5754         const struct rte_flow_attr attr = {
5755                 .ingress = 1,
5756         };
5757         struct rte_flow_item items[] = {
5758                 {
5759                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5760                         .spec = &lacp_spec,
5761                         .mask = &lacp_mask,
5762                 },
5763                 {
5764                         .type = RTE_FLOW_ITEM_TYPE_END,
5765                 },
5766         };
5767         struct rte_flow_action actions[] = {
5768                 {
5769                         .type = (enum rte_flow_action_type)
5770                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
5771                 },
5772                 {
5773                         .type = RTE_FLOW_ACTION_TYPE_END,
5774                 },
5775         };
5776         struct rte_flow_error error;
5777         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5778                                 &attr, items, actions, false, &error);
5779
5780         if (!flow_idx)
5781                 return -rte_errno;
5782         return 0;
5783 }
5784
5785 /**
5786  * Destroy a flow.
5787  *
5788  * @see rte_flow_destroy()
5789  * @see rte_flow_ops
5790  */
5791 int
5792 mlx5_flow_destroy(struct rte_eth_dev *dev,
5793                   struct rte_flow *flow,
5794                   struct rte_flow_error *error __rte_unused)
5795 {
5796         struct mlx5_priv *priv = dev->data->dev_private;
5797
5798         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
5799         return 0;
5800 }
5801
5802 /**
5803  * Destroy all flows.
5804  *
5805  * @see rte_flow_flush()
5806  * @see rte_flow_ops
5807  */
5808 int
5809 mlx5_flow_flush(struct rte_eth_dev *dev,
5810                 struct rte_flow_error *error __rte_unused)
5811 {
5812         struct mlx5_priv *priv = dev->data->dev_private;
5813
5814         mlx5_flow_list_flush(dev, &priv->flows, false);
5815         return 0;
5816 }
5817
5818 /**
5819  * Isolated mode.
5820  *
5821  * @see rte_flow_isolate()
5822  * @see rte_flow_ops
5823  */
5824 int
5825 mlx5_flow_isolate(struct rte_eth_dev *dev,
5826                   int enable,
5827                   struct rte_flow_error *error)
5828 {
5829         struct mlx5_priv *priv = dev->data->dev_private;
5830
5831         if (dev->data->dev_started) {
5832                 rte_flow_error_set(error, EBUSY,
5833                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5834                                    NULL,
5835                                    "port must be stopped first");
5836                 return -rte_errno;
5837         }
5838         priv->isolated = !!enable;
5839         if (enable)
5840                 dev->dev_ops = &mlx5_os_dev_ops_isolate;
5841         else
5842                 dev->dev_ops = &mlx5_os_dev_ops;
5843
5844         dev->rx_descriptor_status = mlx5_rx_descriptor_status;
5845         dev->tx_descriptor_status = mlx5_tx_descriptor_status;
5846
5847         return 0;
5848 }
5849
5850 /**
5851  * Query a flow.
5852  *
5853  * @see rte_flow_query()
5854  * @see rte_flow_ops
5855  */
5856 static int
5857 flow_drv_query(struct rte_eth_dev *dev,
5858                uint32_t flow_idx,
5859                const struct rte_flow_action *actions,
5860                void *data,
5861                struct rte_flow_error *error)
5862 {
5863         struct mlx5_priv *priv = dev->data->dev_private;
5864         const struct mlx5_flow_driver_ops *fops;
5865         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5866                                                [MLX5_IPOOL_RTE_FLOW],
5867                                                flow_idx);
5868         enum mlx5_flow_drv_type ftype;
5869
5870         if (!flow) {
5871                 return rte_flow_error_set(error, ENOENT,
5872                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5873                           NULL,
5874                           "invalid flow handle");
5875         }
5876         ftype = flow->drv_type;
5877         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
5878         fops = flow_get_drv_ops(ftype);
5879
5880         return fops->query(dev, flow, actions, data, error);
5881 }
5882
5883 /**
5884  * Query a flow.
5885  *
5886  * @see rte_flow_query()
5887  * @see rte_flow_ops
5888  */
5889 int
5890 mlx5_flow_query(struct rte_eth_dev *dev,
5891                 struct rte_flow *flow,
5892                 const struct rte_flow_action *actions,
5893                 void *data,
5894                 struct rte_flow_error *error)
5895 {
5896         int ret;
5897
5898         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
5899                              error);
5900         if (ret < 0)
5901                 return ret;
5902         return 0;
5903 }
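
/*
 * Usage sketch (an assumed example): querying a COUNT action through the
 * generic API lands in mlx5_flow_query() above. The flow is assumed to have
 * been created with RTE_FLOW_ACTION_TYPE_COUNT among its actions, and
 * counter support depends on the underlying flow driver.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLE
static int
example_query_count(uint16_t port_id, struct rte_flow *flow,
                    uint64_t *hits, uint64_t *bytes)
{
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_COUNT },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_query_count count = { .reset = 0 };
        struct rte_flow_error error;

        if (rte_flow_query(port_id, flow, actions, &count, &error))
                return -rte_errno;
        /* hits_set/bytes_set report whether each field is valid. */
        *hits = count.hits_set ? count.hits : 0;
        *bytes = count.bytes_set ? count.bytes : 0;
        return 0;
}
#endif /* MLX5_FLOW_USAGE_EXAMPLE */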
5904
5905 /**
5906  * Convert a flow director filter to a generic flow.
5907  *
5908  * @param dev
5909  *   Pointer to Ethernet device.
5910  * @param fdir_filter
5911  *   Flow director filter to add.
5912  * @param attributes
5913  *   Generic flow parameters structure.
5914  *
5915  * @return
5916  *   0 on success, a negative errno value otherwise and rte_errno is set.
5917  */
5918 static int
5919 flow_fdir_filter_convert(struct rte_eth_dev *dev,
5920                          const struct rte_eth_fdir_filter *fdir_filter,
5921                          struct mlx5_fdir *attributes)
5922 {
5923         struct mlx5_priv *priv = dev->data->dev_private;
5924         const struct rte_eth_fdir_input *input = &fdir_filter->input;
5925         const struct rte_eth_fdir_masks *mask =
5926                 &dev->data->dev_conf.fdir_conf.mask;
5927
5928         /* Validate queue number. */
5929         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
5930                 DRV_LOG(ERR, "port %u invalid queue number %d",
5931                         dev->data->port_id, fdir_filter->action.rx_queue);
5932                 rte_errno = EINVAL;
5933                 return -rte_errno;
5934         }
5935         attributes->attr.ingress = 1;
5936         attributes->items[0] = (struct rte_flow_item) {
5937                 .type = RTE_FLOW_ITEM_TYPE_ETH,
5938                 .spec = &attributes->l2,
5939                 .mask = &attributes->l2_mask,
5940         };
5941         switch (fdir_filter->action.behavior) {
5942         case RTE_ETH_FDIR_ACCEPT:
5943                 attributes->actions[0] = (struct rte_flow_action){
5944                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
5945                         .conf = &attributes->queue,
5946                 };
5947                 break;
5948         case RTE_ETH_FDIR_REJECT:
5949                 attributes->actions[0] = (struct rte_flow_action){
5950                         .type = RTE_FLOW_ACTION_TYPE_DROP,
5951                 };
5952                 break;
5953         default:
5954                 DRV_LOG(ERR, "port %u invalid behavior %d",
5955                         dev->data->port_id,
5956                         fdir_filter->action.behavior);
5957                 rte_errno = ENOTSUP;
5958                 return -rte_errno;
5959         }
5960         attributes->queue.index = fdir_filter->action.rx_queue;
5961         /* Handle L3. */
5962         switch (fdir_filter->input.flow_type) {
5963         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5964         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5965         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5966                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
5967                         .src_addr = input->flow.ip4_flow.src_ip,
5968                         .dst_addr = input->flow.ip4_flow.dst_ip,
5969                         .time_to_live = input->flow.ip4_flow.ttl,
5970                         .type_of_service = input->flow.ip4_flow.tos,
5971                 };
5972                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
5973                         .src_addr = mask->ipv4_mask.src_ip,
5974                         .dst_addr = mask->ipv4_mask.dst_ip,
5975                         .time_to_live = mask->ipv4_mask.ttl,
5976                         .type_of_service = mask->ipv4_mask.tos,
5977                         .next_proto_id = mask->ipv4_mask.proto,
5978                 };
5979                 attributes->items[1] = (struct rte_flow_item){
5980                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
5981                         .spec = &attributes->l3,
5982                         .mask = &attributes->l3_mask,
5983                 };
5984                 break;
5985         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5986         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5987         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5988                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
5989                         .hop_limits = input->flow.ipv6_flow.hop_limits,
5990                         .proto = input->flow.ipv6_flow.proto,
5991                 };
5992
5993                 memcpy(attributes->l3.ipv6.hdr.src_addr,
5994                        input->flow.ipv6_flow.src_ip,
5995                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5996                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
5997                        input->flow.ipv6_flow.dst_ip,
5998                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
5999                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
6000                        mask->ipv6_mask.src_ip,
6001                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
6002                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
6003                        mask->ipv6_mask.dst_ip,
6004                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
6005                 attributes->items[1] = (struct rte_flow_item){
6006                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
6007                         .spec = &attributes->l3,
6008                         .mask = &attributes->l3_mask,
6009                 };
6010                 break;
6011         default:
6012                 DRV_LOG(ERR, "port %u invalid flow type %d",
6013                         dev->data->port_id, fdir_filter->input.flow_type);
6014                 rte_errno = ENOTSUP;
6015                 return -rte_errno;
6016         }
6017         /* Handle L4. */
6018         switch (fdir_filter->input.flow_type) {
6019         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
6020                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
6021                         .src_port = input->flow.udp4_flow.src_port,
6022                         .dst_port = input->flow.udp4_flow.dst_port,
6023                 };
6024                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
6025                         .src_port = mask->src_port_mask,
6026                         .dst_port = mask->dst_port_mask,
6027                 };
6028                 attributes->items[2] = (struct rte_flow_item){
6029                         .type = RTE_FLOW_ITEM_TYPE_UDP,
6030                         .spec = &attributes->l4,
6031                         .mask = &attributes->l4_mask,
6032                 };
6033                 break;
6034         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
6035                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
6036                         .src_port = input->flow.tcp4_flow.src_port,
6037                         .dst_port = input->flow.tcp4_flow.dst_port,
6038                 };
6039                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
6040                         .src_port = mask->src_port_mask,
6041                         .dst_port = mask->dst_port_mask,
6042                 };
6043                 attributes->items[2] = (struct rte_flow_item){
6044                         .type = RTE_FLOW_ITEM_TYPE_TCP,
6045                         .spec = &attributes->l4,
6046                         .mask = &attributes->l4_mask,
6047                 };
6048                 break;
6049         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
6050                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
6051                         .src_port = input->flow.udp6_flow.src_port,
6052                         .dst_port = input->flow.udp6_flow.dst_port,
6053                 };
6054                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
6055                         .src_port = mask->src_port_mask,
6056                         .dst_port = mask->dst_port_mask,
6057                 };
6058                 attributes->items[2] = (struct rte_flow_item){
6059                         .type = RTE_FLOW_ITEM_TYPE_UDP,
6060                         .spec = &attributes->l4,
6061                         .mask = &attributes->l4_mask,
6062                 };
6063                 break;
6064         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
6065                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
6066                         .src_port = input->flow.tcp6_flow.src_port,
6067                         .dst_port = input->flow.tcp6_flow.dst_port,
6068                 };
6069                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
6070                         .src_port = mask->src_port_mask,
6071                         .dst_port = mask->dst_port_mask,
6072                 };
6073                 attributes->items[2] = (struct rte_flow_item){
6074                         .type = RTE_FLOW_ITEM_TYPE_TCP,
6075                         .spec = &attributes->l4,
6076                         .mask = &attributes->l4_mask,
6077                 };
6078                 break;
6079         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
6080         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
6081                 break;
6082         default:
6083                 DRV_LOG(ERR, "port %u invalid flow type %d",
6084                         dev->data->port_id, fdir_filter->input.flow_type);
6085                 rte_errno = ENOTSUP;
6086                 return -rte_errno;
6087         }
6088         return 0;
6089 }
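
/*
 * For reference, a minimal legacy filter of the shape the converter above
 * accepts (all values are assumptions for illustration): a non-fragmented
 * IPv4/UDP flow accepted on Rx queue 0. Only fields that
 * flow_fdir_filter_convert() reads are set; addresses and ports are
 * big-endian as required by the legacy FDIR API.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLE
static const struct rte_eth_fdir_filter example_fdir_filter = {
        .input = {
                .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
                .flow.udp4_flow = {
                        .ip = {
                                .src_ip = RTE_BE32(0x0a000001), /* 10.0.0.1 */
                                .dst_ip = RTE_BE32(0x0a000002), /* 10.0.0.2 */
                        },
                        .src_port = RTE_BE16(1024),
                        .dst_port = RTE_BE16(4789),
                },
        },
        .action = {
                .rx_queue = 0,
                .behavior = RTE_ETH_FDIR_ACCEPT,
        },
};
#endif /* MLX5_FLOW_USAGE_EXAMPLE */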
6090
6091 #define FLOW_FDIR_CMP(f1, f2, fld) \
6092         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
6093
6094 /**
6095  * Compare two FDIR flows. If items and actions are identical, the two flows
6096  * are regarded as the same.
6097  *
6100  * @param f1
6101  *   FDIR flow to compare.
6102  * @param f2
6103  *   FDIR flow to compare.
6104  *
6105  * @return
6106  *   Zero on match, 1 otherwise.
6107  */
6108 static int
6109 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
6110 {
6111         if (FLOW_FDIR_CMP(f1, f2, attr) ||
6112             FLOW_FDIR_CMP(f1, f2, l2) ||
6113             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
6114             FLOW_FDIR_CMP(f1, f2, l3) ||
6115             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
6116             FLOW_FDIR_CMP(f1, f2, l4) ||
6117             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
6118             FLOW_FDIR_CMP(f1, f2, actions[0].type))
6119                 return 1;
6120         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
6121             FLOW_FDIR_CMP(f1, f2, queue))
6122                 return 1;
6123         return 0;
6124 }
6125
6126 /**
6127  * Search the device flow list to find a matching FDIR flow.
6128  *
6129  * @param dev
6130  *   Pointer to Ethernet device.
6131  * @param fdir_flow
6132  *   FDIR flow to lookup.
6133  *
6134  * @return
6135  *   Index of flow if found, 0 otherwise.
6136  */
6137 static uint32_t
6138 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
6139 {
6140         struct mlx5_priv *priv = dev->data->dev_private;
6141         uint32_t flow_idx = 0;
6142         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6143
6144         MLX5_ASSERT(fdir_flow);
6145         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
6146                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) {
6147                         flow_idx = priv_fdir_flow->rix_flow;
6148                         DRV_LOG(DEBUG, "port %u found FDIR flow %u",
6149                                 dev->data->port_id, flow_idx);
6150                         break;
6151                 }
6152         }
6153         return flow_idx;
6154 }
6155
6156 /**
6157  * Add new flow director filter and store it in list.
6158  *
6159  * @param dev
6160  *   Pointer to Ethernet device.
6161  * @param fdir_filter
6162  *   Flow director filter to add.
6163  *
6164  * @return
6165  *   0 on success, a negative errno value otherwise and rte_errno is set.
6166  */
6167 static int
6168 flow_fdir_filter_add(struct rte_eth_dev *dev,
6169                      const struct rte_eth_fdir_filter *fdir_filter)
6170 {
6171         struct mlx5_priv *priv = dev->data->dev_private;
6172         struct mlx5_fdir *fdir_flow;
6173         struct rte_flow *flow;
6174         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6175         uint32_t flow_idx;
6176         int ret;
6177
6178         fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0,
6179                                 SOCKET_ID_ANY);
6180         if (!fdir_flow) {
6181                 rte_errno = ENOMEM;
6182                 return -rte_errno;
6183         }
6184         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
6185         if (ret)
6186                 goto error;
6187         flow_idx = flow_fdir_filter_lookup(dev, fdir_flow);
6188         if (flow_idx) {
6189                 rte_errno = EEXIST;
6190                 goto error;
6191         }
6192         priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO,
6193                                      sizeof(struct mlx5_fdir_flow),
6194                                      0, SOCKET_ID_ANY);
6195         if (!priv_fdir_flow) {
6196                 rte_errno = ENOMEM;
6197                 goto error;
6198         }
6199         flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
6200                                     fdir_flow->items, fdir_flow->actions, true,
6201                                     NULL);
6202         flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
6203         if (!flow)
6204                 goto error;
6205         flow->fdir = 1;
6206         priv_fdir_flow->fdir = fdir_flow;
6207         priv_fdir_flow->rix_flow = flow_idx;
6208         LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next);
6209         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
6210                 dev->data->port_id, (void *)flow);
6211         return 0;
6212 error:
6213         mlx5_free(priv_fdir_flow);
6214         mlx5_free(fdir_flow);
6215         return -rte_errno;
6216 }
6217
6218 /**
6219  * Delete specific filter.
6220  *
6221  * @param dev
6222  *   Pointer to Ethernet device.
6223  * @param fdir_filter
6224  *   Filter to be deleted.
6225  *
6226  * @return
6227  *   0 on success, a negative errno value otherwise and rte_errno is set.
6228  */
6229 static int
6230 flow_fdir_filter_delete(struct rte_eth_dev *dev,
6231                         const struct rte_eth_fdir_filter *fdir_filter)
6232 {
6233         struct mlx5_priv *priv = dev->data->dev_private;
6234         uint32_t flow_idx;
6235         struct mlx5_fdir fdir_flow = {
6236                 .attr.group = 0,
6237         };
6238         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6239         int ret;
6240
6241         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
6242         if (ret)
6243                 return -rte_errno;
6244         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
6245                 /* Find the FDIR flow in the private list. */
6246                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
6247                         break;
6248         }
6249         if (!priv_fdir_flow)
6250                 return 0;
6251         LIST_REMOVE(priv_fdir_flow, next);
6252         flow_idx = priv_fdir_flow->rix_flow;
6253         flow_list_destroy(dev, &priv->flows, flow_idx);
6254         mlx5_free(priv_fdir_flow->fdir);
6255         mlx5_free(priv_fdir_flow);
6256         DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
6257                 dev->data->port_id, flow_idx);
6258         return 0;
6259 }
6260
6261 /**
6262  * Update a specific filter, implemented as deleting and re-adding it.
6263  *
6264  * @param dev
6265  *   Pointer to Ethernet device.
6266  * @param fdir_filter
6267  *   Filter to be updated.
6268  *
6269  * @return
6270  *   0 on success, a negative errno value otherwise and rte_errno is set.
6271  */
6272 static int
6273 flow_fdir_filter_update(struct rte_eth_dev *dev,
6274                         const struct rte_eth_fdir_filter *fdir_filter)
6275 {
6276         int ret;
6277
6278         ret = flow_fdir_filter_delete(dev, fdir_filter);
6279         if (ret)
6280                 return ret;
6281         return flow_fdir_filter_add(dev, fdir_filter);
6282 }
6283
6284 /**
6285  * Flush all filters.
6286  *
6287  * @param dev
6288  *   Pointer to Ethernet device.
6289  */
6290 static void
6291 flow_fdir_filter_flush(struct rte_eth_dev *dev)
6292 {
6293         struct mlx5_priv *priv = dev->data->dev_private;
6294         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
6295
6296         while (!LIST_EMPTY(&priv->fdir_flows)) {
6297                 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
6298                 LIST_REMOVE(priv_fdir_flow, next);
6299                 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
6300                 mlx5_free(priv_fdir_flow->fdir);
6301                 mlx5_free(priv_fdir_flow);
6302         }
6303 }
6304
6305 /**
6306  * Get flow director information.
6307  *
6308  * @param dev
6309  *   Pointer to Ethernet device.
6310  * @param[out] fdir_info
6311  *   Resulting flow director information.
6312  */
6313 static void
6314 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
6315 {
6316         struct rte_eth_fdir_masks *mask =
6317                 &dev->data->dev_conf.fdir_conf.mask;
6318
6319         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
6320         fdir_info->guarant_spc = 0;
6321         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
6322         fdir_info->max_flexpayload = 0;
6323         fdir_info->flow_types_mask[0] = 0;
6324         fdir_info->flex_payload_unit = 0;
6325         fdir_info->max_flex_payload_segment_num = 0;
6326         fdir_info->flex_payload_limit = 0;
6327         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
6328 }
6329
6330 /**
6331  * Deal with flow director operations.
6332  *
6333  * @param dev
6334  *   Pointer to Ethernet device.
6335  * @param filter_op
6336  *   Operation to perform.
6337  * @param arg
6338  *   Pointer to operation-specific structure.
6339  *
6340  * @return
6341  *   0 on success, a negative errno value otherwise and rte_errno is set.
6342  */
6343 static int
6344 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
6345                     void *arg)
6346 {
6347         enum rte_fdir_mode fdir_mode =
6348                 dev->data->dev_conf.fdir_conf.mode;
6349
6350         if (filter_op == RTE_ETH_FILTER_NOP)
6351                 return 0;
6352         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
6353             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
6354                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
6355                         dev->data->port_id, fdir_mode);
6356                 rte_errno = EINVAL;
6357                 return -rte_errno;
6358         }
6359         switch (filter_op) {
6360         case RTE_ETH_FILTER_ADD:
6361                 return flow_fdir_filter_add(dev, arg);
6362         case RTE_ETH_FILTER_UPDATE:
6363                 return flow_fdir_filter_update(dev, arg);
6364         case RTE_ETH_FILTER_DELETE:
6365                 return flow_fdir_filter_delete(dev, arg);
6366         case RTE_ETH_FILTER_FLUSH:
6367                 flow_fdir_filter_flush(dev);
6368                 break;
6369         case RTE_ETH_FILTER_INFO:
6370                 flow_fdir_info_get(dev, arg);
6371                 break;
6372         default:
6373                 DRV_LOG(DEBUG, "port %u unknown operation %u",
6374                         dev->data->port_id, filter_op);
6375                 rte_errno = EINVAL;
6376                 return -rte_errno;
6377         }
6378         return 0;
6379 }
6380
6381 /**
6382  * Manage filter operations.
6383  *
6384  * @param dev
6385  *   Pointer to Ethernet device structure.
6386  * @param filter_type
6387  *   Filter type.
6388  * @param filter_op
6389  *   Operation to perform.
6390  * @param arg
6391  *   Pointer to operation-specific structure.
6392  *
6393  * @return
6394  *   0 on success, a negative errno value otherwise and rte_errno is set.
6395  */
6396 int
6397 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
6398                      enum rte_filter_type filter_type,
6399                      enum rte_filter_op filter_op,
6400                      void *arg)
6401 {
6402         switch (filter_type) {
6403         case RTE_ETH_FILTER_GENERIC:
6404                 if (filter_op != RTE_ETH_FILTER_GET) {
6405                         rte_errno = EINVAL;
6406                         return -rte_errno;
6407                 }
6408                 *(const void **)arg = &mlx5_flow_ops;
6409                 return 0;
6410         case RTE_ETH_FILTER_FDIR:
6411                 return flow_fdir_ctrl_func(dev, filter_op, arg);
6412         default:
6413                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
6414                         dev->data->port_id, filter_type);
6415                 rte_errno = ENOTSUP;
6416                 return -rte_errno;
6417         }
6418         return 0;
6419 }
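
/*
 * Sketch (assumption): how a caller retrieves the generic flow ops through
 * the entry point above; RTE_ETH_FILTER_GENERIC with RTE_ETH_FILTER_GET
 * stores &mlx5_flow_ops into the argument pointer. "example_get_flow_ops"
 * and the guard macro are hypothetical.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLE
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
        const struct rte_flow_ops *ops = NULL;

        if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                 RTE_ETH_FILTER_GET, &ops) < 0)
                return NULL;
        return ops;
}
#endif /* MLX5_FLOW_USAGE_EXAMPLE */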
6420
6421 /**
6422  * Create the needed meter and suffix tables.
6423  *
6424  * @param[in] dev
6425  *   Pointer to Ethernet device.
6426  * @param[in] fm
6427  *   Pointer to the flow meter.
6428  *
6429  * @return
6430  *   Pointer to table set on success, NULL otherwise.
6431  */
6432 struct mlx5_meter_domains_infos *
6433 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
6434                           const struct mlx5_flow_meter *fm)
6435 {
6436         const struct mlx5_flow_driver_ops *fops;
6437
6438         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6439         return fops->create_mtr_tbls(dev, fm);
6440 }
6441
6442 /**
6443  * Destroy the meter table set.
6444  *
6445  * @param[in] dev
6446  *   Pointer to Ethernet device.
6447  * @param[in] tbl
6448  *   Pointer to the meter table set.
6449  *
6450  * @return
6451  *   0 on success.
6452  */
6453 int
6454 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
6455                            struct mlx5_meter_domains_infos *tbls)
6456 {
6457         const struct mlx5_flow_driver_ops *fops;
6458
6459         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6460         return fops->destroy_mtr_tbls(dev, tbls);
6461 }
6462
6463 /**
6464  * Create policer rules.
6465  *
6466  * @param[in] dev
6467  *   Pointer to Ethernet device.
6468  * @param[in] fm
6469  *   Pointer to flow meter structure.
6470  * @param[in] attr
6471  *   Pointer to flow attributes.
6472  *
6473  * @return
6474  *   0 on success, -1 otherwise.
6475  */
6476 int
6477 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
6478                                struct mlx5_flow_meter *fm,
6479                                const struct rte_flow_attr *attr)
6480 {
6481         const struct mlx5_flow_driver_ops *fops;
6482
6483         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6484         return fops->create_policer_rules(dev, fm, attr);
6485 }
6486
6487 /**
6488  * Destroy policer rules.
6489  *
6490  * @param[in] fm
6491  *   Pointer to flow meter structure.
6492  * @param[in] attr
6493  *   Pointer to flow attributes.
6494  *
6495  * @return
6496  *   0 on success, -1 otherwise.
6497  */
6498 int
6499 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
6500                                 struct mlx5_flow_meter *fm,
6501                                 const struct rte_flow_attr *attr)
6502 {
6503         const struct mlx5_flow_driver_ops *fops;
6504
6505         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6506         return fops->destroy_policer_rules(dev, fm, attr);
6507 }
6508
6509 /**
6510  * Allocate a counter.
6511  *
6512  * @param[in] dev
6513  *   Pointer to Ethernet device structure.
6514  *
6515  * @return
6516  *   Index to the allocated counter on success, 0 otherwise.
6517  */
6518 uint32_t
6519 mlx5_counter_alloc(struct rte_eth_dev *dev)
6520 {
6521         const struct mlx5_flow_driver_ops *fops;
6522         struct rte_flow_attr attr = { .transfer = 0 };
6523
6524         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6525                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6526                 return fops->counter_alloc(dev);
6527         }
6528         DRV_LOG(ERR,
6529                 "port %u counter allocate is not supported.",
6530                  dev->data->port_id);
6531         return 0;
6532 }
6533
6534 /**
6535  * Free a counter.
6536  *
6537  * @param[in] dev
6538  *   Pointer to Ethernet device structure.
6539  * @param[in] cnt
6540  *   Index of the counter to be freed.
6541  */
6542 void
6543 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
6544 {
6545         const struct mlx5_flow_driver_ops *fops;
6546         struct rte_flow_attr attr = { .transfer = 0 };
6547
6548         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6549                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6550                 fops->counter_free(dev, cnt);
6551                 return;
6552         }
6553         DRV_LOG(ERR,
6554                 "port %u counter free is not supported.",
6555                  dev->data->port_id);
6556 }
6557
6558 /**
6559  * Query counter statistics.
6560  *
6561  * @param[in] dev
6562  *   Pointer to Ethernet device structure.
6563  * @param[in] cnt
6564  *   Index to counter to query.
6565  * @param[in] clear
6566  *   Set to clear counter statistics.
6567  * @param[out] pkts
6568  *   Pointer where the counter's packet hit count is saved.
6569  * @param[out] bytes
6570  *   Pointer where the counter's byte hit count is saved.
6571  *
6572  * @return
6573  *   0 on success, a negative errno value otherwise.
6574  */
6575 int
6576 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
6577                    bool clear, uint64_t *pkts, uint64_t *bytes)
6578 {
6579         const struct mlx5_flow_driver_ops *fops;
6580         struct rte_flow_attr attr = { .transfer = 0 };
6581
6582         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6583                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6584                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
6585         }
6586         DRV_LOG(ERR,
6587                 "port %u counter query is not supported.",
6588                  dev->data->port_id);
6589         return -ENOTSUP;
6590 }
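
/*
 * Illustrative sketch, not part of the driver: the expected lifecycle of a
 * standalone counter using the three PMD helpers above. Error handling is
 * trimmed; "dev" is assumed to be a DV-enabled port.
 *
 *	uint64_t pkts, bytes;
 *	uint32_t cnt = mlx5_counter_alloc(dev);
 *
 *	if (cnt) {
 *		mlx5_counter_query(dev, cnt, false, &pkts, &bytes);
 *		mlx5_counter_free(dev, cnt);
 *	}
 */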
6591
6592 #define MLX5_POOL_QUERY_FREQ_US 1000000
6593
6594 /**
6595  * Get the number of all valid pools.
6596  *
6597  * @param[in] sh
6598  *   Pointer to mlx5_dev_ctx_shared object.
6599  *
6600  * @return
6601  *   The number of all valid pools.
6602  */
6603 static uint32_t
6604 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
6605 {
6606         int i;
6607         uint32_t pools_n = 0;
6608
6609         for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
6610                 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
6611         return pools_n;
6612 }
6613
6614 /**
6615  * Set the periodic procedure for triggering asynchronous batch queries for all
6616  * the counter pools.
6617  *
6618  * @param[in] sh
6619  *   Pointer to mlx5_dev_ctx_shared object.
6620  */
6621 void
6622 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6623 {
6624         uint32_t pools_n, us;
6625
6626         pools_n = mlx5_get_all_valid_pool_count(sh);
6627         us = MLX5_POOL_QUERY_FREQ_US / RTE_MAX(pools_n, 1u); /* no div by 0 */
6628         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6629         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6630                 sh->cmng.query_thread_on = 0;
6631                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
6632         } else {
6633                 sh->cmng.query_thread_on = 1;
6634         }
6635 }
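
/*
 * Worked example of the pacing arithmetic above: with MLX5_POOL_QUERY_FREQ_US
 * at 1000000 and, say, 4 valid pools, the alarm fires every 250000 us, so
 * each pool gets queried roughly once per second no matter how many pools
 * exist; the per-pool query rate stays constant as pools are added.
 */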
6636
6637 /**
6638  * The periodic procedure for triggering asynchronous batch queries for all the
6639  * counter pools. This function is intended to be called from the host thread.
6640  *
6641  * @param[in] arg
6642  *   The parameter for the alarm process.
6643  */
6644 void
6645 mlx5_flow_query_alarm(void *arg)
6646 {
6647         struct mlx5_dev_ctx_shared *sh = arg;
6648         struct mlx5_devx_obj *dcs;
6649         uint16_t offset;
6650         int ret;
6651         uint8_t batch = sh->cmng.batch;
6652         uint16_t pool_index = sh->cmng.pool_index;
6653         struct mlx5_pools_container *cont;
6654         struct mlx5_flow_counter_pool *pool;
6655         int cont_loop = MLX5_CCONT_TYPE_MAX;
6656
6657         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6658                 goto set_alarm;
6659 next_container:
6660         cont = MLX5_CNT_CONTAINER(sh, batch);
6661         rte_spinlock_lock(&cont->resize_sl);
6662         if (!cont->pools) {
6663                 rte_spinlock_unlock(&cont->resize_sl);
6664                 /* Check if all the containers are empty. */
6665                 if (unlikely(--cont_loop == 0))
6666                         goto set_alarm;
6667                 batch ^= 0x1;
6668                 pool_index = 0;
6669                 goto next_container;
6670         }
6671         pool = cont->pools[pool_index];
6672         rte_spinlock_unlock(&cont->resize_sl);
6673         if (pool->raw_hw)
6674                 /* There is a pool query in progress. */
6675                 goto set_alarm;
6676         pool->raw_hw =
6677                 LIST_FIRST(&sh->cmng.free_stat_raws);
6678         if (!pool->raw_hw)
6679                 /* No free counter statistics raw memory. */
6680                 goto set_alarm;
6681         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
6682                                                               (&pool->a64_dcs);
6683         if (dcs->id & (MLX5_CNT_BATCH_QUERY_ID_ALIGNMENT - 1)) {
6684                 /* Pool without valid counter. */
6685                 pool->raw_hw = NULL;
6686                 goto next_pool;
6687         }
6688         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
6689         /*
6690          * Identify the counters released between the query trigger and the
6691          * query handler more efficiently. A counter released in this gap
6692          * period must wait for a new query round, since its newly arrived
6693          * packets would not be taken into account.
6694          */
6695         pool->query_gen++;
6696         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
6697                                                offset, NULL, NULL,
6698                                                pool->raw_hw->mem_mng->dm->id,
6699                                                (void *)(uintptr_t)
6700                                                (pool->raw_hw->data + offset),
6701                                                sh->devx_comp,
6702                                                (uint64_t)(uintptr_t)pool);
6703         if (ret) {
6704                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6705                         " %d", pool->min_dcs->id);
6706                 pool->raw_hw = NULL;
6707                 goto set_alarm;
6708         }
6709         pool->raw_hw->min_dcs_id = dcs->id;
6710         LIST_REMOVE(pool->raw_hw, next);
6711         sh->cmng.pending_queries++;
6712 next_pool:
6713         pool_index++;
6714         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
6715                 batch ^= 0x1;
6716                 pool_index = 0;
6717         }
6718 set_alarm:
6719         sh->cmng.batch = batch;
6720         sh->cmng.pool_index = pool_index;
6721         mlx5_set_query_alarm(sh);
6722 }
6723
6724 /**
6725  * Check for newly aged flows in the counter pool and raise the aged-flow event.
6726  *
6727  * @param[in] sh
6728  *   Pointer to mlx5_dev_ctx_shared object.
6729  * @param[in] pool
6730  *   Pointer to the current counter pool.
6731  */
6732 static void
6733 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6734                    struct mlx5_flow_counter_pool *pool)
6735 {
6736         struct mlx5_priv *priv;
6737         struct mlx5_flow_counter *cnt;
6738         struct mlx5_age_info *age_info;
6739         struct mlx5_age_param *age_param;
6740         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6741         struct mlx5_counter_stats_raw *prev = pool->raw;
6742         const uint64_t curr_time = MLX5_CURR_TIME_SEC;
6743         const uint32_t time_delta = curr_time - pool->time_of_last_age_check;
6744         uint16_t expected = AGE_CANDIDATE;
6745         uint32_t i;
6746
6747         pool->time_of_last_age_check = curr_time;
6748         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6749                 cnt = MLX5_POOL_GET_CNT(pool, i);
6750                 age_param = MLX5_CNT_TO_AGE(cnt);
6751                 if (__atomic_load_n(&age_param->state,
6752                                     __ATOMIC_RELAXED) != AGE_CANDIDATE)
6753                         continue;
6754                 if (cur->data[i].hits != prev->data[i].hits) {
6755                         __atomic_store_n(&age_param->sec_since_last_hit, 0,
6756                                          __ATOMIC_RELAXED);
6757                         continue;
6758                 }
6759                 if (__atomic_add_fetch(&age_param->sec_since_last_hit,
6760                                        time_delta,
6761                                        __ATOMIC_RELAXED) <= age_param->timeout)
6762                         continue;
6763                 /*
6764                  * Hold the lock first; otherwise, if the release
6765                  * happens between setting the AGE_TMOUT state and
6766                  * the tailq operation, the release procedure may
6767                  * delete a non-existent tailq node.
6768                  */
6769                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6770                 age_info = GET_PORT_AGE_INFO(priv);
6771                 rte_spinlock_lock(&age_info->aged_sl);
6772                 if (__atomic_compare_exchange_n(&age_param->state, &expected,
6773                                                 AGE_TMOUT, false,
6774                                                 __ATOMIC_RELAXED,
6775                                                 __ATOMIC_RELAXED)) {
6776                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6777                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6778                 }
6779                 rte_spinlock_unlock(&age_info->aged_sl);
6780         }
6781         for (i = 0; i < sh->max_port; i++) {
6782                 age_info = &sh->port[i].age_info;
6783                 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
6784                         continue;
6785                 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
6786                         rte_eth_dev_callback_process
6787                                 (&rte_eth_devices[sh->port[i].devx_ih_port_id],
6788                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
6789                 age_info->flags = 0;
6790         }
6791 }
6792
6793 /**
6794  * Handler for the HW response carrying the ready values of an asynchronous
6795  * batch query. This function is intended to be called from the host thread.
6796  *
6797  * @param[in] sh
6798  *   The pointer to the shared device context.
6799  * @param[in] async_id
6800  *   The Devx async ID.
6801  * @param[in] status
6802  *   The status of the completion.
6803  */
6804 void
6805 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6806                                   uint64_t async_id, int status)
6807 {
6808         struct mlx5_flow_counter_pool *pool =
6809                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6810         struct mlx5_counter_stats_raw *raw_to_free;
6811         uint8_t query_gen = pool->query_gen ^ 1;
6812         struct mlx5_pools_container *cont =
6813                 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool));
6814         enum mlx5_counter_type cnt_type =
6815                 IS_AGE_POOL(pool) ? MLX5_COUNTER_TYPE_AGE :
6816                                     MLX5_COUNTER_TYPE_ORIGIN;
6817
6818         if (unlikely(status)) {
6819                 raw_to_free = pool->raw_hw;
6820         } else {
6821                 raw_to_free = pool->raw;
6822                 if (IS_AGE_POOL(pool))
6823                         mlx5_flow_aging_check(sh, pool);
6824                 rte_spinlock_lock(&pool->sl);
6825                 pool->raw = pool->raw_hw;
6826                 rte_spinlock_unlock(&pool->sl);
6827                 /* Be sure the new raw counters data is updated in memory. */
6828                 rte_io_wmb();
6829                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6830                         rte_spinlock_lock(&cont->csl);
6831                         TAILQ_CONCAT(&cont->counters[cnt_type],
6832                                      &pool->counters[query_gen], next);
6833                         rte_spinlock_unlock(&cont->csl);
6834                 }
6835         }
6836         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6837         pool->raw_hw = NULL;
6838         sh->cmng.pending_queries--;
6839 }
6840
6841 /**
6842  * Translate the rte_flow group index to HW table value.
6843  *
6844  * @param[in] attributes
6845  *   Pointer to flow attributes
6846  * @param[in] external
6847  *   Whether the flow rule was created by a request external to the PMD.
6848  * @param[in] group
6849  *   rte_flow group index value.
6850  * @param[in] fdb_def_rule
6851  *   Whether the FDB default rule (jump to table 1) is configured.
6852  * @param[out] table
6853  *   HW table value.
6854  * @param[out] error
6855  *   Pointer to error structure.
6856  *
6857  * @return
6858  *   0 on success, a negative errno value otherwise and rte_errno is set.
6859  */
6860 int
6861 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
6862                          uint32_t group, bool fdb_def_rule, uint32_t *table,
6863                          struct rte_flow_error *error)
6864 {
6865         if (attributes->transfer && external && fdb_def_rule) {
6866                 if (group == UINT32_MAX)
6867                         return rte_flow_error_set
6868                                                 (error, EINVAL,
6869                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6870                                                  NULL,
6871                                                  "group index not supported");
6872                 *table = group + 1;
6873         } else {
6874                 *table = group;
6875         }
6876         return 0;
6877 }
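
/*
 * Illustrative sketch, not part of the driver: the translation above in
 * practice. With transfer, external and fdb_def_rule all set, user group N
 * maps to HW table N + 1 because table 0 is taken by the FDB default rule;
 * otherwise the group index is used verbatim.
 *
 *	uint32_t table;
 *	struct rte_flow_error err;
 *	const struct rte_flow_attr attr = { .transfer = 1 };
 *
 *	if (!mlx5_flow_group_to_table(&attr, true, 0, true, &table, &err))
 *		... table == 1 here ...
 */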
6878
6879 /**
6880  * Discover availability of metadata reg_c's.
6881  *
6882  * Iteratively use test flows to check availability.
6883  *
6884  * @param[in] dev
6885  *   Pointer to the Ethernet device structure.
6886  *
6887  * @return
6888  *   0 on success, a negative errno value otherwise and rte_errno is set.
6889  */
6890 int
6891 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
6892 {
6893         struct mlx5_priv *priv = dev->data->dev_private;
6894         struct mlx5_dev_config *config = &priv->config;
6895         enum modify_reg idx;
6896         int n = 0;
6897
6898         /* reg_c[0] and reg_c[1] are reserved. */
6899         config->flow_mreg_c[n++] = REG_C_0;
6900         config->flow_mreg_c[n++] = REG_C_1;
6901         /* Discover availability of other reg_c's. */
6902         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6903                 struct rte_flow_attr attr = {
6904                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6905                         .priority = MLX5_FLOW_PRIO_RSVD,
6906                         .ingress = 1,
6907                 };
6908                 struct rte_flow_item items[] = {
6909                         [0] = {
6910                                 .type = RTE_FLOW_ITEM_TYPE_END,
6911                         },
6912                 };
6913                 struct rte_flow_action actions[] = {
6914                         [0] = {
6915                                 .type = (enum rte_flow_action_type)
6916                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6917                                 .conf = &(struct mlx5_flow_action_copy_mreg){
6918                                         .src = REG_C_1,
6919                                         .dst = idx,
6920                                 },
6921                         },
6922                         [1] = {
6923                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6924                                 .conf = &(struct rte_flow_action_jump){
6925                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6926                                 },
6927                         },
6928                         [2] = {
6929                                 .type = RTE_FLOW_ACTION_TYPE_END,
6930                         },
6931                 };
6932                 uint32_t flow_idx;
6933                 struct rte_flow *flow;
6934                 struct rte_flow_error error;
6935
6936                 if (!config->dv_flow_en)
6937                         break;
6938                 /* Create internal flow, validation skips copy action. */
6939                 flow_idx = flow_list_create(dev, NULL, &attr, items,
6940                                             actions, false, &error);
6941                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6942                                       flow_idx);
6943                 if (!flow)
6944                         continue;
6945                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6946                         config->flow_mreg_c[n++] = idx;
6947                 flow_list_destroy(dev, NULL, flow_idx);
6948         }
6949         for (; n < MLX5_MREG_C_NUM; ++n)
6950                 config->flow_mreg_c[n] = REG_NON;
6951         return 0;
6952 }
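
/*
 * Illustrative sketch, not part of the driver: how the discovery result is
 * meant to be consumed. Unavailable slots are terminated with REG_NON, so a
 * caller can count the usable registers as follows ("config" as above).
 *
 *	unsigned int avail = 0;
 *
 *	while (avail < MLX5_MREG_C_NUM &&
 *	       config->flow_mreg_c[avail] != REG_NON)
 *		++avail;
 */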
6953
6954 /**
6955  * Dump raw HW flow data to a file.
6956  *
6957  * @param[in] dev
6958  *    The pointer to Ethernet device.
6959  * @param[in] file
6960  *   A pointer to a file for output.
6961  * @param[out] error
6962  *   Perform verbose error reporting if not NULL. PMDs initialize this
6963  *   structure in case of error only.
6964  * @return
6965  *   0 on success, a negative value otherwise.
6966  */
6967 int
6968 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6969                    FILE *file,
6970                    struct rte_flow_error *error __rte_unused)
6971 {
6972         struct mlx5_priv *priv = dev->data->dev_private;
6973         struct mlx5_dev_ctx_shared *sh = priv->sh;
6974
6975         if (!priv->config.dv_flow_en) {
6976                 if (fputs("device dv flow disabled\n", file) <= 0)
6977                         return -errno;
6978                 return -ENOTSUP;
6979         }
6980         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6981                                        sh->tx_domain, file);
6982 }
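
/*
 * Illustrative sketch, not part of the driver: applications trigger this
 * handler through the stable rte_flow API rather than calling it directly.
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_dev_dump(port_id, stdout, &err))
 *		... dump failed, inspect rte_errno ...
 */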
6983
6984 /**
6985  * Get aged-out flows.
6986  *
6987  * @param[in] dev
6988  *   Pointer to the Ethernet device structure.
6989  * @param[in] context
6990  *   The address of an array of pointers to the aged-out flow contexts.
6991  * @param[in] nb_contexts
6992  *   The length of the context array.
6993  * @param[out] error
6994  *   Perform verbose error reporting if not NULL. Initialized in case of
6995  *   error only.
6996  *
6997  * @return
6998  *   The number of contexts retrieved on success, a negative errno value
6999  *   otherwise. If nb_contexts is 0, the total number of aged contexts is
7000  *   returned; otherwise, the number of aged flows reported in the context
7001  *   array is returned.
7002  */
7003 int
7004 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
7005                         uint32_t nb_contexts, struct rte_flow_error *error)
7006 {
7007         const struct mlx5_flow_driver_ops *fops;
7008         struct rte_flow_attr attr = { .transfer = 0 };
7009
7010         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
7011                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
7012                 return fops->get_aged_flows(dev, contexts, nb_contexts,
7013                                                     error);
7014         }
7015         DRV_LOG(ERR,
7016                 "port %u get aged flows is not supported.",
7017                  dev->data->port_id);
7018         return -ENOTSUP;
7019 }
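
/*
 * Illustrative sketch, not part of the driver: the usual two-step pattern
 * through the stable API. A first call with nb_contexts == 0 returns the
 * total count, then a sized call retrieves the contexts themselves.
 *
 *	struct rte_flow_error err;
 *	int n = rte_flow_get_aged_flows(port_id, NULL, 0, &err);
 *
 *	if (n > 0) {
 *		void **ctx = calloc(n, sizeof(*ctx));
 *
 *		if (ctx != NULL)
 *			n = rte_flow_get_aged_flows(port_id, ctx, n, &err);
 *		... process and free ctx ...
 *	}
 */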