/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include <mlx5_glue.h>
#include <mlx5_devx_cmds.h>
#include <mlx5_prm.h>

#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_flow.h"
#include "mlx5_rxtx.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/** Device flow drivers. */
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
#endif
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
        [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
        [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
        [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
        [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
        MLX5_EXPANSION_ROOT_OUTER,
        MLX5_EXPANSION_ROOT_ETH_VLAN,
        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_ETH,
        MLX5_EXPANSION_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_VLAN,
        MLX5_EXPANSION_OUTER_IPV4,
        MLX5_EXPANSION_OUTER_IPV4_UDP,
        MLX5_EXPANSION_OUTER_IPV4_TCP,
        MLX5_EXPANSION_OUTER_IPV6,
        MLX5_EXPANSION_OUTER_IPV6_UDP,
        MLX5_EXPANSION_OUTER_IPV6_TCP,
        MLX5_EXPANSION_VXLAN,
        MLX5_EXPANSION_VXLAN_GPE,
        MLX5_EXPANSION_GRE,
        MLX5_EXPANSION_MPLS,
        MLX5_EXPANSION_ETH,
        MLX5_EXPANSION_ETH_VLAN,
        MLX5_EXPANSION_VLAN,
        MLX5_EXPANSION_IPV4,
        MLX5_EXPANSION_IPV4_UDP,
        MLX5_EXPANSION_IPV4_TCP,
        MLX5_EXPANSION_IPV6,
        MLX5_EXPANSION_IPV6_UDP,
        MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
        [MLX5_EXPANSION_ROOT] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
                                                 MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_OUTER_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6,
                                                 MLX5_EXPANSION_MPLS),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_OUTER_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV4_UDP,
                         MLX5_EXPANSION_OUTER_IPV4_TCP,
                         MLX5_EXPANSION_GRE,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_OUTER_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV6_UDP,
                         MLX5_EXPANSION_OUTER_IPV6_TCP,
                         MLX5_EXPANSION_IPV4,
                         MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
        [MLX5_EXPANSION_VXLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        },
        [MLX5_EXPANSION_VXLAN_GPE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
        },
        [MLX5_EXPANSION_GRE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
                .type = RTE_FLOW_ITEM_TYPE_GRE,
        },
        [MLX5_EXPANSION_MPLS] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
        },
        [MLX5_EXPANSION_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
                                                 MLX5_EXPANSION_IPV4_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_IPV4_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
                                                 MLX5_EXPANSION_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_IPV6_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
};
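
/*
 * Illustrative sketch (not part of the original driver): how the expansion
 * table above is consumed. Given a pattern ETH / IPV4 / END and an RSS types
 * mask, rte_flow_expand_rss() walks the graph from MLX5_EXPANSION_ROOT and
 * emits one pattern per reachable branch, e.g. ETH / IPV4, ETH / IPV4 / UDP
 * and ETH / IPV4 / TCP. Buffer sizing and error handling are elided and the
 * function name is hypothetical.
 */
#if 0 /* Example only, never compiled. */
static void
flow_expansion_example(void)
{
        static const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        uint8_t buffer[2048];
        struct rte_flow_expand_rss *buf = (void *)buffer;
        int ret;

        ret = rte_flow_expand_rss(buf, sizeof(buffer), pattern,
                                  ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP,
                                  mlx5_support_expansion, MLX5_EXPANSION_ROOT);
        if (ret > 0) {
                /* buf->entries[0..ret-1] hold the expanded patterns. */
        }
}
#endif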

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
        .dev_dump = mlx5_flow_dev_dump,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action actions[2];
        struct rte_flow_action_queue queue;
};

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
        { 9, 10, 11 }, { 12, 13, 14 },
};
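
/*
 * Illustrative note: each map row is indexed by the rule's flow priority and
 * each column by the matcher sub-priority derived from the matched layer, so
 * with 16 Verbs priorities a rule at flow priority 1 with sub-priority 2
 * lands on Verbs priority priority_map_5[1][2] == 5. See
 * mlx5_flow_adjust_priority() below.
 */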

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
        uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
        uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GENEVE,
                .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GRE,
                .ptype = RTE_PTYPE_TUNNEL_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_NVGRE,
                .ptype = RTE_PTYPE_TUNNEL_NVGRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPIP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
                .ptype = RTE_PTYPE_TUNNEL_IP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GTP,
                .ptype = RTE_PTYPE_TUNNEL_GTPU,
        },
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
                     enum mlx5_feature_name feature,
                     uint32_t id,
                     struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg start_reg;
        bool skip_mtr_reg = false;

        switch (feature) {
        case MLX5_HAIRPIN_RX:
                return REG_B;
        case MLX5_HAIRPIN_TX:
                return REG_A;
        case MLX5_METADATA_RX:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_B;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_METADATA_TX:
                return REG_A;
        case MLX5_METADATA_FDB:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NONE;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_0;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_1;
                }
                break;
        case MLX5_FLOW_MARK:
                switch (config->dv_xmeta_en) {
                case MLX5_XMETA_MODE_LEGACY:
                        return REG_NONE;
                case MLX5_XMETA_MODE_META16:
                        return REG_C_1;
                case MLX5_XMETA_MODE_META32:
                        return REG_C_0;
                }
                break;
        case MLX5_MTR_SFX:
                /*
                 * If meter color and flow match share one register, flow match
                 * should use the meter color register for match.
                 */
                if (priv->mtr_reg_share)
                        return priv->mtr_color_reg;
                else
                        return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                               REG_C_3;
        case MLX5_MTR_COLOR:
                MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
                return priv->mtr_color_reg;
        case MLX5_COPY_MARK:
                /*
                 * The metadata COPY_MARK register is used only in the meter
                 * suffix sub-flow when a meter is present, so it is safe to
                 * share the same register.
                 */
                return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
        case MLX5_APP_TAG:
                /*
                 * If the meter is enabled, it engages a register for color
                 * match and flow match. If the meter color match does not use
                 * REG_C_2, the REG_C_x used by the meter color match must be
                 * skipped.
                 * If the meter is disabled, all available registers can be
                 * used.
                 */
                start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
                            (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
                skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
                if (id > (REG_C_7 - start_reg))
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "invalid tag id");
                if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                /*
                 * This case means the meter is using a REG_C_x greater than 2.
                 * Take care not to conflict with the meter color REG_C_x.
                 * If the available index REG_C_y >= REG_C_x, skip the
                 * color register.
                 */
                if (skip_mtr_reg && config->flow_mreg_c
                    [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
                        if (config->flow_mreg_c
                            [id + 1 + start_reg - REG_C_0] != REG_NONE)
                                return config->flow_mreg_c
                                               [id + 1 + start_reg - REG_C_0];
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  NULL, "unsupported tag id");
                }
                return config->flow_mreg_c[id + start_reg - REG_C_0];
        }
        MLX5_ASSERT(false);
        return rte_flow_error_set(error, EINVAL,
                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                  NULL, "invalid feature name");
}
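
/*
 * Illustrative sketch (not part of the original driver): typical use of
 * mlx5_flow_get_reg_id(). A caller resolving the register that backs the
 * MARK feature checks for a negative return before treating the value as an
 * enum modify_reg. The function name is hypothetical.
 */
#if 0 /* Example only, never compiled. */
static int
flow_mark_reg_example(struct rte_eth_dev *dev, struct rte_flow_error *error)
{
        int reg = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);

        if (reg < 0)
                return reg; /* rte_errno is set and *error is filled in. */
        /* reg is REG_NONE/REG_C_0/REG_C_1 depending on dv_xmeta_en. */
        return reg;
}
#endif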

/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;

        /*
         * Having an available reg_c can be regarded inclusively as supporting
         * extensive flow metadata registers, which could mean:
         * - metadata register copy action by modify header.
         * - 16 modify header actions are supported.
         * - reg_c's are preserved across different domains (FDB and NIC) on
         *   packet loopback by flow lookup miss.
         */
        return config->flow_mreg_c[2] != REG_NONE;
}

/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct {
                struct ibv_flow_attr attr;
                struct ibv_flow_spec_eth eth;
                struct ibv_flow_spec_action_drop drop;
        } flow_attr = {
                .attr = {
                        .num_of_specs = 2,
                        .port = (uint8_t)priv->ibv_port,
                },
                .eth = {
                        .type = IBV_FLOW_SPEC_ETH,
                        .size = sizeof(struct ibv_flow_spec_eth),
                },
                .drop = {
                        .size = sizeof(struct ibv_flow_spec_action_drop),
                        .type = IBV_FLOW_SPEC_ACTION_DROP,
                },
        };
        struct ibv_flow *flow;
        struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
        uint16_t vprio[] = { 8, 16 };
        int i;
        int priority = 0;

        if (!drop) {
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        for (i = 0; i != RTE_DIM(vprio); i++) {
                flow_attr.attr.priority = vprio[i] - 1;
                flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
                if (!flow)
                        break;
                claim_zero(mlx5_glue->destroy_flow(flow));
                priority = vprio[i];
        }
        mlx5_hrxq_drop_release(dev);
        switch (priority) {
        case 8:
                priority = RTE_DIM(priority_map_3);
                break;
        case 16:
                priority = RTE_DIM(priority_map_5);
                break;
        default:
                rte_errno = ENOTSUP;
                DRV_LOG(ERR,
                        "port %u verbs maximum priority: %d expected 8/16",
                        dev->data->port_id, priority);
                return -rte_errno;
        }
        DRV_LOG(INFO, "port %u flow maximum priority: %d",
                dev->data->port_id, priority);
        return priority;
}

/**
 * Adjust flow priority based on the highest layer and the requested priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] priority
 *   The rule base priority.
 * @param[in] subpriority
 *   The priority based on the items.
 *
 * @return
 *   The new priority.
 */
uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
                                   uint32_t subpriority)
{
        uint32_t res = 0;
        struct mlx5_priv *priv = dev->data->dev_private;

        switch (priv->config.flow_prio) {
        case RTE_DIM(priority_map_3):
                res = priority_map_3[priority][subpriority];
                break;
        case RTE_DIM(priority_map_5):
                res = priority_map_5[priority][subpriority];
                break;
        }
        return res;
}
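
/*
 * Illustrative example: with 16 Verbs priorities, a rule at base flow
 * priority 2 whose matcher uses sub-priority 1 resolves to Verbs priority
 * priority_map_5[2][1] == 7:
 *
 *      uint32_t vprio = mlx5_flow_adjust_priority(dev, 2, 1);
 *
 * The layer-specific sub-priority indices (MLX5_PRIORITY_MAP_*) are defined
 * in mlx5_flow.h.
 */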

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
                          struct rte_flow_error *error)
{
        unsigned int i;

        MLX5_ASSERT(nic_mask);
        for (i = 0; i < size; ++i)
                if ((nic_mask[i] | mask[i]) != nic_mask[i])
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "mask enables non-supported"
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
                                          "mask/last without a spec is not"
                                          " supported");
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                unsigned int i;
                int ret;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
                        last[i] = ((const uint8_t *)item->last)[i] & mask[i];
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
                        return rte_flow_error_set(error, EINVAL,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "range is not valid");
        }
        return 0;
}
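
/*
 * Illustrative note: the range check above compares (spec & mask) with
 * (last & mask) byte by byte, so only degenerate ranges pass. For example, a
 * spec byte of 10 with a last byte of 20 is rejected under a 0xff mask byte
 * ("range is not valid") but accepted under a 0x00 mask byte, where both
 * sides mask to zero.
 */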

/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] dev_flow
 *   Pointer to the mlx5_flow.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
                            int tunnel __rte_unused, uint64_t layer_types,
                            uint64_t hash_fields)
{
        struct rte_flow *flow = dev_flow->flow;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        int rss_request_inner = flow->rss.level >= 2;

        /* Check RSS hash level for tunnel. */
        if (tunnel && rss_request_inner)
                hash_fields |= IBV_RX_HASH_INNER;
        else if (tunnel || rss_request_inner)
                return 0;
#endif
        /* Check if requested layer matches RSS hash fields. */
        if (!(flow->rss.types & layer_types))
                return 0;
        return hash_fields;
}

/**
 * Look up and set the ptype in the Rx data part. Only a single ptype can be
 * used; if several tunnel rules are used on this queue, the tunnel ptype is
 * cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
        unsigned int i;
        uint32_t tunnel_ptype = 0;

        /* Look up for the ptype to use. */
        for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
                if (!rxq_ctrl->flow_tunnels_n[i])
                        continue;
                if (!tunnel_ptype) {
                        tunnel_ptype = tunnels_info[i].ptype;
                } else {
                        tunnel_ptype = 0;
                        break;
                }
        }
        rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
 * flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 * @param[in] dev_handle
 *   Pointer to device flow handle structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow,
                       struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = !!(dev_handle->act_flags &
                            (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int i;

        for (i = 0; i != flow->rss.queue_num; ++i) {
                int idx = (*flow->rss.queue)[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                /*
                 * To support metadata register copy on Tx loopback,
                 * this must always be enabled (metadata may arrive
                 * from another port, not only from local flows).
                 */
                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n++;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Increase the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]++;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_flow_handle *dev_handle;

        LIST_FOREACH(dev_handle, &flow->dev_handles, next)
                flow_drv_rxq_flags_set(dev, flow, dev_handle);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to flow structure.
 * @param[in] dev_handle
 *   Pointer to the device flow handle structure.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow,
                        struct mlx5_flow_handle *dev_handle)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const int mark = !!(dev_handle->act_flags &
                            (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
        const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int i;

        MLX5_ASSERT(dev->data->dev_started);
        for (i = 0; i != flow->rss.queue_num; ++i) {
                int idx = (*flow->rss.queue)[i];
                struct mlx5_rxq_ctrl *rxq_ctrl =
                        container_of((*priv->rxqs)[idx],
                                     struct mlx5_rxq_ctrl, rxq);

                if (priv->config.dv_flow_en &&
                    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
                    mlx5_flow_ext_mreg_supported(dev)) {
                        rxq_ctrl->rxq.mark = 1;
                        rxq_ctrl->flow_mark_n = 1;
                } else if (mark) {
                        rxq_ctrl->flow_mark_n--;
                        rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
                }
                if (tunnel) {
                        unsigned int j;

                        /* Decrease the counter matching the flow. */
                        for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
                                if ((tunnels_info[j].tunnel &
                                     dev_handle->layers) ==
                                    tunnels_info[j].tunnel) {
                                        rxq_ctrl->flow_tunnels_n[j]--;
                                        break;
                                }
                        }
                        flow_rxq_tunnel_ptype_update(rxq_ctrl);
                }
        }
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct mlx5_flow_handle *dev_handle;

        LIST_FOREACH(dev_handle, &flow->dev_handles, next)
                flow_drv_rxq_flags_trim(dev, flow, dev_handle);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        unsigned int i;

        for (i = 0; i != priv->rxqs_n; ++i) {
                struct mlx5_rxq_ctrl *rxq_ctrl;
                unsigned int j;

                if (!(*priv->rxqs)[i])
                        continue;
                rxq_ctrl = container_of((*priv->rxqs)[i],
                                        struct mlx5_rxq_ctrl, rxq);
                rxq_ctrl->flow_mark_n = 0;
                rxq_ctrl->rxq.mark = 0;
                for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
                        rxq_ctrl->flow_tunnels_n[j] = 0;
                rxq_ctrl->rxq.tunnel = 0;
        }
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
                      enum rte_flow_action_type action)
{
        if (actions == NULL)
                return NULL;
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
                if (actions->type == action)
                        return actions;
        return NULL;
}
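
/*
 * Illustrative sketch (not part of the original driver): locating the QUEUE
 * action of a rule with mlx5_flow_find_action() and reading its
 * configuration. Variable names are hypothetical.
 */
#if 0 /* Example only, never compiled. */
        const struct rte_flow_action *qact =
                mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_QUEUE);

        if (qact != NULL) {
                const struct rte_flow_action_queue *conf = qact->conf;

                /* conf->index is the target Rx queue. */
        }
#endif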

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        if (action_flags & MLX5_FLOW_ACTION_MARK)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't mark and flag in same flow");
        if (action_flags & MLX5_FLOW_ACTION_FLAG)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 flag"
                                          " actions in same flow");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "flag action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
                               uint64_t action_flags,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        const struct rte_flow_action_mark *mark = action->conf;

        if (!mark)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION,
                                          action,
                                          "configuration cannot be null");
        if (mark->id >= MLX5_FLOW_MARK_MAX)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &mark->id,
                                          "mark id must be in 0 <= id < "
                                          RTE_STR(MLX5_FLOW_MARK_MAX));
        if (action_flags & MLX5_FLOW_ACTION_FLAG)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't flag and mark in same flow");
        if (action_flags & MLX5_FLOW_ACTION_MARK)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 mark actions in same"
                                          " flow");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "mark action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
                               const struct rte_flow_attr *attr,
                               struct rte_flow_error *error)
{
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "drop action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
                                uint64_t action_flags,
                                struct rte_eth_dev *dev,
                                const struct rte_flow_attr *attr,
                                struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const struct rte_flow_action_queue *queue = action->conf;

        if (action_flags & MLX5_FLOW_FATE_ACTIONS)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 fate actions in"
                                          " same flow");
        if (!priv->rxqs_n)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          NULL, "No Rx queues configured");
        if (queue->index >= priv->rxqs_n)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &queue->index,
                                          "queue index out of range");
        if (!(*priv->rxqs)[queue->index])
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &queue->index,
                                          "queue is not configured");
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "queue action not supported for "
                                          "egress");
        return 0;
}

/*
 * Validate the RSS action.
 *
 * @param[in] action
 *   Pointer to the RSS action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
                              uint64_t action_flags,
                              struct rte_eth_dev *dev,
                              const struct rte_flow_attr *attr,
                              uint64_t item_flags,
                              struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        const struct rte_flow_action_rss *rss = action->conf;
        int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int i;

        if (action_flags & MLX5_FLOW_FATE_ACTIONS)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION, NULL,
                                          "can't have 2 fate actions"
                                          " in same flow");
        if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
            rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->func,
                                          "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        if (rss->level > 2)
#else
        if (rss->level > 1)
#endif
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->level,
                                          "tunnel RSS is not supported");
        /* Allow RSS key_len 0 in case of NULL (default) RSS key. */
        if (rss->key_len == 0 && rss->key != NULL)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->key_len,
                                          "RSS hash key length 0");
        if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->key_len,
                                          "RSS hash key too small");
        if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->key_len,
                                          "RSS hash key too large");
        if (rss->queue_num > priv->config.ind_table_max_size)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->queue_num,
                                          "number of queues too large");
        if (rss->types & MLX5_RSS_HF_MASK)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          &rss->types,
                                          "some RSS protocols are not"
                                          " supported");
        if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
            !(rss->types & ETH_RSS_IP))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
                                          "L3 partial RSS requested but L3 RSS"
                                          " type not specified");
        if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
            !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
                                          "L4 partial RSS requested but L4 RSS"
                                          " type not specified");
        if (!priv->rxqs_n)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          NULL, "No Rx queues configured");
        if (!rss->queue_num)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                          NULL, "No queues configured");
        for (i = 0; i != rss->queue_num; ++i) {
                if (rss->queue[i] >= priv->rxqs_n)
                        return rte_flow_error_set
                                (error, EINVAL,
                                 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                 &rss->queue[i], "queue index out of range");
                if (!(*priv->rxqs)[rss->queue[i]])
                        return rte_flow_error_set
                                (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
                                 &rss->queue[i], "queue is not configured");
        }
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "rss action not supported for "
                                          "egress");
        if (rss->level > 1 && !tunnel)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
                                          "inner RSS is not supported for "
                                          "non-tunnel flows");
        return 0;
}
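
/*
 * Illustrative sketch: a minimal RSS action configuration that passes the
 * checks above, assuming at least two configured Rx queues and default
 * (Toeplitz) hashing. Values are hypothetical.
 */
#if 0 /* Example only, never compiled. */
        static const uint16_t queues[] = { 0, 1 };
        const struct rte_flow_action_rss rss_conf = {
                .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
                .level = 0, /* Outermost encapsulation level. */
                .types = ETH_RSS_IP,
                .key_len = 0, /* NULL key selects the default RSS key. */
                .key = NULL,
                .queue_num = RTE_DIM(queues),
                .queue = queues,
        };
#endif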

/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
                                const struct rte_flow_attr *attr,
                                struct rte_flow_error *error)
{
        if (attr->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "count action not supported for "
                                          "egress");
        return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
                              const struct rte_flow_attr *attributes,
                              struct rte_flow_error *error)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        uint32_t priority_max = priv->config.flow_prio - 1;

        if (attributes->group)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL, "groups are not supported");
        if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
            attributes->priority >= priority_max)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                          NULL, "priority out of range");
        if (attributes->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
                                          "egress is not supported");
        if (attributes->transfer && !priv->config.dv_esw_en)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                                          NULL, "transfer is not supported");
        if (!attributes->ingress)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                          NULL,
                                          "ingress attribute is mandatory");
        return 0;
}
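
/*
 * Illustrative note: the only attribute shape this validator accepts is a
 * plain ingress rule in group 0, e.g.:
 *
 *      const struct rte_flow_attr attr = { .ingress = 1 };
 *
 * with .priority below flow_prio - 1 (or MLX5_FLOW_PRIO_RSVD) and .transfer
 * set only when E-Switch support (dv_esw_en) is enabled.
 */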
1268
1269 /**
1270  * Validate ICMP6 item.
1271  *
1272  * @param[in] item
1273  *   Item specification.
1274  * @param[in] item_flags
1275  *   Bit-fields that holds the items detected until now.
1276  * @param[out] error
1277  *   Pointer to error structure.
1278  *
1279  * @return
1280  *   0 on success, a negative errno value otherwise and rte_errno is set.
1281  */
1282 int
1283 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1284                                uint64_t item_flags,
1285                                uint8_t target_protocol,
1286                                struct rte_flow_error *error)
1287 {
1288         const struct rte_flow_item_icmp6 *mask = item->mask;
1289         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1290         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1291                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1292         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1293                                       MLX5_FLOW_LAYER_OUTER_L4;
1294         int ret;
1295
1296         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1297                 return rte_flow_error_set(error, EINVAL,
1298                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1299                                           "protocol filtering not compatible"
1300                                           " with ICMP6 layer");
1301         if (!(item_flags & l3m))
1302                 return rte_flow_error_set(error, EINVAL,
1303                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1304                                           "IPv6 is mandatory to filter on"
1305                                           " ICMP6");
1306         if (item_flags & l4m)
1307                 return rte_flow_error_set(error, EINVAL,
1308                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1309                                           "multiple L4 layers not supported");
1310         if (!mask)
1311                 mask = &rte_flow_item_icmp6_mask;
1312         ret = mlx5_flow_item_acceptable
1313                 (item, (const uint8_t *)mask,
1314                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1315                  sizeof(struct rte_flow_item_icmp6), error);
1316         if (ret < 0)
1317                 return ret;
1318         return 0;
1319 }
1320
1321 /**
1322  * Validate ICMP item.
1323  *
1324  * @param[in] item
1325  *   Item specification.
1326  * @param[in] item_flags
1327  *   Bit-fields that hold the items detected so far.
 * @param[in] target_protocol
 *   The next protocol in the previous item.
1328  * @param[out] error
1329  *   Pointer to error structure.
1330  *
1331  * @return
1332  *   0 on success, a negative errno value otherwise and rte_errno is set.
1333  */
1334 int
1335 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1336                              uint64_t item_flags,
1337                              uint8_t target_protocol,
1338                              struct rte_flow_error *error)
1339 {
1340         const struct rte_flow_item_icmp *mask = item->mask;
1341         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1342         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1343                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1344         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1345                                       MLX5_FLOW_LAYER_OUTER_L4;
1346         int ret;
1347
1348         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1349                 return rte_flow_error_set(error, EINVAL,
1350                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1351                                           "protocol filtering not compatible"
1352                                           " with ICMP layer");
1353         if (!(item_flags & l3m))
1354                 return rte_flow_error_set(error, EINVAL,
1355                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1356                                           "IPv4 is mandatory to filter"
1357                                           " on ICMP");
1358         if (item_flags & l4m)
1359                 return rte_flow_error_set(error, EINVAL,
1360                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1361                                           "multiple L4 layers not supported");
1362         if (!mask)
1363                 mask = &rte_flow_item_icmp_mask;
1364         ret = mlx5_flow_item_acceptable
1365                 (item, (const uint8_t *)mask,
1366                  (const uint8_t *)&rte_flow_item_icmp_mask,
1367                  sizeof(struct rte_flow_item_icmp), error);
1368         if (ret < 0)
1369                 return ret;
1370         return 0;
1371 }
1372
1373 /**
1374  * Validate Ethernet item.
1375  *
1376  * @param[in] item
1377  *   Item specification.
1378  * @param[in] item_flags
1379  *   Bit-fields that hold the items detected so far.
1380  * @param[out] error
1381  *   Pointer to error structure.
1382  *
1383  * @return
1384  *   0 on success, a negative errno value otherwise and rte_errno is set.
1385  */
1386 int
1387 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1388                             uint64_t item_flags,
1389                             struct rte_flow_error *error)
1390 {
1391         const struct rte_flow_item_eth *mask = item->mask;
1392         const struct rte_flow_item_eth nic_mask = {
1393                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1394                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1395                 .type = RTE_BE16(0xffff),
1396         };
1397         int ret;
1398         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1399         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1400                                        MLX5_FLOW_LAYER_OUTER_L2;
1401
1402         if (item_flags & ethm)
1403                 return rte_flow_error_set(error, ENOTSUP,
1404                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1405                                           "multiple L2 layers not supported");
1406         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1407             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1408                 return rte_flow_error_set(error, EINVAL,
1409                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1410                                           "L2 layer should not follow "
1411                                           "L3 layers");
1412         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1413             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1414                 return rte_flow_error_set(error, EINVAL,
1415                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1416                                           "L2 layer should not follow VLAN");
1417         if (!mask)
1418                 mask = &rte_flow_item_eth_mask;
1419         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1420                                         (const uint8_t *)&nic_mask,
1421                                         sizeof(struct rte_flow_item_eth),
1422                                         error);
1423         return ret;
1424 }
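
/*
 * Illustrative sketch (hypothetical helper): an Ethernet item that
 * stays inside nic_mask above -- full MAC masks and an exact-match
 * EtherType are the only fields the PMD accepts.
 */
static inline int
mlx5_flow_example_validate_eth(struct rte_flow_error *error)
{
        static const struct rte_flow_item_eth spec = {
                .type = RTE_BE16(RTE_ETHER_TYPE_IPV4),
        };
        static const struct rte_flow_item_eth mask = {
                .type = RTE_BE16(0xffff), /* Match the EtherType exactly. */
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &spec,
                .mask = &mask,
        };

        /* item_flags == 0: first (outer) L2 layer of the pattern. */
        return mlx5_flow_validate_item_eth(&item, 0, error);
}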
1425
1426 /**
1427  * Validate VLAN item.
1428  *
1429  * @param[in] item
1430  *   Item specification.
1431  * @param[in] item_flags
1432  *   Bit-fields that holds the items detected until now.
1433  * @param[in] dev
1434  *   Ethernet device flow is being created on.
1435  * @param[out] error
1436  *   Pointer to error structure.
1437  *
1438  * @return
1439  *   0 on success, a negative errno value otherwise and rte_errno is set.
1440  */
1441 int
1442 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1443                              uint64_t item_flags,
1444                              struct rte_eth_dev *dev,
1445                              struct rte_flow_error *error)
1446 {
1447         const struct rte_flow_item_vlan *spec = item->spec;
1448         const struct rte_flow_item_vlan *mask = item->mask;
1449         const struct rte_flow_item_vlan nic_mask = {
1450                 .tci = RTE_BE16(UINT16_MAX),
1451                 .inner_type = RTE_BE16(UINT16_MAX),
1452         };
1453         uint16_t vlan_tag = 0;
1454         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1455         int ret;
1456         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1457                                         MLX5_FLOW_LAYER_INNER_L4) :
1458                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1459                                         MLX5_FLOW_LAYER_OUTER_L4);
1460         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1461                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1462
1463         if (item_flags & vlanm)
1464                 return rte_flow_error_set(error, EINVAL,
1465                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1466                                           "multiple VLAN layers not supported");
1467         else if ((item_flags & l34m) != 0)
1468                 return rte_flow_error_set(error, EINVAL,
1469                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1470                                           "VLAN cannot follow L3/L4 layer");
1471         if (!mask)
1472                 mask = &rte_flow_item_vlan_mask;
1473         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1474                                         (const uint8_t *)&nic_mask,
1475                                         sizeof(struct rte_flow_item_vlan),
1476                                         error);
1477         if (ret)
1478                 return ret;
1479         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1480                 struct mlx5_priv *priv = dev->data->dev_private;
1481
1482                 if (priv->vmwa_context) {
1483                         /*
1484                          * A non-NULL context means we run in a virtual machine
1485                          * with SR-IOV enabled and must create a VLAN interface
1486                          * so the hypervisor sets up the E-Switch vport context
1487                          * correctly. We avoid creating multiple VLAN interfaces,
1488                          * so we cannot support a VLAN tag mask.
1489                          */
1490                         return rte_flow_error_set(error, EINVAL,
1491                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1492                                                   item,
1493                                                   "VLAN tag mask is not"
1494                                                   " supported in virtual"
1495                                                   " environment");
1496                 }
1497         }
1498         if (spec) {
1499                 vlan_tag = spec->tci;
1500                 vlan_tag &= mask->tci;
1501         }
1502         /*
1503          * From the verbs perspective, an empty VLAN is equivalent
1504          * to a packet without a VLAN layer.
1505          */
1506         if (!vlan_tag)
1507                 return rte_flow_error_set(error, EINVAL,
1508                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1509                                           item->spec,
1510                                           "VLAN cannot be empty");
1511         return 0;
1512 }
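
/*
 * Illustrative sketch (hypothetical helper): a VLAN item the validator
 * above accepts -- non-zero TCI, VLAN-ID-only mask (so the VM/SR-IOV
 * restriction does not trigger), following an outer Ethernet layer.
 */
static inline int
mlx5_flow_example_validate_vlan(struct rte_eth_dev *dev,
                                struct rte_flow_error *error)
{
        static const struct rte_flow_item_vlan spec = {
                .tci = RTE_BE16(100), /* VLAN ID 100, PCP/DEI zero. */
        };
        static const struct rte_flow_item_vlan mask = {
                .tci = RTE_BE16(0x0fff), /* VLAN ID bits only. */
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_vlan(&item, MLX5_FLOW_LAYER_OUTER_L2,
                                            dev, error);
}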
1513
1514 /**
1515  * Validate IPV4 item.
1516  *
1517  * @param[in] item
1518  *   Item specification.
1519  * @param[in] item_flags
1520  *   Bit-fields that hold the items detected so far.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the Ethernet layer header (including dot1q).
1521  * @param[in] acc_mask
1522  *   Acceptable mask, if NULL the default internal mask
1523  *   will be used to check whether item fields are supported.
1524  * @param[out] error
1525  *   Pointer to error structure.
1526  *
1527  * @return
1528  *   0 on success, a negative errno value otherwise and rte_errno is set.
1529  */
1530 int
1531 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1532                              uint64_t item_flags,
1533                              uint64_t last_item,
1534                              uint16_t ether_type,
1535                              const struct rte_flow_item_ipv4 *acc_mask,
1536                              struct rte_flow_error *error)
1537 {
1538         const struct rte_flow_item_ipv4 *mask = item->mask;
1539         const struct rte_flow_item_ipv4 *spec = item->spec;
1540         const struct rte_flow_item_ipv4 nic_mask = {
1541                 .hdr = {
1542                         .src_addr = RTE_BE32(0xffffffff),
1543                         .dst_addr = RTE_BE32(0xffffffff),
1544                         .type_of_service = 0xff,
1545                         .next_proto_id = 0xff,
1546                 },
1547         };
1548         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1549         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1550                                       MLX5_FLOW_LAYER_OUTER_L3;
1551         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1552                                       MLX5_FLOW_LAYER_OUTER_L4;
1553         int ret;
1554         uint8_t next_proto = 0xFF;
1555         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1556                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1557                                   MLX5_FLOW_LAYER_INNER_VLAN);
1558
1559         if ((last_item & l2_vlan) && ether_type &&
1560             ether_type != RTE_ETHER_TYPE_IPV4)
1561                 return rte_flow_error_set(error, EINVAL,
1562                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1563                                           "IPv4 cannot follow L2/VLAN layer "
1564                                           "which ether type is not IPv4");
1565         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1566                 if (mask && spec)
1567                         next_proto = mask->hdr.next_proto_id &
1568                                      spec->hdr.next_proto_id;
1569                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1570                         return rte_flow_error_set(error, EINVAL,
1571                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1572                                                   item,
1573                                                   "multiple tunnel "
1574                                                   "not supported");
1575         }
1576         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1577                 return rte_flow_error_set(error, EINVAL,
1578                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1579                                           "wrong tunnel type - IPv6 specified "
1580                                           "but IPv4 item provided");
1581         if (item_flags & l3m)
1582                 return rte_flow_error_set(error, ENOTSUP,
1583                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1584                                           "multiple L3 layers not supported");
1585         else if (item_flags & l4m)
1586                 return rte_flow_error_set(error, EINVAL,
1587                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1588                                           "L3 cannot follow an L4 layer.");
1589         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1590                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1591                 return rte_flow_error_set(error, EINVAL,
1592                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1593                                           "L3 cannot follow an NVGRE layer.");
1594         if (!mask)
1595                 mask = &rte_flow_item_ipv4_mask;
1596         else if (mask->hdr.next_proto_id != 0 &&
1597                  mask->hdr.next_proto_id != 0xff)
1598                 return rte_flow_error_set(error, EINVAL,
1599                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1600                                           "partial mask is not supported"
1601                                           " for protocol");
1602         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1603                                         acc_mask ? (const uint8_t *)acc_mask
1604                                                  : (const uint8_t *)&nic_mask,
1605                                         sizeof(struct rte_flow_item_ipv4),
1606                                         error);
1607         if (ret < 0)
1608                 return ret;
1609         return 0;
1610 }
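
/*
 * Illustrative sketch (hypothetical helper): how a caller narrows the
 * acceptable IPv4 fields through acc_mask. With acc_mask == NULL the
 * nic_mask above applies; a path that cannot match on TOS would pass
 * a tighter mask instead:
 */
static inline int
mlx5_flow_example_validate_ipv4(const struct rte_flow_item *item,
                                struct rte_flow_error *error)
{
        static const struct rte_flow_item_ipv4 acc_mask = {
                .hdr = {
                        .src_addr = RTE_BE32(0xffffffff),
                        .dst_addr = RTE_BE32(0xffffffff),
                        .next_proto_id = 0xff,
                        /* .type_of_service left 0: TOS matching refused. */
                },
        };

        /* last_item/ether_type == 0: no preceding L2/VLAN constraint. */
        return mlx5_flow_validate_item_ipv4(item, 0, 0, 0, &acc_mask, error);
}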
1611
1612 /**
1613  * Validate IPV6 item.
1614  *
1615  * @param[in] item
1616  *   Item specification.
1617  * @param[in] item_flags
1618  *   Bit-fields that hold the items detected so far.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the Ethernet layer header (including dot1q).
1619  * @param[in] acc_mask
1620  *   Acceptable mask, if NULL the default internal mask
1621  *   will be used to check whether item fields are supported.
1622  * @param[out] error
1623  *   Pointer to error structure.
1624  *
1625  * @return
1626  *   0 on success, a negative errno value otherwise and rte_errno is set.
1627  */
1628 int
1629 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1630                              uint64_t item_flags,
1631                              uint64_t last_item,
1632                              uint16_t ether_type,
1633                              const struct rte_flow_item_ipv6 *acc_mask,
1634                              struct rte_flow_error *error)
1635 {
1636         const struct rte_flow_item_ipv6 *mask = item->mask;
1637         const struct rte_flow_item_ipv6 *spec = item->spec;
1638         const struct rte_flow_item_ipv6 nic_mask = {
1639                 .hdr = {
1640                         .src_addr =
1641                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1642                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1643                         .dst_addr =
1644                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1645                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1646                         .vtc_flow = RTE_BE32(0xffffffff),
1647                         .proto = 0xff,
1648                 },
1649         };
1650         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1651         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1652                                       MLX5_FLOW_LAYER_OUTER_L3;
1653         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1654                                       MLX5_FLOW_LAYER_OUTER_L4;
1655         int ret;
1656         uint8_t next_proto = 0xFF;
1657         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1658                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1659                                   MLX5_FLOW_LAYER_INNER_VLAN);
1660
1661         if ((last_item & l2_vlan) && ether_type &&
1662             ether_type != RTE_ETHER_TYPE_IPV6)
1663                 return rte_flow_error_set(error, EINVAL,
1664                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1665                                           "IPv6 cannot follow L2/VLAN layer "
1666                                           "which ether type is not IPv6");
1667         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1668                 if (mask && spec)
1669                         next_proto = mask->hdr.proto & spec->hdr.proto;
1670                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1671                         return rte_flow_error_set(error, EINVAL,
1672                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1673                                                   item,
1674                                                   "multiple tunnel "
1675                                                   "not supported");
1676         }
1677         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1678                 return rte_flow_error_set(error, EINVAL,
1679                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1680                                           "wrong tunnel type - IPv4 specified "
1681                                           "but IPv6 item provided");
1682         if (item_flags & l3m)
1683                 return rte_flow_error_set(error, ENOTSUP,
1684                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1685                                           "multiple L3 layers not supported");
1686         else if (item_flags & l4m)
1687                 return rte_flow_error_set(error, EINVAL,
1688                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1689                                           "L3 cannot follow an L4 layer.");
1690         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1691                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1692                 return rte_flow_error_set(error, EINVAL,
1693                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1694                                           "L3 cannot follow an NVGRE layer.");
1695         if (!mask)
1696                 mask = &rte_flow_item_ipv6_mask;
1697         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1698                                         acc_mask ? (const uint8_t *)acc_mask
1699                                                  : (const uint8_t *)&nic_mask,
1700                                         sizeof(struct rte_flow_item_ipv6),
1701                                         error);
1702         if (ret < 0)
1703                 return ret;
1704         return 0;
1705 }
1706
1707 /**
1708  * Validate UDP item.
1709  *
1710  * @param[in] item
1711  *   Item specification.
1712  * @param[in] item_flags
1713  *   Bit-fields that hold the items detected so far.
1714  * @param[in] target_protocol
1715  *   The next protocol in the previous item.
1718  * @param[out] error
1719  *   Pointer to error structure.
1720  *
1721  * @return
1722  *   0 on success, a negative errno value otherwise and rte_errno is set.
1723  */
1724 int
1725 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1726                             uint64_t item_flags,
1727                             uint8_t target_protocol,
1728                             struct rte_flow_error *error)
1729 {
1730         const struct rte_flow_item_udp *mask = item->mask;
1731         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1732         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1733                                       MLX5_FLOW_LAYER_OUTER_L3;
1734         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1735                                       MLX5_FLOW_LAYER_OUTER_L4;
1736         int ret;
1737
1738         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1739                 return rte_flow_error_set(error, EINVAL,
1740                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1741                                           "protocol filtering not compatible"
1742                                           " with UDP layer");
1743         if (!(item_flags & l3m))
1744                 return rte_flow_error_set(error, EINVAL,
1745                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1746                                           "L3 is mandatory to filter on L4");
1747         if (item_flags & l4m)
1748                 return rte_flow_error_set(error, EINVAL,
1749                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1750                                           "multiple L4 layers not supported");
1751         if (!mask)
1752                 mask = &rte_flow_item_udp_mask;
1753         ret = mlx5_flow_item_acceptable
1754                 (item, (const uint8_t *)mask,
1755                  (const uint8_t *)&rte_flow_item_udp_mask,
1756                  sizeof(struct rte_flow_item_udp), error);
1757         if (ret < 0)
1758                 return ret;
1759         return 0;
1760 }
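
/*
 * Illustrative sketch (hypothetical helper): validating an outer UDP
 * item. target_protocol comes from the preceding IPv4/IPv6 spec;
 * 0xff means the L3 item did not constrain the next protocol.
 */
static inline int
mlx5_flow_example_validate_udp(const struct rte_flow_item *udp_item,
                               struct rte_flow_error *error)
{
        /* An L3 bit is mandatory: L4 cannot be the first layer. */
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4;

        return mlx5_flow_validate_item_udp(udp_item, item_flags, 0xff,
                                           error);
}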
1761
1762 /**
1763  * Validate TCP item.
1764  *
1765  * @param[in] item
1766  *   Item specification.
1767  * @param[in] item_flags
1768  *   Bit-fields that holds the items detected until now.
1769  * @param[in] target_protocol
1770  *   The next protocol in the previous item.
1771  * @param[out] error
1772  *   Pointer to error structure.
1773  *
1774  * @return
1775  *   0 on success, a negative errno value otherwise and rte_errno is set.
1776  */
1777 int
1778 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1779                             uint64_t item_flags,
1780                             uint8_t target_protocol,
1781                             const struct rte_flow_item_tcp *flow_mask,
1782                             struct rte_flow_error *error)
1783 {
1784         const struct rte_flow_item_tcp *mask = item->mask;
1785         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1786         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1787                                       MLX5_FLOW_LAYER_OUTER_L3;
1788         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1789                                       MLX5_FLOW_LAYER_OUTER_L4;
1790         int ret;
1791
1792         MLX5_ASSERT(flow_mask);
1793         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1794                 return rte_flow_error_set(error, EINVAL,
1795                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1796                                           "protocol filtering not compatible"
1797                                           " with TCP layer");
1798         if (!(item_flags & l3m))
1799                 return rte_flow_error_set(error, EINVAL,
1800                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1801                                           "L3 is mandatory to filter on L4");
1802         if (item_flags & l4m)
1803                 return rte_flow_error_set(error, EINVAL,
1804                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1805                                           "multiple L4 layers not supported");
1806         if (!mask)
1807                 mask = &rte_flow_item_tcp_mask;
1808         ret = mlx5_flow_item_acceptable
1809                 (item, (const uint8_t *)mask,
1810                  (const uint8_t *)flow_mask,
1811                  sizeof(struct rte_flow_item_tcp), error);
1812         if (ret < 0)
1813                 return ret;
1814         return 0;
1815 }
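
/*
 * Illustrative sketch (hypothetical helper): unlike the UDP variant,
 * the TCP validator takes the driver-supported header fields mask
 * explicitly. The rte_flow default mask (ports only) is the most
 * conservative choice; a driver path may pass a wider one.
 */
static inline int
mlx5_flow_example_validate_tcp(const struct rte_flow_item *tcp_item,
                               struct rte_flow_error *error)
{
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4;

        return mlx5_flow_validate_item_tcp(tcp_item, item_flags, IPPROTO_TCP,
                                           &rte_flow_item_tcp_mask, error);
}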
1816
1817 /**
1818  * Validate VXLAN item.
1819  *
1820  * @param[in] item
1821  *   Item specification.
1822  * @param[in] item_flags
1823  *   Bit-fields that hold the items detected so far.
1826  * @param[out] error
1827  *   Pointer to error structure.
1828  *
1829  * @return
1830  *   0 on success, a negative errno value otherwise and rte_errno is set.
1831  */
1832 int
1833 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1834                               uint64_t item_flags,
1835                               struct rte_flow_error *error)
1836 {
1837         const struct rte_flow_item_vxlan *spec = item->spec;
1838         const struct rte_flow_item_vxlan *mask = item->mask;
1839         int ret;
1840         union vni {
1841                 uint32_t vlan_id;
1842                 uint8_t vni[4];
1843         } id = { .vlan_id = 0, };
1844
1846         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1847                 return rte_flow_error_set(error, ENOTSUP,
1848                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1849                                           "multiple tunnel layers not"
1850                                           " supported");
1851         /*
1852          * Verify an outer UDP layer is present, as required by
1853          * https://tools.ietf.org/html/rfc7348
1854          */
1855         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1856                 return rte_flow_error_set(error, EINVAL,
1857                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1858                                           "no outer UDP layer found");
1859         if (!mask)
1860                 mask = &rte_flow_item_vxlan_mask;
1861         ret = mlx5_flow_item_acceptable
1862                 (item, (const uint8_t *)mask,
1863                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1864                  sizeof(struct rte_flow_item_vxlan),
1865                  error);
1866         if (ret < 0)
1867                 return ret;
1868         if (spec) {
1869                 memcpy(&id.vni[1], spec->vni, 3);
                /* Apply the mask; a second copy here used to overwrite the VNI. */
                id.vni[1] &= mask->vni[0];
                id.vni[2] &= mask->vni[1];
                id.vni[3] &= mask->vni[2];
1871         }
1872         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1873                 return rte_flow_error_set(error, ENOTSUP,
1874                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1875                                           "VXLAN tunnel must be fully defined");
1876         return 0;
1877 }
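
/*
 * Illustrative sketch (hypothetical helper): the flag set a fully
 * specified VXLAN pattern (ETH / IPV4 / UDP / VXLAN) accumulates
 * before this validator runs; it satisfies both the outer-UDP and
 * the "fully defined" checks above.
 */
static inline int
mlx5_flow_example_validate_vxlan(const struct rte_flow_item *vxlan_item,
                                 struct rte_flow_error *error)
{
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                                    MLX5_FLOW_LAYER_OUTER_L4_UDP;

        return mlx5_flow_validate_item_vxlan(vxlan_item, item_flags, error);
}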
1878
1879 /**
1880  * Validate VXLAN_GPE item.
1881  *
1882  * @param[in] item
1883  *   Item specification.
1884  * @param[in] item_flags
1885  *   Bit-fields that hold the items detected so far.
1886  * @param[in] dev
1887  *   Pointer to the Ethernet device structure.
1890  * @param[out] error
1891  *   Pointer to error structure.
1892  *
1893  * @return
1894  *   0 on success, a negative errno value otherwise and rte_errno is set.
1895  */
1896 int
1897 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1898                                   uint64_t item_flags,
1899                                   struct rte_eth_dev *dev,
1900                                   struct rte_flow_error *error)
1901 {
1902         struct mlx5_priv *priv = dev->data->dev_private;
1903         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1904         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1905         int ret;
1906         union vni {
1907                 uint32_t vlan_id;
1908                 uint8_t vni[4];
1909         } id = { .vlan_id = 0, };
1910
1911         if (!priv->config.l3_vxlan_en)
1912                 return rte_flow_error_set(error, ENOTSUP,
1913                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1914                                           "L3 VXLAN is not enabled by device"
1915                                           " parameter and/or not configured in"
1916                                           " firmware");
1917         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1918                 return rte_flow_error_set(error, ENOTSUP,
1919                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1920                                           "multiple tunnel layers not"
1921                                           " supported");
1922         /*
1923          * Verify an outer UDP layer is present, as required by the
1924          * VXLAN-GPE specification (draft-ietf-nvo3-vxlan-gpe).
1925          */
1926         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1927                 return rte_flow_error_set(error, EINVAL,
1928                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1929                                           "no outer UDP layer found");
1930         if (!mask)
1931                 mask = &rte_flow_item_vxlan_gpe_mask;
1932         ret = mlx5_flow_item_acceptable
1933                 (item, (const uint8_t *)mask,
1934                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1935                  sizeof(struct rte_flow_item_vxlan_gpe),
1936                  error);
1937         if (ret < 0)
1938                 return ret;
1939         if (spec) {
1940                 if (spec->protocol)
1941                         return rte_flow_error_set(error, ENOTSUP,
1942                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1943                                                   item,
1944                                                   "VxLAN-GPE protocol"
1945                                                   " not supported");
1946                 memcpy(&id.vni[1], spec->vni, 3);
                /* Apply the mask; a second copy here used to overwrite the VNI. */
                id.vni[1] &= mask->vni[0];
                id.vni[2] &= mask->vni[1];
                id.vni[3] &= mask->vni[2];
1948         }
1949         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1950                 return rte_flow_error_set(error, ENOTSUP,
1951                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1952                                           "VXLAN-GPE tunnel must be fully"
1953                                           " defined");
1954         return 0;
1955 }

1956 /**
1957  * Validate GRE Key item.
1958  *
1959  * @param[in] item
1960  *   Item specification.
1961  * @param[in] item_flags
1962  *   Bit flags marking the items detected so far.
1963  * @param[in] gre_item
1964  *   Pointer to the GRE item the key belongs to.
1965  * @param[out] error
1966  *   Pointer to error structure.
1967  *
1968  * @return
1969  *   0 on success, a negative errno value otherwise and rte_errno is set.
1970  */
1971 int
1972 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
1973                                 uint64_t item_flags,
1974                                 const struct rte_flow_item *gre_item,
1975                                 struct rte_flow_error *error)
1976 {
1977         const rte_be32_t *mask = item->mask;
1978         int ret = 0;
1979         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
1980         const struct rte_flow_item_gre *gre_spec;
1981         const struct rte_flow_item_gre *gre_mask;
1982
1983         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
1984                 return rte_flow_error_set(error, ENOTSUP,
1985                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1986                                           "Multiple GRE key not support");
1987         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
1988                 return rte_flow_error_set(error, ENOTSUP,
1989                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1990                                           "No preceding GRE header");
1991         if (item_flags & MLX5_FLOW_LAYER_INNER)
1992                 return rte_flow_error_set(error, ENOTSUP,
1993                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1994                                           "GRE key following a wrong item");
1995         gre_mask = gre_item->mask;
1996         if (!gre_mask)
1997                 gre_mask = &rte_flow_item_gre_mask;
1998         gre_spec = gre_item->spec;
1999         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2000                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2001                 return rte_flow_error_set(error, EINVAL,
2002                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2003                                           "Key bit must be on");
2004
2005         if (!mask)
2006                 mask = &gre_key_default_mask;
2007         ret = mlx5_flow_item_acceptable
2008                 (item, (const uint8_t *)mask,
2009                  (const uint8_t *)&gre_key_default_mask,
2010                  sizeof(rte_be32_t), error);
2011         return ret;
2012 }
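
/*
 * Illustrative sketch (hypothetical helper): a GRE item that lets a
 * subsequent GRE_KEY item pass the "Key bit must be on" check above;
 * bit 0x2000 of c_rsvd0_ver is the K flag of the GRE header.
 */
static inline int
mlx5_flow_example_validate_gre_key(const struct rte_flow_item *key_item,
                                   struct rte_flow_error *error)
{
        static const struct rte_flow_item_gre gre_spec = {
                .c_rsvd0_ver = RTE_BE16(0x2000), /* K bit set... */
        };
        static const struct rte_flow_item_gre gre_mask = {
                .c_rsvd0_ver = RTE_BE16(0x2000), /* ...and matched on. */
        };
        const struct rte_flow_item gre_item = {
                .type = RTE_FLOW_ITEM_TYPE_GRE,
                .spec = &gre_spec,
                .mask = &gre_mask,
        };
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                                    MLX5_FLOW_LAYER_GRE;

        return mlx5_flow_validate_item_gre_key(key_item, item_flags,
                                               &gre_item, error);
}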
2013
2014 /**
2015  * Validate GRE item.
2016  *
2017  * @param[in] item
2018  *   Item specification.
2019  * @param[in] item_flags
2020  *   Bit flags marking the items detected so far.
2021  * @param[in] target_protocol
2022  *   The next protocol in the previous item.
2023  * @param[out] error
2024  *   Pointer to error structure.
2025  *
2026  * @return
2027  *   0 on success, a negative errno value otherwise and rte_errno is set.
2028  */
2029 int
2030 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2031                             uint64_t item_flags,
2032                             uint8_t target_protocol,
2033                             struct rte_flow_error *error)
2034 {
2035         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2036         const struct rte_flow_item_gre *mask = item->mask;
2037         int ret;
2038         const struct rte_flow_item_gre nic_mask = {
2039                 .c_rsvd0_ver = RTE_BE16(0xB000),
2040                 .protocol = RTE_BE16(UINT16_MAX),
2041         };
2042
2043         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2044                 return rte_flow_error_set(error, EINVAL,
2045                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2046                                           "protocol filtering not compatible"
2047                                           " with this GRE layer");
2048         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2049                 return rte_flow_error_set(error, ENOTSUP,
2050                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2051                                           "multiple tunnel layers not"
2052                                           " supported");
2053         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2054                 return rte_flow_error_set(error, ENOTSUP,
2055                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2056                                           "L3 Layer is missing");
2057         if (!mask)
2058                 mask = &rte_flow_item_gre_mask;
2059         ret = mlx5_flow_item_acceptable
2060                 (item, (const uint8_t *)mask,
2061                  (const uint8_t *)&nic_mask,
2062                  sizeof(struct rte_flow_item_gre), error);
2063         if (ret < 0)
2064                 return ret;
2065 #ifndef HAVE_MLX5DV_DR
2066 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2067         if (spec && (spec->protocol & mask->protocol))
2068                 return rte_flow_error_set(error, ENOTSUP,
2069                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2070                                           "without MPLS support the"
2071                                           " specification cannot be used for"
2072                                           " filtering");
2073 #endif
2074 #endif
2075         return 0;
2076 }
2077
2078 /**
2079  * Validate Geneve item.
2080  *
2081  * @param[in] item
2082  *   Item specification.
2083  * @param[in] item_flags
2084  *   Bit-fields that hold the items detected so far.
2085  * @param[in] dev
2086  *   Pointer to the Ethernet device structure.
2087  * @param[out] error
2088  *   Pointer to error structure.
2089  *
2090  * @return
2091  *   0 on success, a negative errno value otherwise and rte_errno is set.
2092  */
2094 int
2095 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2096                                uint64_t item_flags,
2097                                struct rte_eth_dev *dev,
2098                                struct rte_flow_error *error)
2099 {
2100         struct mlx5_priv *priv = dev->data->dev_private;
2101         const struct rte_flow_item_geneve *spec = item->spec;
2102         const struct rte_flow_item_geneve *mask = item->mask;
2103         int ret;
2104         uint16_t gbhdr;
2105         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2106                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2107         const struct rte_flow_item_geneve nic_mask = {
2108                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2109                 .vni = "\xff\xff\xff",
2110                 .protocol = RTE_BE16(UINT16_MAX),
2111         };
2112
2113         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2114                 return rte_flow_error_set(error, ENOTSUP,
2115                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2116                                           "L3 Geneve is not enabled by device"
2117                                           " parameter and/or not configured in"
2118                                           " firmware");
2119         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2120                 return rte_flow_error_set(error, ENOTSUP,
2121                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2122                                           "multiple tunnel layers not"
2123                                           " supported");
2124         /*
2125          * Verify an outer UDP layer is present, as required by the
2126          * Geneve specification (draft-ietf-nvo3-geneve).
2127          */
2128         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2129                 return rte_flow_error_set(error, EINVAL,
2130                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2131                                           "no outer UDP layer found");
2132         if (!mask)
2133                 mask = &rte_flow_item_geneve_mask;
2134         ret = mlx5_flow_item_acceptable
2135                                   (item, (const uint8_t *)mask,
2136                                    (const uint8_t *)&nic_mask,
2137                                    sizeof(struct rte_flow_item_geneve), error);
2138         if (ret)
2139                 return ret;
2140         if (spec) {
2141                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2142                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2143                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2144                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2145                         return rte_flow_error_set(error, ENOTSUP,
2146                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2147                                                   item,
2148                                                   "Geneve protocol unsupported"
2149                                                   " fields are being used");
2150                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2151                         return rte_flow_error_set
2152                                         (error, ENOTSUP,
2153                                          RTE_FLOW_ERROR_TYPE_ITEM,
2154                                          item,
2155                                          "Unsupported Geneve options length");
2156         }
2157         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2158                 return rte_flow_error_set
2159                                     (error, ENOTSUP,
2160                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2161                                      "Geneve tunnel must be fully defined");
2162         return 0;
2163 }
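
/*
 * Illustrative sketch (hypothetical helper): a Geneve item the checks
 * above accept -- version, critical and reserved bits all zero, no
 * options, and (an assumption here) transparent Ethernet bridging
 * (0x6558) as the encapsulated protocol.
 */
static inline int
mlx5_flow_example_validate_geneve(struct rte_eth_dev *dev,
                                  struct rte_flow_error *error)
{
        static const struct rte_flow_item_geneve spec = {
                .ver_opt_len_o_c_rsvd0 = RTE_BE16(0), /* Ver/OptLen/O/C = 0. */
                .vni = "\x00\x00\x2a", /* VNI 42. */
                .protocol = RTE_BE16(0x6558),
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_GENEVE,
                .spec = &spec,
                .mask = &rte_flow_item_geneve_mask,
        };
        const uint64_t item_flags = MLX5_FLOW_LAYER_OUTER_L2 |
                                    MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
                                    MLX5_FLOW_LAYER_OUTER_L4_UDP;

        return mlx5_flow_validate_item_geneve(&item, item_flags, dev, error);
}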
2164
2165 /**
2166  * Validate MPLS item.
2167  *
2168  * @param[in] dev
2169  *   Pointer to the rte_eth_dev structure.
2170  * @param[in] item
2171  *   Item specification.
2172  * @param[in] item_flags
2173  *   Bit-fields that holds the items detected until now.
2174  * @param[in] prev_layer
2175  *   The protocol layer indicated in previous item.
2176  * @param[out] error
2177  *   Pointer to error structure.
2178  *
2179  * @return
2180  *   0 on success, a negative errno value otherwise and rte_errno is set.
2181  */
2182 int
2183 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2184                              const struct rte_flow_item *item __rte_unused,
2185                              uint64_t item_flags __rte_unused,
2186                              uint64_t prev_layer __rte_unused,
2187                              struct rte_flow_error *error)
2188 {
2189 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2190         const struct rte_flow_item_mpls *mask = item->mask;
2191         struct mlx5_priv *priv = dev->data->dev_private;
2192         int ret;
2193
2194         if (!priv->config.mpls_en)
2195                 return rte_flow_error_set(error, ENOTSUP,
2196                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2197                                           "MPLS not supported or"
2198                                           " disabled in firmware"
2199                                           " configuration.");
2200         /* MPLS over IP, UDP, or GRE is allowed. */
2201         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2202                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2203                             MLX5_FLOW_LAYER_GRE)))
2204                 return rte_flow_error_set(error, EINVAL,
2205                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2206                                           "protocol filtering not compatible"
2207                                           " with MPLS layer");
2208         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2209         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2210             !(item_flags & MLX5_FLOW_LAYER_GRE))
2211                 return rte_flow_error_set(error, ENOTSUP,
2212                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2213                                           "multiple tunnel layers not"
2214                                           " supported");
2215         if (!mask)
2216                 mask = &rte_flow_item_mpls_mask;
2217         ret = mlx5_flow_item_acceptable
2218                 (item, (const uint8_t *)mask,
2219                  (const uint8_t *)&rte_flow_item_mpls_mask,
2220                  sizeof(struct rte_flow_item_mpls), error);
2221         if (ret < 0)
2222                 return ret;
2223         return 0;
2224 #endif
2225         return rte_flow_error_set(error, ENOTSUP,
2226                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2227                                   "MPLS is not supported by Verbs, please"
2228                                   " update.");
2229 }
2230
2231 /**
2232  * Validate NVGRE item.
2233  *
2234  * @param[in] item
2235  *   Item specification.
2236  * @param[in] item_flags
2237  *   Bit flags marking the items detected so far.
2238  * @param[in] target_protocol
2239  *   The next protocol in the previous item.
2240  * @param[out] error
2241  *   Pointer to error structure.
2242  *
2243  * @return
2244  *   0 on success, a negative errno value otherwise and rte_errno is set.
2245  */
2246 int
2247 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2248                               uint64_t item_flags,
2249                               uint8_t target_protocol,
2250                               struct rte_flow_error *error)
2251 {
2252         const struct rte_flow_item_nvgre *mask = item->mask;
2253         int ret;
2254
2255         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2256                 return rte_flow_error_set(error, EINVAL,
2257                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2258                                           "protocol filtering not compatible"
2259                                           " with this GRE layer");
2260         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2261                 return rte_flow_error_set(error, ENOTSUP,
2262                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2263                                           "multiple tunnel layers not"
2264                                           " supported");
2265         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2266                 return rte_flow_error_set(error, ENOTSUP,
2267                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2268                                           "L3 Layer is missing");
2269         if (!mask)
2270                 mask = &rte_flow_item_nvgre_mask;
2271         ret = mlx5_flow_item_acceptable
2272                 (item, (const uint8_t *)mask,
2273                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2274                  sizeof(struct rte_flow_item_nvgre), error);
2275         if (ret < 0)
2276                 return ret;
2277         return 0;
2278 }
2279
2280 /* Allocate unique ID for the split Q/RSS subflows. */
2281 static uint32_t
2282 flow_qrss_get_id(struct rte_eth_dev *dev)
2283 {
2284         struct mlx5_priv *priv = dev->data->dev_private;
2285         uint32_t qrss_id, ret;
2286
2287         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2288         if (ret)
2289                 return 0;
2290         MLX5_ASSERT(qrss_id);
2291         return qrss_id;
2292 }
2293
2294 /* Free unique ID for the split Q/RSS subflows. */
2295 static void
2296 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2297 {
2298         struct mlx5_priv *priv = dev->data->dev_private;
2299
2300         if (qrss_id)
2301                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2302 }
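
/*
 * Illustrative sketch (hypothetical helper): the intended pairing of
 * the two helpers above. Zero doubles as the "no id" marker, so a
 * failed allocation needs no special cleanup path.
 */
static inline void
mlx5_flow_example_qrss_id_cycle(struct rte_eth_dev *dev)
{
        uint32_t qrss_id = flow_qrss_get_id(dev);

        if (!qrss_id)
                return; /* Pool exhausted or unavailable. */
        /* ... record the id in the device flow handle and use it ... */
        flow_qrss_free_id(dev, qrss_id);
}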
2303
2304 /**
2305  * Release resource related QUEUE/RSS action split.
2306  *
2307  * @param dev
2308  *   Pointer to Ethernet device.
2309  * @param flow
2310  *   Flow to release id's from.
2311  */
2312 static void
2313 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2314                              struct rte_flow *flow)
2315 {
2316         struct mlx5_flow_handle *dev_handle;
2317
2318         LIST_FOREACH(dev_handle, &flow->dev_handles, next)
2319                 if (dev_handle->qrss_id)
2320                         flow_qrss_free_id(dev, dev_handle->qrss_id);
2321 }
2322
2323 static int
2324 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2325                    const struct rte_flow_attr *attr __rte_unused,
2326                    const struct rte_flow_item items[] __rte_unused,
2327                    const struct rte_flow_action actions[] __rte_unused,
2328                    bool external __rte_unused,
2329                    struct rte_flow_error *error)
2330 {
2331         return rte_flow_error_set(error, ENOTSUP,
2332                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2333 }
2334
2335 static struct mlx5_flow *
2336 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2337                   const struct rte_flow_attr *attr __rte_unused,
2338                   const struct rte_flow_item items[] __rte_unused,
2339                   const struct rte_flow_action actions[] __rte_unused,
2340                   struct rte_flow_error *error)
2341 {
2342         rte_flow_error_set(error, ENOTSUP,
2343                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2344         return NULL;
2345 }
2346
2347 static int
2348 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2349                     struct mlx5_flow *dev_flow __rte_unused,
2350                     const struct rte_flow_attr *attr __rte_unused,
2351                     const struct rte_flow_item items[] __rte_unused,
2352                     const struct rte_flow_action actions[] __rte_unused,
2353                     struct rte_flow_error *error)
2354 {
2355         return rte_flow_error_set(error, ENOTSUP,
2356                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2357 }
2358
2359 static int
2360 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2361                 struct rte_flow *flow __rte_unused,
2362                 struct rte_flow_error *error)
2363 {
2364         return rte_flow_error_set(error, ENOTSUP,
2365                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2366 }
2367
2368 static void
2369 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2370                  struct rte_flow *flow __rte_unused)
2371 {
2372 }
2373
2374 static void
2375 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2376                   struct rte_flow *flow __rte_unused)
2377 {
2378 }
2379
2380 static int
2381 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2382                 struct rte_flow *flow __rte_unused,
2383                 const struct rte_flow_action *actions __rte_unused,
2384                 void *data __rte_unused,
2385                 struct rte_flow_error *error)
2386 {
2387         return rte_flow_error_set(error, ENOTSUP,
2388                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2389 }
2390
2391 /* Void driver to protect from null pointer reference. */
2392 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2393         .validate = flow_null_validate,
2394         .prepare = flow_null_prepare,
2395         .translate = flow_null_translate,
2396         .apply = flow_null_apply,
2397         .remove = flow_null_remove,
2398         .destroy = flow_null_destroy,
2399         .query = flow_null_query,
2400 };
2401
2402 /**
2403  * Select flow driver type according to flow attributes and device
2404  * configuration.
2405  *
2406  * @param[in] dev
2407  *   Pointer to the dev structure.
2408  * @param[in] attr
2409  *   Pointer to the flow attributes.
2410  *
2411  * @return
2412  *   The flow driver type; MLX5_FLOW_TYPE_MAX if it cannot be determined.
2413  */
2414 static enum mlx5_flow_drv_type
2415 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2416 {
2417         struct mlx5_priv *priv = dev->data->dev_private;
2418         enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
2419
2420         if (attr->transfer && priv->config.dv_esw_en)
2421                 type = MLX5_FLOW_TYPE_DV;
2422         if (!attr->transfer)
2423                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2424                                                  MLX5_FLOW_TYPE_VERBS;
2425         return type;
2426 }
2427
2428 #define flow_get_drv_ops(type) flow_drv_ops[type]
2429
2430 /**
2431  * Flow driver validation API. This abstracts calling driver specific functions.
2432  * The type of flow driver is determined according to flow attributes.
2433  *
2434  * @param[in] dev
2435  *   Pointer to the dev structure.
2436  * @param[in] attr
2437  *   Pointer to the flow attributes.
2438  * @param[in] items
2439  *   Pointer to the list of items.
2440  * @param[in] actions
2441  *   Pointer to the list of actions.
2442  * @param[in] external
2443  *   True if this flow rule was created by a request external to the PMD.
2444  * @param[out] error
2445  *   Pointer to the error structure.
2446  *
2447  * @return
2448  *   0 on success, a negative errno value otherwise and rte_errno is set.
2449  */
2450 static inline int
2451 flow_drv_validate(struct rte_eth_dev *dev,
2452                   const struct rte_flow_attr *attr,
2453                   const struct rte_flow_item items[],
2454                   const struct rte_flow_action actions[],
2455                   bool external, struct rte_flow_error *error)
2456 {
2457         const struct mlx5_flow_driver_ops *fops;
2458         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2459
2460         fops = flow_get_drv_ops(type);
2461         return fops->validate(dev, attr, items, actions, external, error);
2462 }
2463
2464 /**
2465  * Flow driver preparation API. This abstracts calling driver specific
2466  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2467  * calculates the size of memory required for device flow, allocates the memory,
2468  * initializes the device flow and returns the pointer.
2469  *
2470  * @note
2471  *   This function initializes the device flow structure, such as dv or verbs,
2472  *   in struct mlx5_flow. However, it is the caller's responsibility to
2473  *   initialize the rest, e.g. adding the returned device flow to the flow's
2474  *   device flow list and setting the backward reference to the flow; this must
2475  *   be done outside this function. The layers field is not filled either.
2476  *
2477  * @param[in] dev
2478  *   Pointer to the dev structure.
 * @param[in] flow
 *   Pointer to the parent flow structure.
2479  * @param[in] attr
2480  *   Pointer to the flow attributes.
2481  * @param[in] items
2482  *   Pointer to the list of items.
2483  * @param[in] actions
2484  *   Pointer to the list of actions.
2485  * @param[out] error
2486  *   Pointer to the error structure.
2487  *
2488  * @return
2489  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2490  */
2491 static inline struct mlx5_flow *
2492 flow_drv_prepare(struct rte_eth_dev *dev,
2493                  const struct rte_flow *flow,
2494                  const struct rte_flow_attr *attr,
2495                  const struct rte_flow_item items[],
2496                  const struct rte_flow_action actions[],
2497                  struct rte_flow_error *error)
2498 {
2499         const struct mlx5_flow_driver_ops *fops;
2500         enum mlx5_flow_drv_type type = flow->drv_type;
2501
2502         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2503         fops = flow_get_drv_ops(type);
2504         return fops->prepare(dev, attr, items, actions, error);
2505 }
2506
2507 /**
2508  * Flow driver translation API. This abstracts calling driver specific
2509  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2510  * translates a generic flow into a driver flow. flow_drv_prepare() must
2511  * precede.
2512  *
2513  * @note
2514  *   dev_flow->layers could be filled as a result of parsing during translation
2515  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2516  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2517  *   flow->actions could be overwritten even though all the expanded dev_flows
2518  *   have the same actions.
2519  *
2520  * @param[in] dev
2521  *   Pointer to the rte dev structure.
2522  * @param[in, out] dev_flow
2523  *   Pointer to the mlx5 flow.
2524  * @param[in] attr
2525  *   Pointer to the flow attributes.
2526  * @param[in] items
2527  *   Pointer to the list of items.
2528  * @param[in] actions
2529  *   Pointer to the list of actions.
2530  * @param[out] error
2531  *   Pointer to the error structure.
2532  *
2533  * @return
2534  *   0 on success, a negative errno value otherwise and rte_errno is set.
2535  */
2536 static inline int
2537 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2538                    const struct rte_flow_attr *attr,
2539                    const struct rte_flow_item items[],
2540                    const struct rte_flow_action actions[],
2541                    struct rte_flow_error *error)
2542 {
2543         const struct mlx5_flow_driver_ops *fops;
2544         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2545
2546         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2547         fops = flow_get_drv_ops(type);
2548         return fops->translate(dev, dev_flow, attr, items, actions, error);
2549 }
2550
2551 /**
2552  * Flow driver apply API. This abstracts calling driver specific functions.
2553  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2554  * translated driver flows on to device. flow_drv_translate() must precede.
2555  *
2556  * @param[in] dev
2557  *   Pointer to Ethernet device structure.
2558  * @param[in, out] flow
2559  *   Pointer to flow structure.
2560  * @param[out] error
2561  *   Pointer to error structure.
2562  *
2563  * @return
2564  *   0 on success, a negative errno value otherwise and rte_errno is set.
2565  */
2566 static inline int
2567 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2568                struct rte_flow_error *error)
2569 {
2570         const struct mlx5_flow_driver_ops *fops;
2571         enum mlx5_flow_drv_type type = flow->drv_type;
2572
2573         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2574         fops = flow_get_drv_ops(type);
2575         return fops->apply(dev, flow, error);
2576 }
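/*
 * Editor's sketch (illustrative, not driver code): the expected calling
 * sequence of the driver hooks above, mirroring what the split helpers
 * later in this file do. Error handling and bookkeeping are abbreviated.
 *
 *     struct mlx5_flow *dev_flow;
 *
 *     dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
 *     if (!dev_flow)
 *         return -rte_errno;
 *     dev_flow->flow = flow;
 *     LIST_INSERT_HEAD(&flow->dev_handles, dev_flow->handle, next);
 *     if (flow_drv_translate(dev, dev_flow, attr, items, actions, error))
 *         return -rte_errno;
 *     ... once all subflows are translated ...
 *     if (flow_drv_apply(dev, flow, error))
 *         return -rte_errno;
 */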
2577
2578 /**
2579  * Flow driver remove API. This abstracts calling driver specific functions.
2580  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2581  * from the device. All the resources of the flow should be freed by calling
2582  * flow_drv_destroy().
2583  *
2584  * @param[in] dev
2585  *   Pointer to Ethernet device.
2586  * @param[in, out] flow
2587  *   Pointer to flow structure.
2588  */
2589 static inline void
2590 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2591 {
2592         const struct mlx5_flow_driver_ops *fops;
2593         enum mlx5_flow_drv_type type = flow->drv_type;
2594
2595         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2596         fops = flow_get_drv_ops(type);
2597         fops->remove(dev, flow);
2598 }
2599
2600 /**
2601  * Flow driver destroy API. This abstracts calling driver specific functions.
2602  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2603  * from the device and releases all of its resources.
2604  *
2605  * @param[in] dev
2606  *   Pointer to Ethernet device.
2607  * @param[in, out] flow
2608  *   Pointer to flow structure.
2609  */
2610 static inline void
2611 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2612 {
2613         const struct mlx5_flow_driver_ops *fops;
2614         enum mlx5_flow_drv_type type = flow->drv_type;
2615
2616         flow_mreg_split_qrss_release(dev, flow);
2617         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2618         fops = flow_get_drv_ops(type);
2619         fops->destroy(dev, flow);
2620 }
2621
2622 /**
2623  * Validate a flow supported by the NIC.
2624  *
2625  * @see rte_flow_validate()
2626  * @see rte_flow_ops
2627  */
2628 int
2629 mlx5_flow_validate(struct rte_eth_dev *dev,
2630                    const struct rte_flow_attr *attr,
2631                    const struct rte_flow_item items[],
2632                    const struct rte_flow_action actions[],
2633                    struct rte_flow_error *error)
2634 {
2635         int ret;
2636
2637         ret = flow_drv_validate(dev, attr, items, actions, true, error);
2638         if (ret < 0)
2639                 return ret;
2640         return 0;
2641 }
2642
2643 /**
2644  * Get RSS action from the action list.
2645  *
2646  * @param[in] actions
2647  *   Pointer to the list of actions.
2648  *
2649  * @return
2650  *   Pointer to the RSS action if it exists, NULL otherwise.
2651  */
2652 static const struct rte_flow_action_rss*
2653 flow_get_rss_action(const struct rte_flow_action actions[])
2654 {
2655         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2656                 switch (actions->type) {
2657                 case RTE_FLOW_ACTION_TYPE_RSS:
2658                         return (const struct rte_flow_action_rss *)
2659                                actions->conf;
2660                 default:
2661                         break;
2662                 }
2663         }
2664         return NULL;
2665 }
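/*
 * Usage sketch (editor's illustration): the flow creation path picks the
 * RSS configuration out of the user action list before deciding whether
 * the pattern needs RSS expansion.
 *
 *     const struct rte_flow_action_rss *rss;
 *
 *     rss = flow_get_rss_action(actions);
 *     if (rss && rss->types)
 *         ... expand the pattern according to the RSS hash types ...
 */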
2666
2667 static unsigned int
2668 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2669 {
2670         const struct rte_flow_item *item;
2671         unsigned int has_vlan = 0;
2672
2673         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2674                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2675                         has_vlan = 1;
2676                         break;
2677                 }
2678         }
2679         if (has_vlan)
2680                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2681                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2682         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2683                                MLX5_EXPANSION_ROOT_OUTER;
2684 }
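/*
 * Usage sketch (editor's illustration; "buf" and "expand_buffer" are assumed
 * caller-local names): the returned index selects the root node of
 * mlx5_support_expansion for rte_flow_expand_rss().
 *
 *     unsigned int graph_root;
 *
 *     graph_root = find_graph_root(items, rss->level);
 *     ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
 *                               items, rss->types,
 *                               mlx5_support_expansion, graph_root);
 */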
2685
2686 /**
2687  *  Get layer flags from the prefix flow.
2688  *
2689  *  Some flows may be split into several subflows: the prefix subflow gets the
2690  *  match items and the suffix subflow gets the actions.
2691  *  Some actions need the user-defined match item flags to get the details for
2692  *  the action.
2693  *  This function helps the suffix flow to get the item layer flags from the
2694  *  prefix subflow.
2695  *
2696  * @param[in] dev_flow
2697  *   Pointer to the created prefix subflow.
2698  *
2699  * @return
2700  *   The layers obtained from the prefix subflow.
2701  */
2702 static inline uint64_t
2703 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
2704 {
2705         uint64_t layers = 0;
2706
2707         /*
2708          * The layer bits could be fetched into a local variable, but
2709          * usually the compiler will optimize these accesses anyway.
2710          * If there is no decap action, use the layers directly.
2711          */
2712         if (!(dev_flow->handle->act_flags & MLX5_FLOW_ACTION_DECAP))
2713                 return dev_flow->handle->layers;
2714         /* Convert L3 layers with decap action. */
2715         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
2716                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2717         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
2718                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2719         /* Convert L4 layers with decap action.  */
2720         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
2721                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2722         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
2723                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2724         return layers;
2725 }
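/*
 * Example (editor's illustration): a prefix subflow matching
 * eth / ipv4 / udp / vxlan / eth / ipv4 / tcp with a tunnel decap action
 * carries MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L4_TCP in
 * its handle. After decap the suffix subflow sees an untunneled packet, so
 * this helper reports those bits as MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 * MLX5_FLOW_LAYER_OUTER_L4_TCP instead.
 */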
2726
2727 /**
2728  * Get metadata split action information.
2729  *
2730  * @param[in] actions
2731  *   Pointer to the list of actions.
2732  * @param[out] qrss
2733  *   Pointer to the return pointer. It is set to point at the QUEUE/RSS
2734  *   action if one is found in the list and is left untouched otherwise,
2735  *   so the caller should initialize it to NULL beforehand. The action
2736  *   type can then be read from (*qrss)->type.
2737  * @param[out] encap_idx
2738  *   Pointer to the index of the encap action if it exists, otherwise the
2739  *   last action index.
2740  *
2741  * @return
2742  *   Total number of actions.
2743  */
2744 static int
2745 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
2746                                        const struct rte_flow_action **qrss,
2747                                        int *encap_idx)
2748 {
2749         const struct rte_flow_action_raw_encap *raw_encap;
2750         int actions_n = 0;
2751         int raw_decap_idx = -1;
2752
2753         *encap_idx = -1;
2754         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2755                 switch (actions->type) {
2756                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2757                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2758                         *encap_idx = actions_n;
2759                         break;
2760                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
2761                         raw_decap_idx = actions_n;
2762                         break;
2763                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2764                         raw_encap = actions->conf;
2765                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
2766                                 *encap_idx = raw_decap_idx != -1 ?
2767                                                       raw_decap_idx : actions_n;
2768                         break;
2769                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2770                 case RTE_FLOW_ACTION_TYPE_RSS:
2771                         *qrss = actions;
2772                         break;
2773                 default:
2774                         break;
2775                 }
2776                 actions_n++;
2777         }
2778         if (*encap_idx == -1)
2779                 *encap_idx = actions_n;
2780         /* Count RTE_FLOW_ACTION_TYPE_END. */
2781         return actions_n + 1;
2782 }
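/*
 * Usage sketch (mirrors flow_create_split_metadata() below):
 *
 *     const struct rte_flow_action *qrss = NULL;
 *     int encap_idx;
 *     int actions_n;
 *
 *     actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
 *                                                        &encap_idx);
 *
 * Afterwards actions_n counts the terminating END action, qrss points at
 * the QUEUE/RSS action if one was present and encap_idx tells where the
 * metadata copy action has to be placed on egress.
 */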
2783
2784 /**
2785  * Check meter action from the action list.
2786  *
2787  * @param[in] actions
2788  *   Pointer to the list of actions.
2789  * @param[out] mtr
2790  *   Pointer to the meter exist flag.
2791  *
2792  * @return
2793  *   Total number of actions.
2794  */
2795 static int
2796 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
2797 {
2798         int actions_n = 0;
2799
2800         MLX5_ASSERT(mtr);
2801         *mtr = 0;
2802         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2803                 switch (actions->type) {
2804                 case RTE_FLOW_ACTION_TYPE_METER:
2805                         *mtr = 1;
2806                         break;
2807                 default:
2808                         break;
2809                 }
2810                 actions_n++;
2811         }
2812         /* Count RTE_FLOW_ACTION_TYPE_END. */
2813         return actions_n + 1;
2814 }
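/*
 * Usage sketch (editor's illustration; the meter split path is expected to
 * call this before sizing its prefix/suffix action buffers):
 *
 *     uint32_t mtr = 0;
 *     int actions_n;
 *
 *     actions_n = flow_check_meter_action(actions, &mtr);
 *     if (mtr)
 *         ... split the flow into meter prefix and suffix subflows ...
 */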
2815
2816 /**
2817  * Check if the flow should be split due to hairpin.
2818  * The reason for the split is that current HW can't
2819  * support encap on Rx, so if a flow has an encap action
2820  * we move it to Tx.
2821  *
2822  * @param dev
2823  *   Pointer to Ethernet device.
2824  * @param[in] attr
2825  *   Flow rule attributes.
2826  * @param[in] actions
2827  *   Associated actions (list terminated by the END action).
2828  *
2829  * @return
2830  *   > 0 the number of actions if the flow should be split,
2831  *   0 when no split is required.
2832  */
2833 static int
2834 flow_check_hairpin_split(struct rte_eth_dev *dev,
2835                          const struct rte_flow_attr *attr,
2836                          const struct rte_flow_action actions[])
2837 {
2838         int queue_action = 0;
2839         int action_n = 0;
2840         int encap = 0;
2841         const struct rte_flow_action_queue *queue;
2842         const struct rte_flow_action_rss *rss;
2843         const struct rte_flow_action_raw_encap *raw_encap;
2844
2845         if (!attr->ingress)
2846                 return 0;
2847         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2848                 switch (actions->type) {
2849                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2850                         queue = actions->conf;
2851                         if (queue == NULL)
2852                                 return 0;
2853                         if (mlx5_rxq_get_type(dev, queue->index) !=
2854                             MLX5_RXQ_TYPE_HAIRPIN)
2855                                 return 0;
2856                         queue_action = 1;
2857                         action_n++;
2858                         break;
2859                 case RTE_FLOW_ACTION_TYPE_RSS:
2860                         rss = actions->conf;
2861                         if (rss == NULL || rss->queue_num == 0)
2862                                 return 0;
2863                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
2864                             MLX5_RXQ_TYPE_HAIRPIN)
2865                                 return 0;
2866                         queue_action = 1;
2867                         action_n++;
2868                         break;
2869                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2870                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2871                         encap = 1;
2872                         action_n++;
2873                         break;
2874                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2875                         raw_encap = actions->conf;
2876                         if (raw_encap->size >
2877                             (sizeof(struct rte_flow_item_eth) +
2878                              sizeof(struct rte_flow_item_ipv4)))
2879                                 encap = 1;
2880                         action_n++;
2881                         break;
2882                 default:
2883                         action_n++;
2884                         break;
2885                 }
2886         }
2887         if (encap == 1 && queue_action)
2888                 return action_n;
2889         return 0;
2890 }
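/*
 * Example (editor's illustration): for an ingress rule with actions
 *     VXLAN_ENCAP / QUEUE(hairpin queue) / END
 * this helper returns 2 and the flow is split, while the same actions
 * targeting a regular Rx queue make it return 0 and the flow is kept whole.
 */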
2891
2892 /* Declare flow create/destroy prototype in advance. */
2893 static struct rte_flow *
2894 flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
2895                  const struct rte_flow_attr *attr,
2896                  const struct rte_flow_item items[],
2897                  const struct rte_flow_action actions[],
2898                  bool external, struct rte_flow_error *error);
2899
2900 static void
2901 flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2902                   struct rte_flow *flow);
2903
2904 /**
2905  * Add a flow of copying flow metadata registers in RX_CP_TBL.
2906  *
2907  * As mark_id is unique, if there's already a registered flow for the mark_id,
2908  * return by increasing the reference counter of the resource. Otherwise, create
2909  * the resource (mcp_res) and flow.
2910  *
2911  * The flow looks like:
2912  *   - If ingress port is ANY and reg_c[1] is mark_id,
2913  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
2914  *
2915  * For the default flow (zero mark_id), the flow looks like:
2916  *   - If ingress port is ANY,
2917  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
2918  *
2919  * @param dev
2920  *   Pointer to Ethernet device.
2921  * @param mark_id
2922  *   ID of MARK action, zero means default flow for META.
2923  * @param[out] error
2924  *   Perform verbose error reporting if not NULL.
2925  *
2926  * @return
2927  *   Associated resource on success, NULL otherwise and rte_errno is set.
2928  */
2929 static struct mlx5_flow_mreg_copy_resource *
2930 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
2931                           struct rte_flow_error *error)
2932 {
2933         struct mlx5_priv *priv = dev->data->dev_private;
2934         struct rte_flow_attr attr = {
2935                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
2936                 .ingress = 1,
2937         };
2938         struct mlx5_rte_flow_item_tag tag_spec = {
2939                 .data = mark_id,
2940         };
2941         struct rte_flow_item items[] = {
2942                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
2943         };
2944         struct rte_flow_action_mark ftag = {
2945                 .id = mark_id,
2946         };
2947         struct mlx5_flow_action_copy_mreg cp_mreg = {
2948                 .dst = REG_B,
2949                 .src = 0,
2950         };
2951         struct rte_flow_action_jump jump = {
2952                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
2953         };
2954         struct rte_flow_action actions[] = {
2955                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
2956         };
2957         struct mlx5_flow_mreg_copy_resource *mcp_res;
2958         int ret;
2959
2960         /* Fill the register fields in the flow. */
2961         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
2962         if (ret < 0)
2963                 return NULL;
2964         tag_spec.id = ret;
2965         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
2966         if (ret < 0)
2967                 return NULL;
2968         cp_mreg.src = ret;
2969         /* Check if already registered. */
2970         MLX5_ASSERT(priv->mreg_cp_tbl);
2971         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
2972         if (mcp_res) {
2973                 /* For non-default rule. */
2974                 if (mark_id != MLX5_DEFAULT_COPY_ID)
2975                         mcp_res->refcnt++;
2976                 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
2977                             mcp_res->refcnt == 1);
2978                 return mcp_res;
2979         }
2980         /* Provide the full width of FLAG specific value. */
2981         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
2982                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
2983         /* Build a new flow. */
2984         if (mark_id != MLX5_DEFAULT_COPY_ID) {
2985                 items[0] = (struct rte_flow_item){
2986                         .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
2987                         .spec = &tag_spec,
2988                 };
2989                 items[1] = (struct rte_flow_item){
2990                         .type = RTE_FLOW_ITEM_TYPE_END,
2991                 };
2992                 actions[0] = (struct rte_flow_action){
2993                         .type = MLX5_RTE_FLOW_ACTION_TYPE_MARK,
2994                         .conf = &ftag,
2995                 };
2996                 actions[1] = (struct rte_flow_action){
2997                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
2998                         .conf = &cp_mreg,
2999                 };
3000                 actions[2] = (struct rte_flow_action){
3001                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3002                         .conf = &jump,
3003                 };
3004                 actions[3] = (struct rte_flow_action){
3005                         .type = RTE_FLOW_ACTION_TYPE_END,
3006                 };
3007         } else {
3008                 /* Default rule, wildcard match. */
3009                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3010                 items[0] = (struct rte_flow_item){
3011                         .type = RTE_FLOW_ITEM_TYPE_END,
3012                 };
3013                 actions[0] = (struct rte_flow_action){
3014                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3015                         .conf = &cp_mreg,
3016                 };
3017                 actions[1] = (struct rte_flow_action){
3018                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3019                         .conf = &jump,
3020                 };
3021                 actions[2] = (struct rte_flow_action){
3022                         .type = RTE_FLOW_ACTION_TYPE_END,
3023                 };
3024         }
3025         /* Build a new entry. */
3026         mcp_res = rte_zmalloc(__func__, sizeof(*mcp_res), 0);
3027         if (!mcp_res) {
3028                 rte_errno = ENOMEM;
3029                 return NULL;
3030         }
3031         /*
3032          * The copy Flows are not included in any list. These
3033          * ones are referenced from other Flows and cannot
3034          * be applied, removed, or deleted in arbitrary order
3035          * by list traversing.
3036          */
3037         mcp_res->flow = flow_list_create(dev, NULL, &attr, items,
3038                                          actions, false, error);
3039         if (!mcp_res->flow)
3040                 goto error;
3041         mcp_res->refcnt++;
3042         mcp_res->hlist_ent.key = mark_id;
3043         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3044                                 &mcp_res->hlist_ent);
3045         MLX5_ASSERT(!ret);
3046         if (ret)
3047                 goto error;
3048         return mcp_res;
3049 error:
3050         if (mcp_res->flow)
3051                 flow_list_destroy(dev, NULL, mcp_res->flow);
3052         rte_free(mcp_res);
3053         return NULL;
3054 }
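/*
 * Usage sketch (as done by flow_mreg_update_copy_table() below): attach a
 * copy resource to a flow carrying a MARK action.
 *
 *     mcp_res = flow_mreg_add_copy_action(dev, mark->id, error);
 *     if (!mcp_res)
 *         return -rte_errno;
 *     flow->mreg_copy = mcp_res;
 */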
3055
3056 /**
3057  * Release flow in RX_CP_TBL.
3058  *
3059  * @param dev
3060  *   Pointer to Ethernet device.
3061  * @param[in] flow
3062  *   Parent flow for which copying is provided.
3063  */
3064 static void
3065 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3066                           struct rte_flow *flow)
3067 {
3068         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3069         struct mlx5_priv *priv = dev->data->dev_private;
3070
3071         if (!mcp_res || !priv->mreg_cp_tbl)
3072                 return;
3073         if (flow->copy_applied) {
3074                 MLX5_ASSERT(mcp_res->appcnt);
3075                 flow->copy_applied = 0;
3076                 --mcp_res->appcnt;
3077                 if (!mcp_res->appcnt)
3078                         flow_drv_remove(dev, mcp_res->flow);
3079         }
3080         /*
3081          * We do not check availability of metadata registers here,
3082          * because copy resources are not allocated in this case.
3083          */
3084         if (--mcp_res->refcnt)
3085                 return;
3086         MLX5_ASSERT(mcp_res->flow);
3087         flow_list_destroy(dev, NULL, mcp_res->flow);
3088         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3089         rte_free(mcp_res);
3090         flow->mreg_copy = NULL;
3091 }
3092
3093 /**
3094  * Start flow in RX_CP_TBL.
3095  *
3096  * @param dev
3097  *   Pointer to Ethernet device.
3098  * @param[in] flow
3099  *   Parent flow for which copying is provided.
3100  *
3101  * @return
3102  *   0 on success, a negative errno value otherwise and rte_errno is set.
3103  */
3104 static int
3105 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3106                             struct rte_flow *flow)
3107 {
3108         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3109         int ret;
3110
3111         if (!mcp_res || flow->copy_applied)
3112                 return 0;
3113         if (!mcp_res->appcnt) {
3114                 ret = flow_drv_apply(dev, mcp_res->flow, NULL);
3115                 if (ret)
3116                         return ret;
3117         }
3118         ++mcp_res->appcnt;
3119         flow->copy_applied = 1;
3120         return 0;
3121 }
3122
3123 /**
3124  * Stop flow in RX_CP_TBL.
3125  *
3126  * @param dev
3127  *   Pointer to Ethernet device.
3128  * @param[in] flow
3129  *   Parent flow for which copying is provided.
3130  */
3131 static void
3132 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3133                            struct rte_flow *flow)
3134 {
3135         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3136
3137         if (!mcp_res || !flow->copy_applied)
3138                 return;
3139         MLX5_ASSERT(mcp_res->appcnt);
3140         --mcp_res->appcnt;
3141         flow->copy_applied = 0;
3142         if (!mcp_res->appcnt)
3143                 flow_drv_remove(dev, mcp_res->flow);
3144 }
3145
3146 /**
3147  * Remove the default copy action from RX_CP_TBL.
3148  *
3149  * @param dev
3150  *   Pointer to Ethernet device.
3151  */
3152 static void
3153 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3154 {
3155         struct mlx5_flow_mreg_copy_resource *mcp_res;
3156         struct mlx5_priv *priv = dev->data->dev_private;
3157
3158         /* Check if default flow is registered. */
3159         if (!priv->mreg_cp_tbl)
3160                 return;
3161         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3162                                             MLX5_DEFAULT_COPY_ID);
3163         if (!mcp_res)
3164                 return;
3165         MLX5_ASSERT(mcp_res->flow);
3166         flow_list_destroy(dev, NULL, mcp_res->flow);
3167         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3168         rte_free(mcp_res);
3169 }
3170
3171 /**
3172  * Add the default copy action in RX_CP_TBL.
3173  *
3174  * @param dev
3175  *   Pointer to Ethernet device.
3176  * @param[out] error
3177  *   Perform verbose error reporting if not NULL.
3178  *
3179  * @return
3180  *   0 for success, negative value otherwise and rte_errno is set.
3181  */
3182 static int
3183 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3184                                   struct rte_flow_error *error)
3185 {
3186         struct mlx5_priv *priv = dev->data->dev_private;
3187         struct mlx5_flow_mreg_copy_resource *mcp_res;
3188
3189         /* Check whether extensive metadata feature is engaged. */
3190         if (!priv->config.dv_flow_en ||
3191             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3192             !mlx5_flow_ext_mreg_supported(dev) ||
3193             !priv->sh->dv_regc0_mask)
3194                 return 0;
3195         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3196         if (!mcp_res)
3197                 return -rte_errno;
3198         return 0;
3199 }
3200
3201 /**
3202  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3203  *
3204  * All the flows having a Q/RSS action should be split by
3205  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3206  * performs the following,
3207  *   - CQE->flow_tag := reg_c[1] (MARK)
3208  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3209  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3210  * but there should be a flow for each MARK ID set by the MARK action.
3211  *
3212  * For the aforementioned reason, if there's a MARK action in the flow's action
3213  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3214  * the MARK ID to CQE's flow_tag like,
3215  *   - If reg_c[1] is mark_id,
3216  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3217  *
3218  * For the SET_META action, which stores its value in reg_c[0], as the
3219  * destination is also a flow metadata register (reg_b), adding a default flow
3220  * is enough. Zero MARK ID means the default flow. The default flow looks like:
3221  *   - For all flows, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3222  *
3223  * @param dev
3224  *   Pointer to Ethernet device.
3225  * @param flow
3226  *   Pointer to flow structure.
3227  * @param[in] actions
3228  *   Pointer to the list of actions.
3229  * @param[out] error
3230  *   Perform verbose error reporting if not NULL.
3231  *
3232  * @return
3233  *   0 on success, negative value otherwise and rte_errno is set.
3234  */
3235 static int
3236 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3237                             struct rte_flow *flow,
3238                             const struct rte_flow_action *actions,
3239                             struct rte_flow_error *error)
3240 {
3241         struct mlx5_priv *priv = dev->data->dev_private;
3242         struct mlx5_dev_config *config = &priv->config;
3243         struct mlx5_flow_mreg_copy_resource *mcp_res;
3244         const struct rte_flow_action_mark *mark;
3245
3246         /* Check whether extensive metadata feature is engaged. */
3247         if (!config->dv_flow_en ||
3248             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3249             !mlx5_flow_ext_mreg_supported(dev) ||
3250             !priv->sh->dv_regc0_mask)
3251                 return 0;
3252         /* Find MARK action. */
3253         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3254                 switch (actions->type) {
3255                 case RTE_FLOW_ACTION_TYPE_FLAG:
3256                         mcp_res = flow_mreg_add_copy_action
3257                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3258                         if (!mcp_res)
3259                                 return -rte_errno;
3260                         flow->mreg_copy = mcp_res;
3261                         if (dev->data->dev_started) {
3262                                 mcp_res->appcnt++;
3263                                 flow->copy_applied = 1;
3264                         }
3265                         return 0;
3266                 case RTE_FLOW_ACTION_TYPE_MARK:
3267                         mark = (const struct rte_flow_action_mark *)
3268                                 actions->conf;
3269                         mcp_res =
3270                                 flow_mreg_add_copy_action(dev, mark->id, error);
3271                         if (!mcp_res)
3272                                 return -rte_errno;
3273                         flow->mreg_copy = mcp_res;
3274                         if (dev->data->dev_started) {
3275                                 mcp_res->appcnt++;
3276                                 flow->copy_applied = 1;
3277                         }
3278                         return 0;
3279                 default:
3280                         break;
3281                 }
3282         }
3283         return 0;
3284 }
3285
3286 #define MLX5_MAX_SPLIT_ACTIONS 24
3287 #define MLX5_MAX_SPLIT_ITEMS 24
3288
3289 /**
3290  * Split the hairpin flow.
3291  * Since HW can't support encap on Rx we move the encap to Tx.
3292  * If the count action is after the encap then we also
3293  * move the count action. In this case the count will also measure
3294  * the outer bytes.
3295  *
3296  * @param dev
3297  *   Pointer to Ethernet device.
3298  * @param[in] actions
3299  *   Associated actions (list terminated by the END action).
3300  * @param[out] actions_rx
3301  *   Rx flow actions.
3302  * @param[out] actions_tx
3303  *   Tx flow actions.
3304  * @param[out] pattern_tx
3305  *   The pattern items for the Tx flow.
3306  * @param[out] flow_id
3307  *   The flow ID connected to this flow.
3308  *
3309  * @return
3310  *   0 on success.
3311  */
3312 static int
3313 flow_hairpin_split(struct rte_eth_dev *dev,
3314                    const struct rte_flow_action actions[],
3315                    struct rte_flow_action actions_rx[],
3316                    struct rte_flow_action actions_tx[],
3317                    struct rte_flow_item pattern_tx[],
3318                    uint32_t *flow_id)
3319 {
3320         struct mlx5_priv *priv = dev->data->dev_private;
3321         const struct rte_flow_action_raw_encap *raw_encap;
3322         const struct rte_flow_action_raw_decap *raw_decap;
3323         struct mlx5_rte_flow_action_set_tag *set_tag;
3324         struct rte_flow_action *tag_action;
3325         struct mlx5_rte_flow_item_tag *tag_item;
3326         struct rte_flow_item *item;
3327         char *addr;
3328         int encap = 0;
3329
3330         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3331         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3332                 switch (actions->type) {
3333                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3334                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3335                         rte_memcpy(actions_tx, actions,
3336                                sizeof(struct rte_flow_action));
3337                         actions_tx++;
3338                         break;
3339                 case RTE_FLOW_ACTION_TYPE_COUNT:
3340                         if (encap) {
3341                                 rte_memcpy(actions_tx, actions,
3342                                            sizeof(struct rte_flow_action));
3343                                 actions_tx++;
3344                         } else {
3345                                 rte_memcpy(actions_rx, actions,
3346                                            sizeof(struct rte_flow_action));
3347                                 actions_rx++;
3348                         }
3349                         break;
3350                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3351                         raw_encap = actions->conf;
3352                         if (raw_encap->size >
3353                             (sizeof(struct rte_flow_item_eth) +
3354                              sizeof(struct rte_flow_item_ipv4))) {
3355                                 memcpy(actions_tx, actions,
3356                                        sizeof(struct rte_flow_action));
3357                                 actions_tx++;
3358                                 encap = 1;
3359                         } else {
3360                                 rte_memcpy(actions_rx, actions,
3361                                            sizeof(struct rte_flow_action));
3362                                 actions_rx++;
3363                         }
3364                         break;
3365                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3366                         raw_decap = actions->conf;
3367                         if (raw_decap->size <
3368                             (sizeof(struct rte_flow_item_eth) +
3369                              sizeof(struct rte_flow_item_ipv4))) {
3370                                 memcpy(actions_tx, actions,
3371                                        sizeof(struct rte_flow_action));
3372                                 actions_tx++;
3373                         } else {
3374                                 rte_memcpy(actions_rx, actions,
3375                                            sizeof(struct rte_flow_action));
3376                                 actions_rx++;
3377                         }
3378                         break;
3379                 default:
3380                         rte_memcpy(actions_rx, actions,
3381                                    sizeof(struct rte_flow_action));
3382                         actions_rx++;
3383                         break;
3384                 }
3385         }
3386         /* Add the set tag action and the end action for the Rx flow. */
3387         tag_action = actions_rx;
3388         tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3389         actions_rx++;
3390         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3391         actions_rx++;
3392         set_tag = (void *)actions_rx;
3393         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3394         MLX5_ASSERT(set_tag->id > REG_NONE);
3395         set_tag->data = *flow_id;
3396         tag_action->conf = set_tag;
3397         /* Create Tx item list. */
3398         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3399         addr = (void *)&pattern_tx[2];
3400         item = pattern_tx;
3401         item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3402         tag_item = (void *)addr;
3403         tag_item->data = *flow_id;
3404         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3405         MLX5_ASSERT(set_tag->id > REG_NONE);
3406         item->spec = tag_item;
3407         addr += sizeof(struct mlx5_rte_flow_item_tag);
3408         tag_item = (void *)addr;
3409         tag_item->data = UINT32_MAX;
3410         tag_item->id = UINT16_MAX;
3411         item->mask = tag_item;
3412         addr += sizeof(struct mlx5_rte_flow_item_tag);
3413         item->last = NULL;
3414         item++;
3415         item->type = RTE_FLOW_ITEM_TYPE_END;
3416         return 0;
3417 }
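/*
 * Example (editor's illustration): the ingress actions
 *     VXLAN_ENCAP / QUEUE(hairpin) / END
 * are split by the routine above into
 *     Rx: QUEUE(hairpin) / SET_TAG(reg := flow_id) / END
 *     Tx: match TAG(reg) == flow_id, actions VXLAN_ENCAP / END
 * so the encap is performed on the Tx side of the hairpin while both
 * halves stay connected through the allocated flow_id.
 */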
3418
3419 /**
3420  * The last stage of the splitting chain; it just creates the subflow
3421  * without any modification.
3422  *
3423  * @param[in] dev
3424  *   Pointer to Ethernet device.
3425  * @param[in] flow
3426  *   Parent flow structure pointer.
3427  * @param[in, out] sub_flow
3428  *   Pointer to return the created subflow, may be NULL.
3429  * @param[in] prefix_layers
3430  *   Prefix subflow layers, may be 0.
3431  * @param[in] attr
3432  *   Flow rule attributes.
3433  * @param[in] items
3434  *   Pattern specification (list terminated by the END pattern item).
3435  * @param[in] actions
3436  *   Associated actions (list terminated by the END action).
3437  * @param[in] external
3438  *   This flow rule is created by a request external to the PMD.
3439  * @param[out] error
3440  *   Perform verbose error reporting if not NULL.
3441  * @return
3442  *   0 on success, negative value otherwise
3443  */
3444 static int
3445 flow_create_split_inner(struct rte_eth_dev *dev,
3446                         struct rte_flow *flow,
3447                         struct mlx5_flow **sub_flow,
3448                         uint64_t prefix_layers,
3449                         const struct rte_flow_attr *attr,
3450                         const struct rte_flow_item items[],
3451                         const struct rte_flow_action actions[],
3452                         bool external, struct rte_flow_error *error)
3453 {
3454         struct mlx5_flow *dev_flow;
3455
3456         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions, error);
3457         if (!dev_flow)
3458                 return -rte_errno;
3459         dev_flow->flow = flow;
3460         dev_flow->external = external;
3461         /* Subflow object was created, we must include it in the list. */
3462         LIST_INSERT_HEAD(&flow->dev_handles, dev_flow->handle, next);
3463         /*
3464          * If dev_flow is one of the suffix flows, some actions in the
3465          * suffix flow may need some user-defined item layer flags.
3466          */
3467         if (prefix_layers)
3468                 dev_flow->handle->layers = prefix_layers;
3469         if (sub_flow)
3470                 *sub_flow = dev_flow;
3471         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3472 }
3473
3474 /**
3475  * Split the meter flow.
3476  *
3477  * As the meter flow will be split into three subflows, the other
3478  * actions (besides the meter action itself) only make sense when
3479  * the meter accepts the packet. If the packet needs to be dropped,
3480  * no additional actions should be taken.
3481  *
3482  * One kind of special action which decapsulates the L3 tunnel
3483  * header will be put in the prefix subflow, so as not to take the
3484  * L3 tunnel header into account.
3485  *
3486  * @param dev
3487  *   Pointer to Ethernet device.
3488  * @param[in] items
3489  *   Pattern specification (list terminated by the END pattern item).
3490  * @param[out] sfx_items
3491  *   Suffix flow match items (list terminated by the END pattern item).
3492  * @param[in] actions
3493  *   Associated actions (list terminated by the END action).
3494  * @param[out] actions_sfx
3495  *   Suffix flow actions.
3496  * @param[out] actions_pre
3497  *   Prefix flow actions.
3498  *
3499  * @return
3500  *   The tag ID shared by the prefix flow's SET_TAG action and the suffix
3501  *   flow's TAG match item.
3505  */
3506 static int
3507 flow_meter_split_prep(struct rte_eth_dev *dev,
3508                  const struct rte_flow_item items[],
3509                  struct rte_flow_item sfx_items[],
3510                  const struct rte_flow_action actions[],
3511                  struct rte_flow_action actions_sfx[],
3512                  struct rte_flow_action actions_pre[])
3513 {
3514         struct rte_flow_action *tag_action = NULL;
3515         struct rte_flow_item *tag_item;
3516         struct mlx5_rte_flow_action_set_tag *set_tag;
3517         struct rte_flow_error error;
3518         const struct rte_flow_action_raw_encap *raw_encap;
3519         const struct rte_flow_action_raw_decap *raw_decap;
3520         struct mlx5_rte_flow_item_tag *tag_spec;
3521         struct mlx5_rte_flow_item_tag *tag_mask;
3522         uint32_t tag_id;
3523         bool copy_vlan = false;
3524
3525         /* Prepare the actions for prefix and suffix flow. */
3526         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3527                 struct rte_flow_action **action_cur = NULL;
3528
3529                 switch (actions->type) {
3530                 case RTE_FLOW_ACTION_TYPE_METER:
3531                         /* Add the extra tag action first. */
3532                         tag_action = actions_pre;
3533                         tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3534                         actions_pre++;
3535                         action_cur = &actions_pre;
3536                         break;
3537                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3538                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3539                         action_cur = &actions_pre;
3540                         break;
3541                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3542                         raw_encap = actions->conf;
3543                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
3544                                 action_cur = &actions_pre;
3545                         break;
3546                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3547                         raw_decap = actions->conf;
3548                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3549                                 action_cur = &actions_pre;
3550                         break;
3551                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3552                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3553                         copy_vlan = true;
3554                         break;
3555                 default:
3556                         break;
3557                 }
3558                 if (!action_cur)
3559                         action_cur = &actions_sfx;
3560                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
3561                 (*action_cur)++;
3562         }
3563         /* Add end action to the actions. */
3564         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
3565         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
3566         actions_pre++;
3567         /* Set the tag. */
3568         set_tag = (void *)actions_pre;
3569         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3570         /*
3571          * Get the id from the qrss_pool to make qrss share the id with meter.
3572          */
3573         tag_id = flow_qrss_get_id(dev);
3574         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
3575         MLX5_ASSERT(tag_action);
3576         tag_action->conf = set_tag;
3577         /* Prepare the suffix subflow items. */
3578         tag_item = sfx_items++;
3579         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3580                 int item_type = items->type;
3581
3582                 switch (item_type) {
3583                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
3584                         memcpy(sfx_items, items, sizeof(*sfx_items));
3585                         sfx_items++;
3586                         break;
3587                 case RTE_FLOW_ITEM_TYPE_VLAN:
3588                         if (copy_vlan) {
3589                                 memcpy(sfx_items, items, sizeof(*sfx_items));
3590                                 /*
3591                                  * Convert to internal match item, it is used
3592                                  * for vlan push and set vid.
3593                                  */
3594                                 sfx_items->type = MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
3595                                 sfx_items++;
3596                         }
3597                         break;
3598                 default:
3599                         break;
3600                 }
3601         }
3602         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
3603         sfx_items++;
3604         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
3605         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
3606         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3607         tag_mask = tag_spec + 1;
3608         tag_mask->data = 0xffffff00;
3609         tag_item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3610         tag_item->spec = tag_spec;
3611         tag_item->last = NULL;
3612         tag_item->mask = tag_mask;
3613         return tag_id;
3614 }
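/*
 * Example (editor's illustration): the actions METER / QUEUE / END become
 *     prefix: SET_TAG(mtr reg := tag_id << MLX5_MTR_COLOR_BITS) / METER / END
 *     suffix: match TAG(mtr reg) == tag_id << MLX5_MTR_COLOR_BITS
 *             (mask 0xffffff00), actions QUEUE / END
 * The tag carries the flow identity over to the suffix table once the meter
 * has accepted the packet.
 */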
3615
3616 /**
3617  * Split action list having QUEUE/RSS for metadata register copy.
3618  *
3619  * Once a Q/RSS action is detected in the user's action list, the flow actions
3620  * should be split in order to copy metadata registers, which will happen in
3621  * RX_CP_TBL like,
3622  *   - CQE->flow_tag := reg_c[1] (MARK)
3623  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3624  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
3625  * This is because the last action of each flow must be a terminal action
3626  * (QUEUE, RSS or DROP).
3627  *
3628  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
3629  * stored and kept in the mlx5_flow structure per each sub_flow.
3630  *
3631  * The Q/RSS action is replaced with,
3632  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
3633  * And the following JUMP action is added at the end,
3634  *   - JUMP, to RX_CP_TBL.
3635  *
3636  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
3637  * flow_create_split_metadata() routine. The flow will look like,
3638  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
3639  *
3640  * @param dev
3641  *   Pointer to Ethernet device.
3642  * @param[out] split_actions
3643  *   Pointer to store split actions to jump to CP_TBL.
3644  * @param[in] actions
3645  *   Pointer to the list of original flow actions.
3646  * @param[in] qrss
3647  *   Pointer to the Q/RSS action.
3648  * @param[in] actions_n
3649  *   Number of original actions.
3650  * @param[out] error
3651  *   Perform verbose error reporting if not NULL.
3652  *
3653  * @return
3654  *   non-zero unique flow_id on success, otherwise 0 and
3655  *   error/rte_errno are set.
3656  */
3657 static uint32_t
3658 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
3659                           struct rte_flow_action *split_actions,
3660                           const struct rte_flow_action *actions,
3661                           const struct rte_flow_action *qrss,
3662                           int actions_n, struct rte_flow_error *error)
3663 {
3664         struct mlx5_rte_flow_action_set_tag *set_tag;
3665         struct rte_flow_action_jump *jump;
3666         const int qrss_idx = qrss - actions;
3667         uint32_t flow_id = 0;
3668         int ret = 0;
3669
3670         /*
3671          * Given actions will be split
3672          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
3673          * - Add jump to mreg CP_TBL.
3674          * As a result, there will be one more action.
3675          */
3676         ++actions_n;
3677         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
3678         set_tag = (void *)(split_actions + actions_n);
3679         /*
3680          * If the tag action is not set to void (meaning we are not the
3681          * meter suffix flow), add the tag action; the meter suffix flow
3682          * already has the tag added.
3683          */
3684         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
3685                 /*
3686                  * Allocate the new subflow ID. This one is unique within
3687                  * device and not shared with representors. Otherwise,
3688                  * we would have to resolve multi-thread access synch
3689                  * issue. Each flow on the shared device is appended
3690                  * with source vport identifier, so the resulting
3691                  * flows will be unique in the shared (by master and
3692                  * representors) domain even if they have coinciding
3693                  * IDs.
3694                  */
3695                 flow_id = flow_qrss_get_id(dev);
3696                 if (!flow_id)
3697                         return rte_flow_error_set(error, ENOMEM,
3698                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3699                                                   NULL, "can't allocate id "
3700                                                   "for split Q/RSS subflow");
3701                 /* Internal SET_TAG action to set flow ID. */
3702                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3703                         .data = flow_id,
3704                 };
3705                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3706                 if (ret < 0)
3707                         return ret;
3708                 set_tag->id = ret;
3709                 /* Construct new actions array. */
3710                 /* Replace QUEUE/RSS action. */
3711                 split_actions[qrss_idx] = (struct rte_flow_action){
3712                         .type = MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3713                         .conf = set_tag,
3714                 };
3715         }
3716         /* JUMP action to jump to mreg copy table (CP_TBL). */
3717         jump = (void *)(set_tag + 1);
3718         *jump = (struct rte_flow_action_jump){
3719                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3720         };
3721         split_actions[actions_n - 2] = (struct rte_flow_action){
3722                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3723                 .conf = jump,
3724         };
3725         split_actions[actions_n - 1] = (struct rte_flow_action){
3726                 .type = RTE_FLOW_ACTION_TYPE_END,
3727         };
3728         return flow_id;
3729 }
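/*
 * Example (editor's illustration): MARK / RSS / END becomes
 *     MARK / SET_TAG(reg_c[2] := flow_id) / JUMP(RX_CP_TBL) / END
 * The RSS action itself is re-created later in RX_ACT_TBL against a match
 * on reg_c[2] == flow_id, see flow_create_split_metadata() below.
 */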
3730
3731 /**
3732  * Extend the given action list for Tx metadata copy.
3733  *
3734  * Copy the given action list to ext_actions and add a flow metadata register
3735  * copy action in order to copy reg_a, set by the WQE, to reg_c[0].
3736  *
3737  * @param[out] ext_actions
3738  *   Pointer to the extended action list.
3739  * @param[in] actions
3740  *   Pointer to the list of actions.
3741  * @param[in] actions_n
3742  *   Number of actions in the list.
3743  * @param[out] error
3744  *   Perform verbose error reporting if not NULL.
3745  * @param[in] encap_idx
3746  *   The encap action index.
3747  *
3748  * @return
3749  *   0 on success, negative value otherwise
3750  */
3751 static int
3752 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
3753                        struct rte_flow_action *ext_actions,
3754                        const struct rte_flow_action *actions,
3755                        int actions_n, struct rte_flow_error *error,
3756                        int encap_idx)
3757 {
3758         struct mlx5_flow_action_copy_mreg *cp_mreg =
3759                 (struct mlx5_flow_action_copy_mreg *)
3760                         (ext_actions + actions_n + 1);
3761         int ret;
3762
3763         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3764         if (ret < 0)
3765                 return ret;
3766         cp_mreg->dst = ret;
3767         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
3768         if (ret < 0)
3769                 return ret;
3770         cp_mreg->src = ret;
3771         if (encap_idx != 0)
3772                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
3773         if (encap_idx == actions_n - 1) {
3774                 ext_actions[actions_n - 1] = (struct rte_flow_action){
3775                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3776                         .conf = cp_mreg,
3777                 };
3778                 ext_actions[actions_n] = (struct rte_flow_action){
3779                         .type = RTE_FLOW_ACTION_TYPE_END,
3780                 };
3781         } else {
3782                 ext_actions[encap_idx] = (struct rte_flow_action){
3783                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3784                         .conf = cp_mreg,
3785                 };
3786                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
3787                                 sizeof(*ext_actions) * (actions_n - encap_idx));
3788         }
3789         return 0;
3790 }
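/*
 * Example (editor's illustration): egress actions RAW_ENCAP / END with
 * encap_idx == 0 become
 *     COPY_MREG(reg_c[0] := reg_a) / RAW_ENCAP / END
 * i.e. the metadata is copied before the encapsulation rewrites the packet.
 */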
3791
3792 /**
3793  * The splitting for metadata feature.
3794  *
3795  * - Q/RSS action on NIC Rx should be split in order to pass by
3796  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
3797  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
3798  *
3799  * - All the actions on NIC Tx should have a mreg copy action to
3800  *   copy reg_a from WQE to reg_c[0].
3801  *
3802  * @param dev
3803  *   Pointer to Ethernet device.
3804  * @param[in] flow
3805  *   Parent flow structure pointer.
3806  * @param[in] prefix_layers
3807  *   Prefix flow layer flags.
3808  * @param[in] attr
3809  *   Flow rule attributes.
3810  * @param[in] items
3811  *   Pattern specification (list terminated by the END pattern item).
3812  * @param[in] actions
3813  *   Associated actions (list terminated by the END action).
3814  * @param[in] external
3815  *   This flow rule is created by a request external to the PMD.
3816  * @param[out] error
3817  *   Perform verbose error reporting if not NULL.
3818  * @return
3819  *   0 on success, negative value otherwise
3820  */
3821 static int
3822 flow_create_split_metadata(struct rte_eth_dev *dev,
3823                            struct rte_flow *flow,
3824                            uint64_t prefix_layers,
3825                            const struct rte_flow_attr *attr,
3826                            const struct rte_flow_item items[],
3827                            const struct rte_flow_action actions[],
3828                            bool external, struct rte_flow_error *error)
3829 {
3830         struct mlx5_priv *priv = dev->data->dev_private;
3831         struct mlx5_dev_config *config = &priv->config;
3832         const struct rte_flow_action *qrss = NULL;
3833         struct rte_flow_action *ext_actions = NULL;
3834         struct mlx5_flow *dev_flow = NULL;
3835         uint32_t qrss_id = 0;
3836         int mtr_sfx = 0;
3837         size_t act_size;
3838         int actions_n;
3839         int encap_idx;
3840         int ret;
3841
3842         /* Check whether extensive metadata feature is engaged. */
3843         if (!config->dv_flow_en ||
3844             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3845             !mlx5_flow_ext_mreg_supported(dev))
3846                 return flow_create_split_inner(dev, flow, NULL, prefix_layers,
3847                                                attr, items, actions, external,
3848                                                error);
3849         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
3850                                                            &encap_idx);
3851         if (qrss) {
3852                 /* Exclude hairpin flows from splitting. */
3853                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
3854                         const struct rte_flow_action_queue *queue;
3855
3856                         queue = qrss->conf;
3857                         if (mlx5_rxq_get_type(dev, queue->index) ==
3858                             MLX5_RXQ_TYPE_HAIRPIN)
3859                                 qrss = NULL;
3860                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
3861                         const struct rte_flow_action_rss *rss;
3862
3863                         rss = qrss->conf;
3864                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
3865                             MLX5_RXQ_TYPE_HAIRPIN)
3866                                 qrss = NULL;
3867                 }
3868         }
3869         if (qrss) {
3870                 /* Check if it is in meter suffix table. */
3871                 mtr_sfx = attr->group == (attr->transfer ?
3872                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
3873                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
3874                 /*
3875                  * Q/RSS action on NIC Rx should be split in order to pass by
3876                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
3877                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
3878                  */
3879                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
3880                            sizeof(struct rte_flow_action_set_tag) +
3881                            sizeof(struct rte_flow_action_jump);
3882                 ext_actions = rte_zmalloc(__func__, act_size, 0);
3883                 if (!ext_actions)
3884                         return rte_flow_error_set(error, ENOMEM,
3885                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3886                                                   NULL, "no memory to split "
3887                                                   "metadata flow");
3888                 /*
3889                  * If we are the meter's suffix flow, the tag already exists.
3890                  * Set the tag action to void.
3891                  */
3892                 if (mtr_sfx)
3893                         ext_actions[qrss - actions].type =
3894                                                 RTE_FLOW_ACTION_TYPE_VOID;
3895                 else
3896                         ext_actions[qrss - actions].type =
3897                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3898                 /*
3899                  * Create the new action list with the Q/RSS action removed,
3900                  * and set tag plus jump to register copy table (RX_CP_TBL)
3901                  * actions appended. The unique tag ID is preallocated here
3902                  * in advance because the set tag action needs it.
3903                  */
3904                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
3905                                                     qrss, actions_n, error);
3906                 if (!mtr_sfx && !qrss_id) {
3907                         ret = -rte_errno;
3908                         goto exit;
3909                 }
3910         } else if (attr->egress && !attr->transfer) {
3911                 /*
3912                  * All the actions on NIC Tx should have a metadata register
3913                  * copy action to copy reg_a from WQE to reg_c[meta].
3914                  */
3915                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
3916                            sizeof(struct mlx5_flow_action_copy_mreg);
3917                 ext_actions = rte_zmalloc(__func__, act_size, 0);
3918                 if (!ext_actions)
3919                         return rte_flow_error_set(error, ENOMEM,
3920                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3921                                                   NULL, "no memory to split "
3922                                                   "metadata flow");
3923                 /* Create the action list appended with copy register. */
3924                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
3925                                              actions_n, error, encap_idx);
3926                 if (ret < 0)
3927                         goto exit;
3928         }
3929         /* Add the unmodified original or prefix subflow. */
3930         ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
3931                                       items, ext_actions ? ext_actions :
3932                                       actions, external, error);
3933         if (ret < 0)
3934                 goto exit;
3935         MLX5_ASSERT(dev_flow);
3936         if (qrss) {
3937                 const struct rte_flow_attr q_attr = {
3938                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3939                         .ingress = 1,
3940                 };
3941                 /* Internal PMD action to set register. */
3942                 struct mlx5_rte_flow_item_tag q_tag_spec = {
3943                         .data = qrss_id,
3944                         .id = 0,
3945                 };
3946                 struct rte_flow_item q_items[] = {
3947                         {
3948                                 .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3949                                 .spec = &q_tag_spec,
3950                                 .last = NULL,
3951                                 .mask = NULL,
3952                         },
3953                         {
3954                                 .type = RTE_FLOW_ITEM_TYPE_END,
3955                         },
3956                 };
3957                 struct rte_flow_action q_actions[] = {
3958                         {
3959                                 .type = qrss->type,
3960                                 .conf = qrss->conf,
3961                         },
3962                         {
3963                                 .type = RTE_FLOW_ACTION_TYPE_END,
3964                         },
3965                 };
3966                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
3967
3968                 /*
3969                  * Configure the tag item only if there is no meter subflow.
3970                  * Since the tag is already set in the meter suffix subflow,
3971                  * the meter suffix items can be used as is.
3972                  */
3973                 if (qrss_id) {
3974                         /* Not meter subflow. */
3975                         MLX5_ASSERT(!mtr_sfx);
3976                         /*
3977                          * Store the unique ID in the prefix flow because it
3978                          * is destroyed after the suffix flow. The ID is only
3979                          * freed once no actual flows reference it, so that
3980                          * identifier reallocation becomes possible (e.g. for
3981                          * other flows in other threads).
3982                          */
3983                         dev_flow->handle->qrss_id = qrss_id;
3984                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
3985                                                    error);
3986                         if (ret < 0)
3987                                 goto exit;
3988                         q_tag_spec.id = ret;
3989                 }
3990                 dev_flow = NULL;
3991                 /* Add suffix subflow to execute Q/RSS. */
3992                 ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
3993                                               &q_attr, mtr_sfx ? items :
3994                                               q_items, q_actions,
3995                                               external, error);
3996                 if (ret < 0)
3997                         goto exit;
3998                 /* Zero qrss_id so it is freed only on failure. */
3999                 qrss_id = 0;
4000                 MLX5_ASSERT(dev_flow);
4001         }
4002
4003 exit:
4004         /*
4005          * We do not destroy the partially created sub_flows in case of error.
4006          * They are included in the parent flow list and will be destroyed
4007          * by flow_drv_destroy.
4008          */
4009         flow_qrss_free_id(dev, qrss_id);
4010         rte_free(ext_actions);
4011         return ret;
4012 }
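
/*
 * Illustrative sketch of the Q/RSS metadata split above, with hypothetical
 * mark/queue values (not part of the driver). An application rule with
 * MARK / QUEUE actions ends up as two subflows, roughly:
 *
 * @code
 * // Original rule:   MARK(0x42) / QUEUE(3) / END
 * // Prefix subflow:  MARK(0x42) / TAG(qrss_id) / JUMP(RX_CP_TBL) / END
 * // Suffix subflow:  match TAG(qrss_id) in RX_ACT_TBL -> QUEUE(3) / END
 * @endcode
 */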
4013
4014 /**
4015  * The flow splitting for the meter feature; a usage sketch follows
4016  * this function.
4017  *
4018  * - The meter flow will be split into two flows, a prefix and a
4019  *   suffix flow. Packets proceed only if they pass the prefix
4020  *   meter action.
4021  *
4022  * - Reg_C_5 is used to match packets between the prefix and
4023  *   suffix flows.
4023  *
4024  * @param dev
4025  *   Pointer to Ethernet device.
4026  * @param[in] flow
4027  *   Parent flow structure pointer.
4028  * @param[in] attr
4029  *   Flow rule attributes.
4030  * @param[in] items
4031  *   Pattern specification (list terminated by the END pattern item).
4032  * @param[in] actions
4033  *   Associated actions (list terminated by the END action).
4034  * @param[in] external
4035  *   This flow rule is created by a request external to the PMD.
4036  * @param[out] error
4037  *   Perform verbose error reporting if not NULL.
4038  * @return
4039  *   0 on success, negative value otherwise
4040  */
4041 static int
4042 flow_create_split_meter(struct rte_eth_dev *dev,
4043                         struct rte_flow *flow,
4044                         const struct rte_flow_attr *attr,
4045                         const struct rte_flow_item items[],
4046                         const struct rte_flow_action actions[],
4047                         bool external, struct rte_flow_error *error)
4048 {
4049         struct mlx5_priv *priv = dev->data->dev_private;
4050         struct rte_flow_action *sfx_actions = NULL;
4051         struct rte_flow_action *pre_actions = NULL;
4052         struct rte_flow_item *sfx_items = NULL;
4053         struct mlx5_flow *dev_flow = NULL;
4054         struct rte_flow_attr sfx_attr = *attr;
4055         uint32_t mtr = 0;
4056         uint32_t mtr_tag_id = 0;
4057         size_t act_size;
4058         size_t item_size;
4059         int actions_n = 0;
4060         int ret;
4061
4062         if (priv->mtr_en)
4063                 actions_n = flow_check_meter_action(actions, &mtr);
4064         if (mtr) {
4065                 /* The five prefix actions: meter, decap, encap, tag, end. */
4066                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4067                            sizeof(struct mlx5_rte_flow_action_set_tag);
4068                 /* tag, vlan, port id, end. */
4069 #define METER_SUFFIX_ITEM 4
4070                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4071                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4072                 sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0);
4073                 if (!sfx_actions)
4074                         return rte_flow_error_set(error, ENOMEM,
4075                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4076                                                   NULL, "no memory to split "
4077                                                   "meter flow");
4078                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4079                              act_size);
4080                 pre_actions = sfx_actions + actions_n;
4081                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4082                                                    actions, sfx_actions,
4083                                                    pre_actions);
4084                 if (!mtr_tag_id) {
4085                         ret = -rte_errno;
4086                         goto exit;
4087                 }
4088                 /* Add the prefix subflow. */
4089                 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
4090                                               items, pre_actions, external,
4091                                               error);
4092                 if (ret) {
4093                         ret = -rte_errno;
4094                         goto exit;
4095                 }
4096                 dev_flow->handle->mtr_flow_id = mtr_tag_id;
4097                 /* Set the suffix group attribute. */
4098                 sfx_attr.group = sfx_attr.transfer ?
4099                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4100                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4101         }
4102         /* Add the suffix or unmodified original subflow. */
4103         ret = flow_create_split_metadata(dev, flow, dev_flow ?
4104                                          flow_get_prefix_layer_flags(dev_flow) :
4105                                          0, &sfx_attr,
4106                                          sfx_items ? sfx_items : items,
4107                                          sfx_actions ? sfx_actions : actions,
4108                                          external, error);
4109 exit:
4110         if (sfx_actions)
4111                 rte_free(sfx_actions);
4112         return ret;
4113 }
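
/*
 * Illustrative usage, with a hypothetical meter ID and queue (not part of
 * the driver): a rule carrying a METER action is what triggers the
 * prefix/suffix split above.
 *
 * @code
 * struct rte_flow_action_meter meter = { .mtr_id = 1 };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_METER, .conf = &meter },
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * // The METER action lands in the prefix subflow; the remaining actions
 * // are executed by the suffix subflow in the meter suffix table.
 * @endcode
 */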
4114
4115 /**
4116  * Split the flow into a set of subflows. The splitters might be
4117  * chained, like this:
4118  * flow_create_split_outer() calls:
4119  *   flow_create_split_meter() calls:
4120  *     flow_create_split_metadata(meter_subflow_0) calls:
4121  *       flow_create_split_inner(metadata_subflow_0)
4122  *       flow_create_split_inner(metadata_subflow_1)
4123  *       flow_create_split_inner(metadata_subflow_2)
4124  *     flow_create_split_metadata(meter_subflow_1) calls:
4125  *       flow_create_split_inner(metadata_subflow_0)
4126  *       flow_create_split_inner(metadata_subflow_1)
4127  *       flow_create_split_inner(metadata_subflow_2)
4128  *
4129  * This provides a flexible way to add new levels of flow splitting;
4130  * a sketch of chaining a new splitter follows this function. All the
4131  * successfully created subflows are included in the parent dev_flow list.
4132  *
4133  * @param dev
4134  *   Pointer to Ethernet device.
4135  * @param[in] flow
4136  *   Parent flow structure pointer.
4137  * @param[in] attr
4138  *   Flow rule attributes.
4139  * @param[in] items
4140  *   Pattern specification (list terminated by the END pattern item).
4141  * @param[in] actions
4142  *   Associated actions (list terminated by the END action).
4143  * @param[in] external
4144  *   This flow rule is created by a request external to the PMD.
4145  * @param[out] error
4146  *   Perform verbose error reporting if not NULL.
4147  * @return
4148  *   0 on success, negative value otherwise
4149  */
4150 static int
4151 flow_create_split_outer(struct rte_eth_dev *dev,
4152                         struct rte_flow *flow,
4153                         const struct rte_flow_attr *attr,
4154                         const struct rte_flow_item items[],
4155                         const struct rte_flow_action actions[],
4156                         bool external, struct rte_flow_error *error)
4157 {
4158         int ret;
4159
4160         ret = flow_create_split_meter(dev, flow, attr, items,
4161                                       actions, external, error);
4162         MLX5_ASSERT(ret <= 0);
4163         return ret;
4164 }
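
/*
 * A minimal sketch of chaining one more splitting level (hypothetical, not
 * part of the driver): a new splitter wraps the current topmost one and is
 * called from flow_create_split_outer() instead.
 *
 * @code
 * static int
 * flow_create_split_example(struct rte_eth_dev *dev, struct rte_flow *flow,
 *                           const struct rte_flow_attr *attr,
 *                           const struct rte_flow_item items[],
 *                           const struct rte_flow_action actions[],
 *                           bool external, struct rte_flow_error *error)
 * {
 *         // Rewrite attr/items/actions as needed, then delegate to the
 *         // next splitter in the chain.
 *         return flow_create_split_meter(dev, flow, attr, items, actions,
 *                                        external, error);
 * }
 * @endcode
 */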
4165
4166 /**
4167  * Create a flow and add it to @p list.
4168  *
4169  * @param dev
4170  *   Pointer to Ethernet device.
4171  * @param list
4172  *   Pointer to a TAILQ flow list. If this parameter is NULL,
4173  *   no list insertion occurs, the flow is just created,
4174  *   and it is the caller's responsibility to track the
4175  *   created flow.
4176  * @param[in] attr
4177  *   Flow rule attributes.
4178  * @param[in] items
4179  *   Pattern specification (list terminated by the END pattern item).
4180  * @param[in] actions
4181  *   Associated actions (list terminated by the END action).
4182  * @param[in] external
4183  *   This flow rule is created by a request external to the PMD.
4184  * @param[out] error
4185  *   Perform verbose error reporting if not NULL.
4186  *
4187  * @return
4188  *   A flow on success, NULL otherwise and rte_errno is set.
4189  */
4190 static struct rte_flow *
4191 flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
4192                  const struct rte_flow_attr *attr,
4193                  const struct rte_flow_item items[],
4194                  const struct rte_flow_action actions[],
4195                  bool external, struct rte_flow_error *error)
4196 {
4197         struct mlx5_priv *priv = dev->data->dev_private;
4198         struct rte_flow *flow = NULL;
4199         struct mlx5_flow *dev_flow;
4200         const struct rte_flow_action_rss *rss;
4201         union {
4202                 struct rte_flow_expand_rss buf;
4203                 uint8_t buffer[2048];
4204         } expand_buffer;
4205         union {
4206                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4207                 uint8_t buffer[2048];
4208         } actions_rx;
4209         union {
4210                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4211                 uint8_t buffer[2048];
4212         } actions_hairpin_tx;
4213         union {
4214                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
4215                 uint8_t buffer[2048];
4216         } items_tx;
4217         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
4218         const struct rte_flow_action *p_actions_rx = actions;
4219         uint32_t i;
4220         uint32_t flow_size;
4221         int hairpin_flow = 0;
4222         uint32_t hairpin_id = 0;
4223         struct rte_flow_attr attr_tx = { .priority = 0 };
4224         int ret = flow_drv_validate(dev, attr, items, p_actions_rx, external,
4225                                     error);
4226
4227         if (ret < 0)
4228                 return NULL;
4229         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4230         if (hairpin_flow > 0) {
4231                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
4232                         rte_errno = EINVAL;
4233                         return NULL;
4234                 }
4235                 flow_hairpin_split(dev, actions, actions_rx.actions,
4236                                    actions_hairpin_tx.actions, items_tx.items,
4237                                    &hairpin_id);
4238                 p_actions_rx = actions_rx.actions;
4239         }
4240         flow_size = sizeof(struct rte_flow);
4241         rss = flow_get_rss_action(p_actions_rx);
4242         if (rss)
4243                 flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
4244                                             sizeof(void *));
4245         else
4246                 flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
4247         flow = rte_calloc(__func__, 1, flow_size, 0);
4248         if (!flow) {
4249                 rte_errno = ENOMEM;
4250                 goto error_before_flow;
4251         }
4252         flow->drv_type = flow_get_drv_type(dev, attr);
4253         if (hairpin_id != 0)
4254                 flow->hairpin_flow_id = hairpin_id;
4255         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
4256                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
4257         flow->rss.queue = (void *)(flow + 1);
4258         if (rss) {
4259                 /*
4260                  * The following information is required by
4261                  * mlx5_flow_hashfields_adjust() in advance.
4262                  */
4263                 flow->rss.level = rss->level;
4264                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
4265                 flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
4266         }
4267         LIST_INIT(&flow->dev_handles);
4268         if (rss && rss->types) {
4269                 unsigned int graph_root;
4270
4271                 graph_root = find_graph_root(items, rss->level);
4272                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
4273                                           items, rss->types,
4274                                           mlx5_support_expansion,
4275                                           graph_root);
4276                 MLX5_ASSERT(ret > 0 &&
4277                        (unsigned int)ret < sizeof(expand_buffer.buffer));
4278         } else {
4279                 buf->entries = 1;
4280                 buf->entry[0].pattern = (void *)(uintptr_t)items;
4281         }
4282         /* Reset device flow index to 0. */
4283         priv->flow_idx = 0;
4284         for (i = 0; i < buf->entries; ++i) {
4285                 /*
4286                  * The splitter may create multiple dev_flows,
4287                  * depending on configuration. In the simplest
4288                  * case it just creates unmodified original flow.
4289                  */
4290                 ret = flow_create_split_outer(dev, flow, attr,
4291                                               buf->entry[i].pattern,
4292                                               p_actions_rx, external,
4293                                               error);
4294                 if (ret < 0)
4295                         goto error;
4296         }
4297         /* Create the tx flow. */
4298         if (hairpin_flow) {
4299                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
4300                 attr_tx.ingress = 0;
4301                 attr_tx.egress = 1;
4302                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
4303                                             actions_hairpin_tx.actions, error);
4304                 if (!dev_flow)
4305                         goto error;
4306                 dev_flow->flow = flow;
4307                 dev_flow->external = 0;
4308                 LIST_INSERT_HEAD(&flow->dev_handles, dev_flow->handle, next);
4309                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
4310                                          items_tx.items,
4311                                          actions_hairpin_tx.actions, error);
4312                 if (ret < 0)
4313                         goto error;
4314         }
4315         /*
4316          * Update the metadata register copy table. If extensive
4317          * metadata feature is enabled and registers are supported
4318          * we might create the extra rte_flow for each unique
4319          * MARK/FLAG action ID.
4320          *
4321          * The table is updated for ingress Flows only, because
4322          * the egress Flows belong to the different device and
4323          * copy table should be updated in peer NIC Rx domain.
4324          */
4325         if (attr->ingress &&
4326             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
4327                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
4328                 if (ret)
4329                         goto error;
4330         }
4331         if (dev->data->dev_started) {
4332                 ret = flow_drv_apply(dev, flow, error);
4333                 if (ret < 0)
4334                         goto error;
4335         }
4336         if (list)
4337                 TAILQ_INSERT_TAIL(list, flow, next);
4338         flow_rxq_flags_set(dev, flow);
4339         return flow;
4340 error_before_flow:
4341         if (hairpin_id)
4342                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4343                                      hairpin_id);
4344         return NULL;
4345 error:
4346         MLX5_ASSERT(flow);
4347         flow_mreg_del_copy_action(dev, flow);
4348         ret = rte_errno; /* Save rte_errno before cleanup. */
4349         if (flow->hairpin_flow_id)
4350                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4351                                      flow->hairpin_flow_id);
4352         MLX5_ASSERT(flow);
4353         flow_drv_destroy(dev, flow);
4354         rte_free(flow);
4355         rte_errno = ret; /* Restore rte_errno. */
4356         return NULL;
4357 }
4358
4359 /**
4360  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
4361  * incoming packets to table 1.
4362  *
4363  * Other flow rules, requested for group n, will be created in
4364  * e-switch table n+1.
4365  * A jump action to e-switch group n is created as a jump to table n+1.
4366  *
4367  * Used when working in switchdev mode, to take advantage of tables 1
4368  * and above.
4369  *
4370  * @param dev
4371  *   Pointer to Ethernet device.
4372  *
4373  * @return
4374  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
4375  */
4376 struct rte_flow *
4377 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4378 {
4379         const struct rte_flow_attr attr = {
4380                 .group = 0,
4381                 .priority = 0,
4382                 .ingress = 1,
4383                 .egress = 0,
4384                 .transfer = 1,
4385         };
4386         const struct rte_flow_item pattern = {
4387                 .type = RTE_FLOW_ITEM_TYPE_END,
4388         };
4389         struct rte_flow_action_jump jump = {
4390                 .group = 1,
4391         };
4392         const struct rte_flow_action actions[] = {
4393                 {
4394                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4395                         .conf = &jump,
4396                 },
4397                 {
4398                         .type = RTE_FLOW_ACTION_TYPE_END,
4399                 },
4400         };
4401         struct mlx5_priv *priv = dev->data->dev_private;
4402         struct rte_flow_error error;
4403
4404         return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
4405                                 actions, false, &error);
4406 }
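
/*
 * Worked example with hypothetical group numbers: once the rule above is
 * installed, application group numbers shift by one e-switch table:
 *
 * @code
 * // Application request:       attr.transfer = 1, attr.group = 2
 * // Actual placement:          e-switch table 3
 * // JUMP with .group = 2   ->  jump to e-switch table 3
 * @endcode
 */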
4407
4408 /**
4409  * Create a flow.
4410  *
4411  * @see rte_flow_create()
4412  * @see rte_flow_ops
4413  */
4414 struct rte_flow *
4415 mlx5_flow_create(struct rte_eth_dev *dev,
4416                  const struct rte_flow_attr *attr,
4417                  const struct rte_flow_item items[],
4418                  const struct rte_flow_action actions[],
4419                  struct rte_flow_error *error)
4420 {
4421         struct mlx5_priv *priv = dev->data->dev_private;
4422
4423         return flow_list_create(dev, &priv->flows,
4424                                 attr, items, actions, true, error);
4425 }
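
/*
 * Illustrative application-side usage, with hypothetical port and queue
 * numbers: mlx5_flow_create() is reached through the generic rte_flow API.
 *
 * @code
 * struct rte_flow_error error;
 * const struct rte_flow_attr attr = { .ingress = 1 };
 * const struct rte_flow_item pattern[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * struct rte_flow_action_queue queue = { .index = 0 };
 * const struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                         actions, &error);
 * @endcode
 */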
4426
4427 /**
4428  * Destroy a flow in a list.
4429  *
4430  * @param dev
4431  *   Pointer to Ethernet device.
4432  * @param list
4433  *   Pointer to a TAILQ flow list. If this parameter is NULL,
4434  *   the flow is not removed from any list.
4435  * @param[in] flow
4436  *   Flow to destroy.
4437  */
4438 static void
4439 flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
4440                   struct rte_flow *flow)
4441 {
4442         struct mlx5_priv *priv = dev->data->dev_private;
4443
4444         /*
4445          * Update RX queue flags only if port is started, otherwise it is
4446          * already clean.
4447          */
4448         if (dev->data->dev_started)
4449                 flow_rxq_flags_trim(dev, flow);
4450         if (flow->hairpin_flow_id)
4451                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4452                                      flow->hairpin_flow_id);
4453         flow_drv_destroy(dev, flow);
4454         if (list)
4455                 TAILQ_REMOVE(list, flow, next);
4456         flow_mreg_del_copy_action(dev, flow);
4457         rte_free(flow->fdir);
4458         rte_free(flow);
4459 }
4460
4461 /**
4462  * Destroy all flows.
4463  *
4464  * @param dev
4465  *   Pointer to Ethernet device.
4466  * @param list
4467  *   Pointer to a TAILQ flow list.
4468  * @param active
4469  *   Whether the flush is invoked actively (e.g. before stopping the port).
4470  */
4471 void
4472 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list,
4473                      bool active)
4474 {
4475         uint32_t num_flushed = 0;
4476
4477         while (!TAILQ_EMPTY(list)) {
4478                 struct rte_flow *flow;
4479
4480                 flow = TAILQ_FIRST(list);
4481                 flow_list_destroy(dev, list, flow);
4482                 num_flushed++;
4483         }
4484         if (active) {
4485                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
4486                         dev->data->port_id, num_flushed);
4487         }
4488 }
4489
4490 /**
4491  * Remove all flows.
4492  *
4493  * @param dev
4494  *   Pointer to Ethernet device.
4495  * @param list
4496  *   Pointer to a TAILQ flow list.
4497  */
4498 void
4499 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
4500 {
4501         struct rte_flow *flow;
4502
4503         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
4504                 flow_drv_remove(dev, flow);
4505                 flow_mreg_stop_copy_action(dev, flow);
4506         }
4507         flow_mreg_del_default_copy_action(dev);
4508         flow_rxq_flags_clear(dev);
4509 }
4510
4511 /**
4512  * Add all flows.
4513  *
4514  * @param dev
4515  *   Pointer to Ethernet device.
4516  * @param list
4517  *   Pointer to a TAILQ flow list.
4518  *
4519  * @return
4520  *   0 on success, a negative errno value otherwise and rte_errno is set.
4521  */
4522 int
4523 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
4524 {
4525         struct rte_flow *flow;
4526         struct rte_flow_error error;
4527         int ret = 0;
4528
4529         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4530         ret = flow_mreg_add_default_copy_action(dev, &error);
4531         if (ret < 0)
4532                 return -rte_errno;
4533         /* Apply Flows created by application. */
4534         TAILQ_FOREACH(flow, list, next) {
4535                 ret = flow_mreg_start_copy_action(dev, flow);
4536                 if (ret < 0)
4537                         goto error;
4538                 ret = flow_drv_apply(dev, flow, &error);
4539                 if (ret < 0)
4540                         goto error;
4541                 flow_rxq_flags_set(dev, flow);
4542         }
4543         return 0;
4544 error:
4545         ret = rte_errno; /* Save rte_errno before cleanup. */
4546         mlx5_flow_stop(dev, list);
4547         rte_errno = ret; /* Restore rte_errno. */
4548         return -rte_errno;
4549 }
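
/*
 * A minimal sketch of how the pair above is used across a port restart
 * (assumed call sites; the actual ones live in the port trigger code):
 *
 * @code
 * mlx5_flow_stop(dev, &priv->flows);  // on port stop: remove rules from HW
 * // ... queues are reconfigured here ...
 * mlx5_flow_start(dev, &priv->flows); // on port start: re-apply the rules
 * @endcode
 */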
4550
4551 /**
4552  * Stop all default actions for flows.
4553  *
4554  * @param dev
4555  *   Pointer to Ethernet device.
4556  */
4557 void
4558 mlx5_flow_stop_default(struct rte_eth_dev *dev)
4559 {
4560         flow_mreg_del_default_copy_action(dev);
4561 }
4562
4563 /**
4564  * Start all default actions for flows.
4565  *
4566  * @param dev
4567  *   Pointer to Ethernet device.
4568  * @return
4569  *   0 on success, a negative errno value otherwise and rte_errno is set.
4570  */
4571 int
4572 mlx5_flow_start_default(struct rte_eth_dev *dev)
4573 {
4574         struct rte_flow_error error;
4575
4576         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4577         return flow_mreg_add_default_copy_action(dev, &error);
4578 }
4579
4580 /**
4581  * Allocate intermediate resources for flow creation.
4582  *
4583  * @param dev
4584  *   Pointer to Ethernet device.
4585  */
4586 void
4587 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
4588 {
4589         struct mlx5_priv *priv = dev->data->dev_private;
4590
4591         if (!priv->inter_flows)
4592                 priv->inter_flows = rte_calloc(__func__, MLX5_NUM_MAX_DEV_FLOWS,
4593                                                sizeof(struct mlx5_flow), 0);
4594 }
4595
4596 /**
4597  * Free intermediate resources for flows.
4598  *
4599  * @param dev
4600  *   Pointer to Ethernet device.
4601  */
4602 void
4603 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
4604 {
4605         struct mlx5_priv *priv = dev->data->dev_private;
4606
4607         rte_free(priv->inter_flows);
4608         priv->inter_flows = NULL;
4609 }
4610
4611 /**
4612  * Verify the flow list is empty.
4613  *
4614  * @param dev
4615  *   Pointer to Ethernet device.
4616  *
4617  * @return
4618  *   The number of flows not released.
4619  */
4619 int
4620 mlx5_flow_verify(struct rte_eth_dev *dev)
4621 {
4622         struct mlx5_priv *priv = dev->data->dev_private;
4623         struct rte_flow *flow;
4624         int ret = 0;
4625
4626         TAILQ_FOREACH(flow, &priv->flows, next) {
4627                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
4628                         dev->data->port_id, (void *)flow);
4629                 ++ret;
4630         }
4631         return ret;
4632 }
4633
4634 /**
4635  * Enable default hairpin egress flow.
4636  *
4637  * @param dev
4638  *   Pointer to Ethernet device.
4639  * @param queue
4640  *   The queue index.
4641  *
4642  * @return
4643  *   0 on success, a negative errno value otherwise and rte_errno is set.
4644  */
4645 int
4646 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
4647                             uint32_t queue)
4648 {
4649         struct mlx5_priv *priv = dev->data->dev_private;
4650         const struct rte_flow_attr attr = {
4651                 .egress = 1,
4652                 .priority = 0,
4653         };
4654         struct mlx5_rte_flow_item_tx_queue queue_spec = {
4655                 .queue = queue,
4656         };
4657         struct mlx5_rte_flow_item_tx_queue queue_mask = {
4658                 .queue = UINT32_MAX,
4659         };
4660         struct rte_flow_item items[] = {
4661                 {
4662                         .type = MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
4663                         .spec = &queue_spec,
4664                         .last = NULL,
4665                         .mask = &queue_mask,
4666                 },
4667                 {
4668                         .type = RTE_FLOW_ITEM_TYPE_END,
4669                 },
4670         };
4671         struct rte_flow_action_jump jump = {
4672                 .group = MLX5_HAIRPIN_TX_TABLE,
4673         };
4674         struct rte_flow_action actions[2];
4675         struct rte_flow *flow;
4676         struct rte_flow_error error;
4677
4678         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
4679         actions[0].conf = &jump;
4680         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
4681         flow = flow_list_create(dev, &priv->ctrl_flows,
4682                                 &attr, items, actions, false, &error);
4683         if (!flow) {
4684                 DRV_LOG(DEBUG,
4685                         "Failed to create ctrl flow: rte_errno(%d),"
4686                         " type(%d), message(%s)",
4687                         rte_errno, error.type,
4688                         error.message ? error.message : " (no stated reason)");
4689                 return -rte_errno;
4690         }
4691         return 0;
4692 }
4693
4694 /**
4695  * Enable a control flow configured from the control plane.
4696  *
4697  * @param dev
4698  *   Pointer to Ethernet device.
4699  * @param eth_spec
4700  *   An Ethernet flow spec to apply.
4701  * @param eth_mask
4702  *   An Ethernet flow mask to apply.
4703  * @param vlan_spec
4704  *   A VLAN flow spec to apply.
4705  * @param vlan_mask
4706  *   A VLAN flow mask to apply.
4707  *
4708  * @return
4709  *   0 on success, a negative errno value otherwise and rte_errno is set.
4710  */
4711 int
4712 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
4713                     struct rte_flow_item_eth *eth_spec,
4714                     struct rte_flow_item_eth *eth_mask,
4715                     struct rte_flow_item_vlan *vlan_spec,
4716                     struct rte_flow_item_vlan *vlan_mask)
4717 {
4718         struct mlx5_priv *priv = dev->data->dev_private;
4719         const struct rte_flow_attr attr = {
4720                 .ingress = 1,
4721                 .priority = MLX5_FLOW_PRIO_RSVD,
4722         };
4723         struct rte_flow_item items[] = {
4724                 {
4725                         .type = RTE_FLOW_ITEM_TYPE_ETH,
4726                         .spec = eth_spec,
4727                         .last = NULL,
4728                         .mask = eth_mask,
4729                 },
4730                 {
4731                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
4732                                               RTE_FLOW_ITEM_TYPE_END,
4733                         .spec = vlan_spec,
4734                         .last = NULL,
4735                         .mask = vlan_mask,
4736                 },
4737                 {
4738                         .type = RTE_FLOW_ITEM_TYPE_END,
4739                 },
4740         };
4741         uint16_t queue[priv->reta_idx_n];
4742         struct rte_flow_action_rss action_rss = {
4743                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4744                 .level = 0,
4745                 .types = priv->rss_conf.rss_hf,
4746                 .key_len = priv->rss_conf.rss_key_len,
4747                 .queue_num = priv->reta_idx_n,
4748                 .key = priv->rss_conf.rss_key,
4749                 .queue = queue,
4750         };
4751         struct rte_flow_action actions[] = {
4752                 {
4753                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4754                         .conf = &action_rss,
4755                 },
4756                 {
4757                         .type = RTE_FLOW_ACTION_TYPE_END,
4758                 },
4759         };
4760         struct rte_flow *flow;
4761         struct rte_flow_error error;
4762         unsigned int i;
4763
4764         if (!priv->reta_idx_n || !priv->rxqs_n)
4765                 return 0;
4767         for (i = 0; i != priv->reta_idx_n; ++i)
4768                 queue[i] = (*priv->reta_idx)[i];
4769         flow = flow_list_create(dev, &priv->ctrl_flows,
4770                                 &attr, items, actions, false, &error);
4771         if (!flow)
4772                 return -rte_errno;
4773         return 0;
4774 }
4775
4776 /**
4777  * Enable a control flow configured from the control plane.
4778  *
4779  * @param dev
4780  *   Pointer to Ethernet device.
4781  * @param eth_spec
4782  *   An Ethernet flow spec to apply.
4783  * @param eth_mask
4784  *   An Ethernet flow mask to apply.
4785  *
4786  * @return
4787  *   0 on success, a negative errno value otherwise and rte_errno is set.
4788  */
4789 int
4790 mlx5_ctrl_flow(struct rte_eth_dev *dev,
4791                struct rte_flow_item_eth *eth_spec,
4792                struct rte_flow_item_eth *eth_mask)
4793 {
4794         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
4795 }
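
/*
 * Illustrative internal usage (a sketch; the actual callers live in the
 * traffic enabling code): accept broadcast frames via a control flow.
 *
 * @code
 * struct rte_flow_item_eth bcast_spec = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * struct rte_flow_item_eth bcast_mask = {
 *         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 * };
 * int ret = mlx5_ctrl_flow(dev, &bcast_spec, &bcast_mask);
 * @endcode
 */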
4796
4797 /**
4798  * Destroy a flow.
4799  *
4800  * @see rte_flow_destroy()
4801  * @see rte_flow_ops
4802  */
4803 int
4804 mlx5_flow_destroy(struct rte_eth_dev *dev,
4805                   struct rte_flow *flow,
4806                   struct rte_flow_error *error __rte_unused)
4807 {
4808         struct mlx5_priv *priv = dev->data->dev_private;
4809
4810         flow_list_destroy(dev, &priv->flows, flow);
4811         return 0;
4812 }
4813
4814 /**
4815  * Destroy all flows.
4816  *
4817  * @see rte_flow_flush()
4818  * @see rte_flow_ops
4819  */
4820 int
4821 mlx5_flow_flush(struct rte_eth_dev *dev,
4822                 struct rte_flow_error *error __rte_unused)
4823 {
4824         struct mlx5_priv *priv = dev->data->dev_private;
4825
4826         mlx5_flow_list_flush(dev, &priv->flows, false);
4827         return 0;
4828 }
4829
4830 /**
4831  * Isolated mode.
4832  *
4833  * @see rte_flow_isolate()
4834  * @see rte_flow_ops
4835  */
4836 int
4837 mlx5_flow_isolate(struct rte_eth_dev *dev,
4838                   int enable,
4839                   struct rte_flow_error *error)
4840 {
4841         struct mlx5_priv *priv = dev->data->dev_private;
4842
4843         if (dev->data->dev_started) {
4844                 rte_flow_error_set(error, EBUSY,
4845                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4846                                    NULL,
4847                                    "port must be stopped first");
4848                 return -rte_errno;
4849         }
4850         priv->isolated = !!enable;
4851         if (enable)
4852                 dev->dev_ops = &mlx5_dev_ops_isolate;
4853         else
4854                 dev->dev_ops = &mlx5_dev_ops;
4855         return 0;
4856 }
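
/*
 * Illustrative application-side usage, with a hypothetical port number:
 * isolated mode must be requested while the port is stopped.
 *
 * @code
 * struct rte_flow_error error;
 * if (rte_flow_isolate(port_id, 1, &error))
 *         printf("cannot enter isolated mode: %s\n",
 *                error.message ? error.message : "(no stated reason)");
 * rte_eth_dev_start(port_id);
 * @endcode
 */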
4857
4858 /**
4859  * Query a flow.
4860  *
4861  * @see rte_flow_query()
4862  * @see rte_flow_ops
4863  */
4864 static int
4865 flow_drv_query(struct rte_eth_dev *dev,
4866                struct rte_flow *flow,
4867                const struct rte_flow_action *actions,
4868                void *data,
4869                struct rte_flow_error *error)
4870 {
4871         const struct mlx5_flow_driver_ops *fops;
4872         enum mlx5_flow_drv_type ftype = flow->drv_type;
4873
4874         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
4875         fops = flow_get_drv_ops(ftype);
4876
4877         return fops->query(dev, flow, actions, data, error);
4878 }
4879
4880 /**
4881  * Query a flow.
4882  *
4883  * @see rte_flow_query()
4884  * @see rte_flow_ops
4885  */
4886 int
4887 mlx5_flow_query(struct rte_eth_dev *dev,
4888                 struct rte_flow *flow,
4889                 const struct rte_flow_action *actions,
4890                 void *data,
4891                 struct rte_flow_error *error)
4892 {
4893         int ret;
4894
4895         ret = flow_drv_query(dev, flow, actions, data, error);
4896         if (ret < 0)
4897                 return ret;
4898         return 0;
4899 }
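
/*
 * Illustrative application-side usage, with a hypothetical port and flow
 * handle: query the counter of a rule created with a COUNT action.
 *
 * @code
 * struct rte_flow_query_count count = { .reset = 1 };
 * const struct rte_flow_action action[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * struct rte_flow_error error;
 * if (!rte_flow_query(port_id, flow, action, &count, &error))
 *         printf("hits: %" PRIu64 " bytes: %" PRIu64 "\n",
 *                count.hits, count.bytes);
 * @endcode
 */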
4900
4901 /**
4902  * Convert a flow director filter to a generic flow.
4903  *
4904  * @param dev
4905  *   Pointer to Ethernet device.
4906  * @param fdir_filter
4907  *   Flow director filter to add.
4908  * @param attributes
4909  *   Generic flow parameters structure.
4910  *
4911  * @return
4912  *   0 on success, a negative errno value otherwise and rte_errno is set.
4913  */
4914 static int
4915 flow_fdir_filter_convert(struct rte_eth_dev *dev,
4916                          const struct rte_eth_fdir_filter *fdir_filter,
4917                          struct mlx5_fdir *attributes)
4918 {
4919         struct mlx5_priv *priv = dev->data->dev_private;
4920         const struct rte_eth_fdir_input *input = &fdir_filter->input;
4921         const struct rte_eth_fdir_masks *mask =
4922                 &dev->data->dev_conf.fdir_conf.mask;
4923
4924         /* Validate queue number. */
4925         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
4926                 DRV_LOG(ERR, "port %u invalid queue number %d",
4927                         dev->data->port_id, fdir_filter->action.rx_queue);
4928                 rte_errno = EINVAL;
4929                 return -rte_errno;
4930         }
4931         attributes->attr.ingress = 1;
4932         attributes->items[0] = (struct rte_flow_item) {
4933                 .type = RTE_FLOW_ITEM_TYPE_ETH,
4934                 .spec = &attributes->l2,
4935                 .mask = &attributes->l2_mask,
4936         };
4937         switch (fdir_filter->action.behavior) {
4938         case RTE_ETH_FDIR_ACCEPT:
4939                 attributes->actions[0] = (struct rte_flow_action){
4940                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
4941                         .conf = &attributes->queue,
4942                 };
4943                 break;
4944         case RTE_ETH_FDIR_REJECT:
4945                 attributes->actions[0] = (struct rte_flow_action){
4946                         .type = RTE_FLOW_ACTION_TYPE_DROP,
4947                 };
4948                 break;
4949         default:
4950                 DRV_LOG(ERR, "port %u invalid behavior %d",
4951                         dev->data->port_id,
4952                         fdir_filter->action.behavior);
4953                 rte_errno = ENOTSUP;
4954                 return -rte_errno;
4955         }
4956         attributes->queue.index = fdir_filter->action.rx_queue;
4957         /* Handle L3. */
4958         switch (fdir_filter->input.flow_type) {
4959         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4960         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4961         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4962                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
4963                         .src_addr = input->flow.ip4_flow.src_ip,
4964                         .dst_addr = input->flow.ip4_flow.dst_ip,
4965                         .time_to_live = input->flow.ip4_flow.ttl,
4966                         .type_of_service = input->flow.ip4_flow.tos,
4967                         .next_proto_id = input->flow.ip4_flow.proto,
4968                 };
4968                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
4969                         .src_addr = mask->ipv4_mask.src_ip,
4970                         .dst_addr = mask->ipv4_mask.dst_ip,
4971                         .time_to_live = mask->ipv4_mask.ttl,
4972                         .type_of_service = mask->ipv4_mask.tos,
4973                         .next_proto_id = mask->ipv4_mask.proto,
4974                 };
4975                 attributes->items[1] = (struct rte_flow_item){
4976                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
4977                         .spec = &attributes->l3,
4978                         .mask = &attributes->l3_mask,
4979                 };
4980                 break;
4981         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4982         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4983         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4984                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
4985                         .hop_limits = input->flow.ipv6_flow.hop_limits,
4986                         .proto = input->flow.ipv6_flow.proto,
4987                 };
4988
4989                 memcpy(attributes->l3.ipv6.hdr.src_addr,
4990                        input->flow.ipv6_flow.src_ip,
4991                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
4992                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
4993                        input->flow.ipv6_flow.dst_ip,
4994                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
4995                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
4996                        mask->ipv6_mask.src_ip,
4997                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
4998                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
4999                        mask->ipv6_mask.dst_ip,
5000                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
5001                 attributes->items[1] = (struct rte_flow_item){
5002                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
5003                         .spec = &attributes->l3,
5004                         .mask = &attributes->l3_mask,
5005                 };
5006                 break;
5007         default:
5008                 DRV_LOG(ERR, "port %u invalid flow type %d",
5009                         dev->data->port_id, fdir_filter->input.flow_type);
5010                 rte_errno = ENOTSUP;
5011                 return -rte_errno;
5012         }
5013         /* Handle L4. */
5014         switch (fdir_filter->input.flow_type) {
5015         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5016                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5017                         .src_port = input->flow.udp4_flow.src_port,
5018                         .dst_port = input->flow.udp4_flow.dst_port,
5019                 };
5020                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5021                         .src_port = mask->src_port_mask,
5022                         .dst_port = mask->dst_port_mask,
5023                 };
5024                 attributes->items[2] = (struct rte_flow_item){
5025                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5026                         .spec = &attributes->l4,
5027                         .mask = &attributes->l4_mask,
5028                 };
5029                 break;
5030         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5031                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5032                         .src_port = input->flow.tcp4_flow.src_port,
5033                         .dst_port = input->flow.tcp4_flow.dst_port,
5034                 };
5035                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5036                         .src_port = mask->src_port_mask,
5037                         .dst_port = mask->dst_port_mask,
5038                 };
5039                 attributes->items[2] = (struct rte_flow_item){
5040                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5041                         .spec = &attributes->l4,
5042                         .mask = &attributes->l4_mask,
5043                 };
5044                 break;
5045         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5046                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5047                         .src_port = input->flow.udp6_flow.src_port,
5048                         .dst_port = input->flow.udp6_flow.dst_port,
5049                 };
5050                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5051                         .src_port = mask->src_port_mask,
5052                         .dst_port = mask->dst_port_mask,
5053                 };
5054                 attributes->items[2] = (struct rte_flow_item){
5055                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5056                         .spec = &attributes->l4,
5057                         .mask = &attributes->l4_mask,
5058                 };
5059                 break;
5060         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5061                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5062                         .src_port = input->flow.tcp6_flow.src_port,
5063                         .dst_port = input->flow.tcp6_flow.dst_port,
5064                 };
5065                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5066                         .src_port = mask->src_port_mask,
5067                         .dst_port = mask->dst_port_mask,
5068                 };
5069                 attributes->items[2] = (struct rte_flow_item){
5070                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5071                         .spec = &attributes->l4,
5072                         .mask = &attributes->l4_mask,
5073                 };
5074                 break;
5075         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5076         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5077                 break;
5078         default:
5079                 DRV_LOG(ERR, "port %u invalid flow type %d",
5080                         dev->data->port_id, fdir_filter->input.flow_type);
5081                 rte_errno = ENOTSUP;
5082                 return -rte_errno;
5083         }
5084         return 0;
5085 }
5086
5087 #define FLOW_FDIR_CMP(f1, f2, fld) \
5088         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
5089
5090 /**
5091  * Compare two FDIR flows. If items and actions are identical, the two flows are
5092  * regarded as the same.
5093  *
5094  * @param dev
5095  *   Pointer to Ethernet device.
5096  * @param f1
5097  *   FDIR flow to compare.
5098  * @param f2
5099  *   FDIR flow to compare.
5100  *
5101  * @return
5102  *   Zero on match, 1 otherwise.
5103  */
5104 static int
5105 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
5106 {
5107         if (FLOW_FDIR_CMP(f1, f2, attr) ||
5108             FLOW_FDIR_CMP(f1, f2, l2) ||
5109             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
5110             FLOW_FDIR_CMP(f1, f2, l3) ||
5111             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
5112             FLOW_FDIR_CMP(f1, f2, l4) ||
5113             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
5114             FLOW_FDIR_CMP(f1, f2, actions[0].type))
5115                 return 1;
5116         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
5117             FLOW_FDIR_CMP(f1, f2, queue))
5118                 return 1;
5119         return 0;
5120 }
5121
5122 /**
5123  * Search the device flow list for a matching FDIR flow.
5124  *
5125  * @param dev
5126  *   Pointer to Ethernet device.
5127  * @param fdir_flow
5128  *   FDIR flow to lookup.
5129  *
5130  * @return
5131  *   Pointer of flow if found, NULL otherwise.
5132  */
5133 static struct rte_flow *
5134 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
5135 {
5136         struct mlx5_priv *priv = dev->data->dev_private;
5137         struct rte_flow *flow = NULL;
5138
5139         MLX5_ASSERT(fdir_flow);
5140         TAILQ_FOREACH(flow, &priv->flows, next) {
5141                 if (flow->fdir && !flow_fdir_cmp(flow->fdir, fdir_flow)) {
5142                         DRV_LOG(DEBUG, "port %u found FDIR flow %p",
5143                                 dev->data->port_id, (void *)flow);
5144                         break;
5145                 }
5146         }
5147         return flow;
5148 }
5149
5150 /**
5151  * Add a new flow director filter and store it in the list.
5152  *
5153  * @param dev
5154  *   Pointer to Ethernet device.
5155  * @param fdir_filter
5156  *   Flow director filter to add.
5157  *
5158  * @return
5159  *   0 on success, a negative errno value otherwise and rte_errno is set.
5160  */
5161 static int
5162 flow_fdir_filter_add(struct rte_eth_dev *dev,
5163                      const struct rte_eth_fdir_filter *fdir_filter)
5164 {
5165         struct mlx5_priv *priv = dev->data->dev_private;
5166         struct mlx5_fdir *fdir_flow;
5167         struct rte_flow *flow;
5168         int ret;
5169
5170         fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
5171         if (!fdir_flow) {
5172                 rte_errno = ENOMEM;
5173                 return -rte_errno;
5174         }
5175         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
5176         if (ret)
5177                 goto error;
5178         flow = flow_fdir_filter_lookup(dev, fdir_flow);
5179         if (flow) {
5180                 rte_errno = EEXIST;
5181                 goto error;
5182         }
5183         flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
5184                                 fdir_flow->items, fdir_flow->actions, true,
5185                                 NULL);
5186         if (!flow)
5187                 goto error;
5188         MLX5_ASSERT(!flow->fdir);
5189         flow->fdir = fdir_flow;
5190         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
5191                 dev->data->port_id, (void *)flow);
5192         return 0;
5193 error:
5194         rte_free(fdir_flow);
5195         return -rte_errno;
5196 }
5197
5198 /**
5199  * Delete a specific filter.
5200  *
5201  * @param dev
5202  *   Pointer to Ethernet device.
5203  * @param fdir_filter
5204  *   Filter to be deleted.
5205  *
5206  * @return
5207  *   0 on success, a negative errno value otherwise and rte_errno is set.
5208  */
5209 static int
5210 flow_fdir_filter_delete(struct rte_eth_dev *dev,
5211                         const struct rte_eth_fdir_filter *fdir_filter)
5212 {
5213         struct mlx5_priv *priv = dev->data->dev_private;
5214         struct rte_flow *flow;
5215         struct mlx5_fdir fdir_flow = {
5216                 .attr.group = 0,
5217         };
5218         int ret;
5219
5220         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
5221         if (ret)
5222                 return -rte_errno;
5223         flow = flow_fdir_filter_lookup(dev, &fdir_flow);
5224         if (!flow) {
5225                 rte_errno = ENOENT;
5226                 return -rte_errno;
5227         }
5228         flow_list_destroy(dev, &priv->flows, flow);
5229         DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
5230                 dev->data->port_id, (void *)flow);
5231         return 0;
5232 }
5233
5234 /**
5235  * Update the queue for a specific filter.
5236  *
5237  * @param dev
5238  *   Pointer to Ethernet device.
5239  * @param fdir_filter
5240  *   Filter to be updated.
5241  *
5242  * @return
5243  *   0 on success, a negative errno value otherwise and rte_errno is set.
5244  */
5245 static int
5246 flow_fdir_filter_update(struct rte_eth_dev *dev,
5247                         const struct rte_eth_fdir_filter *fdir_filter)
5248 {
5249         int ret;
5250
5251         ret = flow_fdir_filter_delete(dev, fdir_filter);
5252         if (ret)
5253                 return ret;
5254         return flow_fdir_filter_add(dev, fdir_filter);
5255 }
5256
5257 /**
5258  * Flush all filters.
5259  *
5260  * @param dev
5261  *   Pointer to Ethernet device.
5262  */
5263 static void
5264 flow_fdir_filter_flush(struct rte_eth_dev *dev)
5265 {
5266         struct mlx5_priv *priv = dev->data->dev_private;
5267
5268         mlx5_flow_list_flush(dev, &priv->flows, false);
5269 }
5270
5271 /**
5272  * Get flow director information.
5273  *
5274  * @param dev
5275  *   Pointer to Ethernet device.
5276  * @param[out] fdir_info
5277  *   Resulting flow director information.
5278  */
5279 static void
5280 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
5281 {
5282         struct rte_eth_fdir_masks *mask =
5283                 &dev->data->dev_conf.fdir_conf.mask;
5284
5285         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
5286         fdir_info->guarant_spc = 0;
5287         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
5288         fdir_info->max_flexpayload = 0;
5289         fdir_info->flow_types_mask[0] = 0;
5290         fdir_info->flex_payload_unit = 0;
5291         fdir_info->max_flex_payload_segment_num = 0;
5292         fdir_info->flex_payload_limit = 0;
5293         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
5294 }
5295
5296 /**
5297  * Deal with flow director operations.
5298  *
5299  * @param dev
5300  *   Pointer to Ethernet device.
5301  * @param filter_op
5302  *   Operation to perform.
5303  * @param arg
5304  *   Pointer to operation-specific structure.
5305  *
5306  * @return
5307  *   0 on success, a negative errno value otherwise and rte_errno is set.
5308  */
5309 static int
5310 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
5311                     void *arg)
5312 {
5313         enum rte_fdir_mode fdir_mode =
5314                 dev->data->dev_conf.fdir_conf.mode;
5315
5316         if (filter_op == RTE_ETH_FILTER_NOP)
5317                 return 0;
5318         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
5319             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
5320                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
5321                         dev->data->port_id, fdir_mode);
5322                 rte_errno = EINVAL;
5323                 return -rte_errno;
5324         }
5325         switch (filter_op) {
5326         case RTE_ETH_FILTER_ADD:
5327                 return flow_fdir_filter_add(dev, arg);
5328         case RTE_ETH_FILTER_UPDATE:
5329                 return flow_fdir_filter_update(dev, arg);
5330         case RTE_ETH_FILTER_DELETE:
5331                 return flow_fdir_filter_delete(dev, arg);
5332         case RTE_ETH_FILTER_FLUSH:
5333                 flow_fdir_filter_flush(dev);
5334                 break;
5335         case RTE_ETH_FILTER_INFO:
5336                 flow_fdir_info_get(dev, arg);
5337                 break;
5338         default:
5339                 DRV_LOG(DEBUG, "port %u unknown operation %u",
5340                         dev->data->port_id, filter_op);
5341                 rte_errno = EINVAL;
5342                 return -rte_errno;
5343         }
5344         return 0;
5345 }
5346
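/*
 * Illustrative sketch, not compiled into the driver: how an application
 * reaches flow_fdir_ctrl_func() above through the legacy filter API to
 * steer IPv4/UDP traffic to a given Rx queue. The guard macro, function
 * name and the address/port values are hypothetical; the port must be
 * configured with one of the perfect flow director modes checked above.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_fdir_udp4_to_queue(uint16_t port_id, uint16_t rx_queue)
{
        struct rte_eth_fdir_filter filter = {
                .input = {
                        .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
                        .flow.udp4_flow = {
                                .ip.dst_ip = RTE_BE32(RTE_IPV4(192, 168, 0, 1)),
                                .dst_port = RTE_BE16(4789),
                        },
                },
                .action = {
                        .rx_queue = rx_queue,
                        .behavior = RTE_ETH_FDIR_ACCEPT,
                },
        };

        /* Dispatched to flow_fdir_filter_add() by the switch above. */
        return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                       RTE_ETH_FILTER_ADD, &filter);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */
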
5347 /**
5348  * Manage filter operations.
5349  *
5350  * @param dev
5351  *   Pointer to Ethernet device structure.
5352  * @param filter_type
5353  *   Filter type.
5354  * @param filter_op
5355  *   Operation to perform.
5356  * @param arg
5357  *   Pointer to operation-specific structure.
5358  *
5359  * @return
5360  *   0 on success, a negative errno value otherwise and rte_errno is set.
5361  */
5362 int
5363 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
5364                      enum rte_filter_type filter_type,
5365                      enum rte_filter_op filter_op,
5366                      void *arg)
5367 {
5368         switch (filter_type) {
5369         case RTE_ETH_FILTER_GENERIC:
5370                 if (filter_op != RTE_ETH_FILTER_GET) {
5371                         rte_errno = EINVAL;
5372                         return -rte_errno;
5373                 }
5374                 *(const void **)arg = &mlx5_flow_ops;
5375                 return 0;
5376         case RTE_ETH_FILTER_FDIR:
5377                 return flow_fdir_ctrl_func(dev, filter_op, arg);
5378         default:
5379                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
5380                         dev->data->port_id, filter_type);
5381                 rte_errno = ENOTSUP;
5382                 return -rte_errno;
5383         }
5384         return 0;
5385 }
5386
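/*
 * Illustrative sketch (hypothetical guard and function name): this is
 * essentially what the rte_flow layer does to obtain the mlx5 flow ops
 * through the RTE_ETH_FILTER_GENERIC path above.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static const struct rte_flow_ops *
example_get_flow_ops(struct rte_eth_dev *dev)
{
        const struct rte_flow_ops *ops = NULL;

        if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                 RTE_ETH_FILTER_GET, &ops) < 0)
                return NULL;
        return ops; /* Now points to mlx5_flow_ops. */
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */
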
5387 /**
5388  * Create the needed meter and suffix tables.
5389  *
5390  * @param[in] dev
5391  *   Pointer to Ethernet device.
5392  * @param[in] fm
5393  *   Pointer to the flow meter.
5394  *
5395  * @return
5396  *   Pointer to table set on success, NULL otherwise.
5397  */
5398 struct mlx5_meter_domains_infos *
5399 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
5400                           const struct mlx5_flow_meter *fm)
5401 {
5402         const struct mlx5_flow_driver_ops *fops;
5403
5404         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5405         return fops->create_mtr_tbls(dev, fm);
5406 }
5407
5408 /**
5409  * Destroy the meter table set.
5410  *
5411  * @param[in] dev
5412  *   Pointer to Ethernet device.
5413  * @param[in] tbl
5414  *   Pointer to the meter table set.
5415  *
5416  * @return
5417  *   0 on success.
5418  */
5419 int
5420 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
5421                            struct mlx5_meter_domains_infos *tbls)
5422 {
5423         const struct mlx5_flow_driver_ops *fops;
5424
5425         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5426         return fops->destroy_mtr_tbls(dev, tbls);
5427 }
5428
5429 /**
5430  * Create policer rules.
5431  *
5432  * @param[in] dev
5433  *   Pointer to Ethernet device.
5434  * @param[in] fm
5435  *   Pointer to flow meter structure.
5436  * @param[in] attr
5437  *   Pointer to flow attributes.
5438  *
5439  * @return
5440  *   0 on success, -1 otherwise.
5441  */
5442 int
5443 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
5444                                struct mlx5_flow_meter *fm,
5445                                const struct rte_flow_attr *attr)
5446 {
5447         const struct mlx5_flow_driver_ops *fops;
5448
5449         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5450         return fops->create_policer_rules(dev, fm, attr);
5451 }
5452
5453 /**
5454  * Destroy policer rules.
5455  *
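 * @param[in] dev
 *   Pointer to Ethernet device.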
5456  * @param[in] fm
5457  *   Pointer to flow meter structure.
5458  * @param[in] attr
5459  *   Pointer to flow attributes.
5460  *
5461  * @return
5462  *   0 on success, -1 otherwise.
5463  */
5464 int
5465 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
5466                                 struct mlx5_flow_meter *fm,
5467                                 const struct rte_flow_attr *attr)
5468 {
5469         const struct mlx5_flow_driver_ops *fops;
5470
5471         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5472         return fops->destroy_policer_rules(dev, fm, attr);
5473 }
5474
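/*
 * The four meter/policer helpers above dispatch straight to the DV
 * driver ops, since metering is implemented only there; unlike the
 * counter helpers below, they do not check the effective driver type.
 */
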
5475 /**
5476  * Allocate a counter.
5477  *
5478  * @param[in] dev
5479  *   Pointer to Ethernet device structure.
5480  *
5481  * @return
5482  *   Pointer to the allocated counter on success, NULL otherwise.
5483  */
5484 struct mlx5_flow_counter *
5485 mlx5_counter_alloc(struct rte_eth_dev *dev)
5486 {
5487         const struct mlx5_flow_driver_ops *fops;
5488         struct rte_flow_attr attr = { .transfer = 0 };
5489
5490         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5491                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5492                 return fops->counter_alloc(dev);
5493         }
5494         DRV_LOG(ERR,
5495                 "port %u counter allocation is not supported.",
5496                 dev->data->port_id);
5497         return NULL;
5498 }
5499
5500 /**
5501  * Free a counter.
5502  *
5503  * @param[in] dev
5504  *   Pointer to Ethernet device structure.
5505  * @param[in] cnt
5506  *   Pointer to the counter to be freed.
5507  */
5508 void
5509 mlx5_counter_free(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt)
5510 {
5511         const struct mlx5_flow_driver_ops *fops;
5512         struct rte_flow_attr attr = { .transfer = 0 };
5513
5514         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5515                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5516                 fops->counter_free(dev, cnt);
5517                 return;
5518         }
5519         DRV_LOG(ERR,
5520                 "port %u counter free is not supported.",
5521                 dev->data->port_id);
5522 }
5523
5524 /**
5525  * Query counter statistics.
5526  *
5527  * @param[in] dev
5528  *   Pointer to Ethernet device structure.
5529  * @param[in] cnt
5530  *   Pointer to counter to query.
5531  * @param[in] clear
5532  *   Whether to clear the counter statistics after reading them.
5533  * @param[out] pkts
5534  *   Where to store the number of packets hitting the counter.
5535  * @param[out] bytes
5536  *   Where to store the number of bytes hitting the counter.
5537  *
5538  * @return
5539  *   0 on success, a negative errno value otherwise.
5540  */
5541 int
5542 mlx5_counter_query(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt,
5543                    bool clear, uint64_t *pkts, uint64_t *bytes)
5544 {
5545         const struct mlx5_flow_driver_ops *fops;
5546         struct rte_flow_attr attr = { .transfer = 0 };
5547
5548         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5549                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5550                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
5551         }
5552         DRV_LOG(ERR,
5553                 "port %u counter query is not supported.",
5554                 dev->data->port_id);
5555         return -ENOTSUP;
5556 }
5557
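/*
 * Illustrative sketch (hypothetical guard and function name): the
 * lifecycle of a counter through the three helpers above. This only
 * succeeds when the DV flow engine is in use; otherwise
 * mlx5_counter_alloc() returns NULL.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static void
example_counter_lifecycle(struct rte_eth_dev *dev)
{
        struct mlx5_flow_counter *cnt;
        uint64_t pkts = 0;
        uint64_t bytes = 0;

        cnt = mlx5_counter_alloc(dev);
        if (cnt == NULL)
                return;
        /* Read the statistics and reset them in the same call. */
        if (mlx5_counter_query(dev, cnt, true, &pkts, &bytes) == 0)
                DRV_LOG(DEBUG, "counter hit %lu packets / %lu bytes",
                        (unsigned long)pkts, (unsigned long)bytes);
        mlx5_counter_free(dev, cnt);
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */
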
5558 #define MLX5_POOL_QUERY_FREQ_US 1000000
5559
5560 /**
5561  * Set the periodic procedure for triggering asynchronous batch queries for all
5562  * the counter pools.
5563  *
5564  * @param[in] sh
5565  *   Pointer to mlx5_ibv_shared object.
5566  */
5567 void
5568 mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
5569 {
5570         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
5571         uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
5572         uint32_t us;
5573
5574         cont = MLX5_CNT_CONTAINER(sh, 1, 0);
5575         pools_n += rte_atomic16_read(&cont->n_valid);
5576         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
5577         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
5578         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
5579                 sh->cmng.query_thread_on = 0;
5580                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
5581         } else {
5582                 sh->cmng.query_thread_on = 1;
5583         }
5584 }
5585
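/*
 * Worked example of the pacing above: with MLX5_POOL_QUERY_FREQ_US at
 * 1000000 and, say, 4 valid pools across both containers, the alarm
 * fires every 1000000 / 4 = 250000 us, so each pool is queried roughly
 * once per second. pools_n is expected to be non-zero here because the
 * alarm is armed only after the first counter pool has been created.
 */
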
5586 /**
5587  * The periodic procedure for triggering asynchronous batch queries for all the
5588  * counter pools. This function is expected to be called from the host thread.
5589  *
5590  * @param[in] arg
5591  *   The alarm callback argument, a pointer to the mlx5_ibv_shared object.
5592  */
5593 void
5594 mlx5_flow_query_alarm(void *arg)
5595 {
5596         struct mlx5_ibv_shared *sh = arg;
5597         struct mlx5_devx_obj *dcs;
5598         uint16_t offset;
5599         int ret;
5600         uint8_t batch = sh->cmng.batch;
5601         uint16_t pool_index = sh->cmng.pool_index;
5602         struct mlx5_pools_container *cont;
5603         struct mlx5_pools_container *mcont;
5604         struct mlx5_flow_counter_pool *pool;
5605
5606         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
5607                 goto set_alarm;
5608 next_container:
5609         cont = MLX5_CNT_CONTAINER(sh, batch, 1);
5610         mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
5611         /* Check if a resize was done and the container needs flipping. */
5612         if (cont != mcont) {
5613                 if (cont->pools) {
5614                         /* Clean the old container. */
5615                         rte_free(cont->pools);
5616                         memset(cont, 0, sizeof(*cont));
5617                 }
5618                 rte_cio_wmb();
5619                 /* Flip the host container. */
5620                 sh->cmng.mhi[batch] ^= (uint8_t)2;
5621                 cont = mcont;
5622         }
5623         if (!cont->pools) {
5624                 /* Both containers empty is an unexpected case. */
5625                 if (unlikely(batch != sh->cmng.batch))
5626                         goto set_alarm;
5627                 batch ^= 0x1;
5628                 pool_index = 0;
5629                 goto next_container;
5630         }
5631         pool = cont->pools[pool_index];
5632         if (pool->raw_hw)
5633                 /* There is a pool query in progress. */
5634                 goto set_alarm;
5635         pool->raw_hw =
5636                 LIST_FIRST(&sh->cmng.free_stat_raws);
5637         if (!pool->raw_hw)
5638                 /* No free counter statistics raw memory. */
5639                 goto set_alarm;
5640         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
5641                                                               (&pool->a64_dcs);
5642         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
5643         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
5644                                                offset, NULL, NULL,
5645                                                pool->raw_hw->mem_mng->dm->id,
5646                                                (void *)(uintptr_t)
5647                                                (pool->raw_hw->data + offset),
5648                                                sh->devx_comp,
5649                                                (uint64_t)(uintptr_t)pool);
5650         if (ret) {
5651                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
5652                         " %d", pool->min_dcs->id);
5653                 pool->raw_hw = NULL;
5654                 goto set_alarm;
5655         }
5656         pool->raw_hw->min_dcs_id = dcs->id;
5657         LIST_REMOVE(pool->raw_hw, next);
5658         sh->cmng.pending_queries++;
5659         pool_index++;
5660         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
5661                 batch ^= 0x1;
5662                 pool_index = 0;
5663         }
5664 set_alarm:
5665         sh->cmng.batch = batch;
5666         sh->cmng.pool_index = pool_index;
5667         mlx5_set_query_alarm(sh);
5668 }
5669
5670 /**
5671  * Handler for the HW response carrying the ready values of an asynchronous
5672  * batch query. This function is expected to be called from the host thread.
5673  *
5674  * @param[in] sh
5675  *   The pointer to the shared IB device context.
5676  * @param[in] async_id
5677  *   The Devx async ID.
5678  * @param[in] status
5679  *   The status of the completion.
5680  */
5681 void
5682 mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
5683                                   uint64_t async_id, int status)
5684 {
5685         struct mlx5_flow_counter_pool *pool =
5686                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
5687         struct mlx5_counter_stats_raw *raw_to_free;
5688
5689         if (unlikely(status)) {
5690                 raw_to_free = pool->raw_hw;
5691         } else {
5692                 raw_to_free = pool->raw;
5693                 rte_spinlock_lock(&pool->sl);
5694                 pool->raw = pool->raw_hw;
5695                 rte_spinlock_unlock(&pool->sl);
5696                 rte_atomic64_add(&pool->query_gen, 1);
5697                 /* Ensure the new raw counter data is visible in memory. */
5698                 rte_cio_wmb();
5699         }
5700         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
5701         pool->raw_hw = NULL;
5702         sh->cmng.pending_queries--;
5703 }
5704
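/*
 * The handler above completes the double buffering started in
 * mlx5_flow_query_alarm(): on success the freshly filled raw_hw buffer
 * is swapped under the pool spinlock with the readable pool->raw and
 * query_gen is bumped, while the buffer that was swapped out (or raw_hw
 * itself on failure) returns to the free_stat_raws list for reuse.
 */
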
5705 /**
5706  * Translate the rte_flow group index to HW table value.
5707  *
5708  * @param[in] attributes
5709  *   Pointer to flow attributes
5710  * @param[in] external
5711  *   True if the flow rule was created by a request external to the PMD.
5712  * @param[in] group
5713  *   rte_flow group index value.
5714  * @param[out] fdb_def_rule
5715  *   Whether the FDB default rule (jump to table 1) is configured.
5716  * @param[out] table
5717  *   HW table value.
5718  * @param[out] error
5719  *   Pointer to error structure.
5720  *
5721  * @return
5722  *   0 on success, a negative errno value otherwise and rte_errno is set.
5723  */
5724 int
5725 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
5726                          uint32_t group, bool fdb_def_rule, uint32_t *table,
5727                          struct rte_flow_error *error)
5728 {
5729         if (attributes->transfer && external && fdb_def_rule) {
5730                 if (group == UINT32_MAX)
5731                         return rte_flow_error_set
5732                                                 (error, EINVAL,
5733                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
5734                                                  NULL,
5735                                                  "group index not supported");
5736                 *table = group + 1;
5737         } else {
5738                 *table = group;
5739         }
5740         return 0;
5741 }
5742
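/*
 * Example of the translation above with the FDB default rule enabled,
 * for a transfer rule created through the public API: group 0 maps to
 * table 1, group 1 to table 2, and so on. Group UINT32_MAX is rejected
 * because "group + 1" would wrap around to table 0, which is reserved
 * for the default jump rule. In every other case the group index is
 * used as the table value unchanged.
 */
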
5743 /**
5744  * Discover availability of metadata reg_c's.
5745  *
5746  * Iteratively use test flows to check availability.
5747  *
5748  * @param[in] dev
5749  *   Pointer to the Ethernet device structure.
5750  *
5751  * @return
5752  *   0 on success, a negative errno value otherwise and rte_errno is set.
5753  */
5754 int
5755 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
5756 {
5757         struct mlx5_priv *priv = dev->data->dev_private;
5758         struct mlx5_dev_config *config = &priv->config;
5759         enum modify_reg idx;
5760         int n = 0;
5761
5762         /* reg_c[0] and reg_c[1] are reserved. */
5763         config->flow_mreg_c[n++] = REG_C_0;
5764         config->flow_mreg_c[n++] = REG_C_1;
5765         /* Discover availability of other reg_c's. */
5766         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
5767                 struct rte_flow_attr attr = {
5768                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
5769                         .priority = MLX5_FLOW_PRIO_RSVD,
5770                         .ingress = 1,
5771                 };
5772                 struct rte_flow_item items[] = {
5773                         [0] = {
5774                                 .type = RTE_FLOW_ITEM_TYPE_END,
5775                         },
5776                 };
5777                 struct rte_flow_action actions[] = {
5778                         [0] = {
5779                                 .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
5780                                 .conf = &(struct mlx5_flow_action_copy_mreg){
5781                                         .src = REG_C_1,
5782                                         .dst = idx,
5783                                 },
5784                         },
5785                         [1] = {
5786                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
5787                                 .conf = &(struct rte_flow_action_jump){
5788                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
5789                                 },
5790                         },
5791                         [2] = {
5792                                 .type = RTE_FLOW_ACTION_TYPE_END,
5793                         },
5794                 };
5795                 struct rte_flow *flow;
5796                 struct rte_flow_error error;
5797
5798                 if (!config->dv_flow_en)
5799                         break;
5800                 /* Create internal flow, validation skips copy action. */
5801                 flow = flow_list_create(dev, NULL, &attr, items,
5802                                         actions, false, &error);
5803                 if (!flow)
5804                         continue;
5805                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
5806                         config->flow_mreg_c[n++] = idx;
5807                 flow_list_destroy(dev, NULL, flow);
5808         }
5809         for (; n < MLX5_MREG_C_NUM; ++n)
5810                 config->flow_mreg_c[n] = REG_NONE;
5811         return 0;
5812 }
5813
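/*
 * Example outcome of the discovery above: if the test flows succeed for
 * reg_c[2] and reg_c[3] only, config->flow_mreg_c[] ends up as
 * { REG_C_0, REG_C_1, REG_C_2, REG_C_3, REG_NONE, ... }; registers whose
 * test flow cannot be created are simply skipped, and without DV flow
 * support only the two reserved registers are recorded.
 */
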
5814 /**
5815  * Dump raw HW flow data to a file.
5816  *
5817  * @param[in] dev
5818  *   Pointer to Ethernet device.
5819  * @param[in] file
5820  *   A pointer to a file for output.
5821  * @param[out] error
5822  *   Perform verbose error reporting if not NULL. PMDs initialize this
5823  *   structure in case of error only.
5824  * @return
5825  *   0 on success, a negative value otherwise.
5826  */
5827 int
5828 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
5829                    FILE *file,
5830                    struct rte_flow_error *error __rte_unused)
5831 {
5832         struct mlx5_priv *priv = dev->data->dev_private;
5833         struct mlx5_ibv_shared *sh = priv->sh;
5834
5835         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
5836                                        sh->tx_domain, file);
5837 }
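
/*
 * Illustrative application-side sketch (hypothetical guard, function
 * name and file path): the public rte_flow API entry point that ends up
 * in mlx5_flow_dev_dump() above for mlx5 ports.
 */
#ifdef MLX5_FLOW_USAGE_EXAMPLES
static int
example_dump_flows(uint16_t port_id)
{
        struct rte_flow_error error;
        FILE *file = fopen("/tmp/mlx5_flow_dump.txt", "w");
        int ret;

        if (file == NULL)
                return -1;
        /* Dispatches to the dev_dump callback of the port's PMD. */
        ret = rte_flow_dev_dump(port_id, file, &error);
        fclose(file);
        return ret;
}
#endif /* MLX5_FLOW_USAGE_EXAMPLES */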