net/mlx5: add flow validation of eCPRI header
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdbool.h>
12
13 /* Verbs header. */
14 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic ignored "-Wpedantic"
17 #endif
18 #include <infiniband/verbs.h>
19 #ifdef PEDANTIC
20 #pragma GCC diagnostic error "-Wpedantic"
21 #endif
22
23 #include <rte_common.h>
24 #include <rte_ether.h>
25 #include <rte_ethdev_driver.h>
26 #include <rte_flow.h>
27 #include <rte_cycles.h>
28 #include <rte_flow_driver.h>
29 #include <rte_malloc.h>
30 #include <rte_ip.h>
31
32 #include <mlx5_glue.h>
33 #include <mlx5_devx_cmds.h>
34 #include <mlx5_prm.h>
35
36 #include "mlx5_defs.h"
37 #include "mlx5.h"
38 #include "mlx5_flow.h"
39 #include "mlx5_flow_os.h"
40 #include "mlx5_rxtx.h"
41
42 /** Device flow drivers. */
43 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
44
45 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
46
47 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
48         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
49 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
50         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
51 #endif
52         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
53         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
54 };
55
56 enum mlx5_expansion {
57         MLX5_EXPANSION_ROOT,
58         MLX5_EXPANSION_ROOT_OUTER,
59         MLX5_EXPANSION_ROOT_ETH_VLAN,
60         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
61         MLX5_EXPANSION_OUTER_ETH,
62         MLX5_EXPANSION_OUTER_ETH_VLAN,
63         MLX5_EXPANSION_OUTER_VLAN,
64         MLX5_EXPANSION_OUTER_IPV4,
65         MLX5_EXPANSION_OUTER_IPV4_UDP,
66         MLX5_EXPANSION_OUTER_IPV4_TCP,
67         MLX5_EXPANSION_OUTER_IPV6,
68         MLX5_EXPANSION_OUTER_IPV6_UDP,
69         MLX5_EXPANSION_OUTER_IPV6_TCP,
70         MLX5_EXPANSION_VXLAN,
71         MLX5_EXPANSION_VXLAN_GPE,
72         MLX5_EXPANSION_GRE,
73         MLX5_EXPANSION_MPLS,
74         MLX5_EXPANSION_ETH,
75         MLX5_EXPANSION_ETH_VLAN,
76         MLX5_EXPANSION_VLAN,
77         MLX5_EXPANSION_IPV4,
78         MLX5_EXPANSION_IPV4_UDP,
79         MLX5_EXPANSION_IPV4_TCP,
80         MLX5_EXPANSION_IPV6,
81         MLX5_EXPANSION_IPV6_UDP,
82         MLX5_EXPANSION_IPV6_TCP,
83 };
84
85 /** Supported expansion of items. */
86 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
87         [MLX5_EXPANSION_ROOT] = {
88                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
89                                                  MLX5_EXPANSION_IPV4,
90                                                  MLX5_EXPANSION_IPV6),
91                 .type = RTE_FLOW_ITEM_TYPE_END,
92         },
93         [MLX5_EXPANSION_ROOT_OUTER] = {
94                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
95                                                  MLX5_EXPANSION_OUTER_IPV4,
96                                                  MLX5_EXPANSION_OUTER_IPV6),
97                 .type = RTE_FLOW_ITEM_TYPE_END,
98         },
99         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
100                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
101                 .type = RTE_FLOW_ITEM_TYPE_END,
102         },
103         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
104                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
105                 .type = RTE_FLOW_ITEM_TYPE_END,
106         },
107         [MLX5_EXPANSION_OUTER_ETH] = {
108                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
109                                                  MLX5_EXPANSION_OUTER_IPV6,
110                                                  MLX5_EXPANSION_MPLS),
111                 .type = RTE_FLOW_ITEM_TYPE_ETH,
112                 .rss_types = 0,
113         },
114         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
115                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
116                 .type = RTE_FLOW_ITEM_TYPE_ETH,
117                 .rss_types = 0,
118         },
119         [MLX5_EXPANSION_OUTER_VLAN] = {
120                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
121                                                  MLX5_EXPANSION_OUTER_IPV6),
122                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
123         },
124         [MLX5_EXPANSION_OUTER_IPV4] = {
125                 .next = RTE_FLOW_EXPAND_RSS_NEXT
126                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
127                          MLX5_EXPANSION_OUTER_IPV4_TCP,
128                          MLX5_EXPANSION_GRE,
129                          MLX5_EXPANSION_IPV4,
130                          MLX5_EXPANSION_IPV6),
131                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
132                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
133                         ETH_RSS_NONFRAG_IPV4_OTHER,
134         },
135         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
136                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
137                                                  MLX5_EXPANSION_VXLAN_GPE),
138                 .type = RTE_FLOW_ITEM_TYPE_UDP,
139                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
140         },
141         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
142                 .type = RTE_FLOW_ITEM_TYPE_TCP,
143                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
144         },
145         [MLX5_EXPANSION_OUTER_IPV6] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT
147                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
148                          MLX5_EXPANSION_OUTER_IPV6_TCP,
149                          MLX5_EXPANSION_IPV4,
150                          MLX5_EXPANSION_IPV6),
151                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
152                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
153                         ETH_RSS_NONFRAG_IPV6_OTHER,
154         },
155         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
156                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
157                                                  MLX5_EXPANSION_VXLAN_GPE),
158                 .type = RTE_FLOW_ITEM_TYPE_UDP,
159                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
160         },
161         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
162                 .type = RTE_FLOW_ITEM_TYPE_TCP,
163                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
164         },
165         [MLX5_EXPANSION_VXLAN] = {
166                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
167                                                  MLX5_EXPANSION_IPV4,
168                                                  MLX5_EXPANSION_IPV6),
169                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
170         },
171         [MLX5_EXPANSION_VXLAN_GPE] = {
172                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
173                                                  MLX5_EXPANSION_IPV4,
174                                                  MLX5_EXPANSION_IPV6),
175                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
176         },
177         [MLX5_EXPANSION_GRE] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
179                 .type = RTE_FLOW_ITEM_TYPE_GRE,
180         },
181         [MLX5_EXPANSION_MPLS] = {
182                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
183                                                  MLX5_EXPANSION_IPV6),
184                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
185         },
186         [MLX5_EXPANSION_ETH] = {
187                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
188                                                  MLX5_EXPANSION_IPV6),
189                 .type = RTE_FLOW_ITEM_TYPE_ETH,
190         },
191         [MLX5_EXPANSION_ETH_VLAN] = {
192                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
193                 .type = RTE_FLOW_ITEM_TYPE_ETH,
194         },
195         [MLX5_EXPANSION_VLAN] = {
196                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
197                                                  MLX5_EXPANSION_IPV6),
198                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
199         },
200         [MLX5_EXPANSION_IPV4] = {
201                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
202                                                  MLX5_EXPANSION_IPV4_TCP),
203                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
204                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
205                         ETH_RSS_NONFRAG_IPV4_OTHER,
206         },
207         [MLX5_EXPANSION_IPV4_UDP] = {
208                 .type = RTE_FLOW_ITEM_TYPE_UDP,
209                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
210         },
211         [MLX5_EXPANSION_IPV4_TCP] = {
212                 .type = RTE_FLOW_ITEM_TYPE_TCP,
213                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
214         },
215         [MLX5_EXPANSION_IPV6] = {
216                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
217                                                  MLX5_EXPANSION_IPV6_TCP),
218                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
219                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
220                         ETH_RSS_NONFRAG_IPV6_OTHER,
221         },
222         [MLX5_EXPANSION_IPV6_UDP] = {
223                 .type = RTE_FLOW_ITEM_TYPE_UDP,
224                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
225         },
226         [MLX5_EXPANSION_IPV6_TCP] = {
227                 .type = RTE_FLOW_ITEM_TYPE_TCP,
228                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
229         },
230 };
231
232 static const struct rte_flow_ops mlx5_flow_ops = {
233         .validate = mlx5_flow_validate,
234         .create = mlx5_flow_create,
235         .destroy = mlx5_flow_destroy,
236         .flush = mlx5_flow_flush,
237         .isolate = mlx5_flow_isolate,
238         .query = mlx5_flow_query,
239         .dev_dump = mlx5_flow_dev_dump,
240         .get_aged_flows = mlx5_flow_get_aged_flows,
241 };
242
243 /* Convert FDIR request to Generic flow. */
244 struct mlx5_fdir {
245         struct rte_flow_attr attr;
246         struct rte_flow_item items[4];
247         struct rte_flow_item_eth l2;
248         struct rte_flow_item_eth l2_mask;
249         union {
250                 struct rte_flow_item_ipv4 ipv4;
251                 struct rte_flow_item_ipv6 ipv6;
252         } l3;
253         union {
254                 struct rte_flow_item_ipv4 ipv4;
255                 struct rte_flow_item_ipv6 ipv6;
256         } l3_mask;
257         union {
258                 struct rte_flow_item_udp udp;
259                 struct rte_flow_item_tcp tcp;
260         } l4;
261         union {
262                 struct rte_flow_item_udp udp;
263                 struct rte_flow_item_tcp tcp;
264         } l4_mask;
265         struct rte_flow_action actions[2];
266         struct rte_flow_action_queue queue;
267 };
268
269 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
270 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
271         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
272 };
273
274 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
275 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
276         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
277         { 9, 10, 11 }, { 12, 13, 14 },
278 };
279
280 /* Tunnel information. */
281 struct mlx5_flow_tunnel_info {
282         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
283         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
284 };
285
286 static struct mlx5_flow_tunnel_info tunnels_info[] = {
287         {
288                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
289                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
290         },
291         {
292                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
293                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
294         },
295         {
296                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
297                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
298         },
299         {
300                 .tunnel = MLX5_FLOW_LAYER_GRE,
301                 .ptype = RTE_PTYPE_TUNNEL_GRE,
302         },
303         {
304                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
305                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
306         },
307         {
308                 .tunnel = MLX5_FLOW_LAYER_MPLS,
309                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
310         },
311         {
312                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
313                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
314         },
315         {
316                 .tunnel = MLX5_FLOW_LAYER_IPIP,
317                 .ptype = RTE_PTYPE_TUNNEL_IP,
318         },
319         {
320                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
321                 .ptype = RTE_PTYPE_TUNNEL_IP,
322         },
323         {
324                 .tunnel = MLX5_FLOW_LAYER_GTP,
325                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
326         },
327 };
328
329 /**
330  * Translate tag ID to register.
331  *
332  * @param[in] dev
333  *   Pointer to the Ethernet device structure.
334  * @param[in] feature
335  *   The feature that request the register.
336  * @param[in] id
337  *   The request register ID.
338  * @param[out] error
339  *   Error description in case of any.
340  *
341  * @return
342  *   The request register on success, a negative errno
343  *   value otherwise and rte_errno is set.
344  */
345 int
346 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
347                      enum mlx5_feature_name feature,
348                      uint32_t id,
349                      struct rte_flow_error *error)
350 {
351         struct mlx5_priv *priv = dev->data->dev_private;
352         struct mlx5_dev_config *config = &priv->config;
353         enum modify_reg start_reg;
354         bool skip_mtr_reg = false;
355
356         switch (feature) {
357         case MLX5_HAIRPIN_RX:
358                 return REG_B;
359         case MLX5_HAIRPIN_TX:
360                 return REG_A;
361         case MLX5_METADATA_RX:
362                 switch (config->dv_xmeta_en) {
363                 case MLX5_XMETA_MODE_LEGACY:
364                         return REG_B;
365                 case MLX5_XMETA_MODE_META16:
366                         return REG_C_0;
367                 case MLX5_XMETA_MODE_META32:
368                         return REG_C_1;
369                 }
370                 break;
371         case MLX5_METADATA_TX:
372                 return REG_A;
373         case MLX5_METADATA_FDB:
374                 switch (config->dv_xmeta_en) {
375                 case MLX5_XMETA_MODE_LEGACY:
376                         return REG_NONE;
377                 case MLX5_XMETA_MODE_META16:
378                         return REG_C_0;
379                 case MLX5_XMETA_MODE_META32:
380                         return REG_C_1;
381                 }
382                 break;
383         case MLX5_FLOW_MARK:
384                 switch (config->dv_xmeta_en) {
385                 case MLX5_XMETA_MODE_LEGACY:
386                         return REG_NONE;
387                 case MLX5_XMETA_MODE_META16:
388                         return REG_C_1;
389                 case MLX5_XMETA_MODE_META32:
390                         return REG_C_0;
391                 }
392                 break;
393         case MLX5_MTR_SFX:
394                 /*
395                  * If meter color and flow match share one register, flow match
396                  * should use the meter color register for match.
397                  */
398                 if (priv->mtr_reg_share)
399                         return priv->mtr_color_reg;
400                 else
401                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
402                                REG_C_3;
403         case MLX5_MTR_COLOR:
404                 MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
405                 return priv->mtr_color_reg;
406         case MLX5_COPY_MARK:
407                 /*
408                  * Metadata COPY_MARK register using is in meter suffix sub
409                  * flow while with meter. It's safe to share the same register.
410                  */
411                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
412         case MLX5_APP_TAG:
413                 /*
414                  * If meter is enable, it will engage the register for color
415                  * match and flow match. If meter color match is not using the
416                  * REG_C_2, need to skip the REG_C_x be used by meter color
417                  * match.
418                  * If meter is disable, free to use all available registers.
419                  */
420                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
421                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
422                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
423                 if (id > (REG_C_7 - start_reg))
424                         return rte_flow_error_set(error, EINVAL,
425                                                   RTE_FLOW_ERROR_TYPE_ITEM,
426                                                   NULL, "invalid tag id");
427                 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
428                         return rte_flow_error_set(error, ENOTSUP,
429                                                   RTE_FLOW_ERROR_TYPE_ITEM,
430                                                   NULL, "unsupported tag id");
431                 /*
432                  * This case means meter is using the REG_C_x great than 2.
433                  * Take care not to conflict with meter color REG_C_x.
434                  * If the available index REG_C_y >= REG_C_x, skip the
435                  * color register.
436                  */
437                 if (skip_mtr_reg && config->flow_mreg_c
438                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
439                         if (id >= (REG_C_7 - start_reg))
440                                 return rte_flow_error_set(error, EINVAL,
441                                                        RTE_FLOW_ERROR_TYPE_ITEM,
442                                                         NULL, "invalid tag id");
443                         if (config->flow_mreg_c
444                             [id + 1 + start_reg - REG_C_0] != REG_NONE)
445                                 return config->flow_mreg_c
446                                                [id + 1 + start_reg - REG_C_0];
447                         return rte_flow_error_set(error, ENOTSUP,
448                                                   RTE_FLOW_ERROR_TYPE_ITEM,
449                                                   NULL, "unsupported tag id");
450                 }
451                 return config->flow_mreg_c[id + start_reg - REG_C_0];
452         }
453         MLX5_ASSERT(false);
454         return rte_flow_error_set(error, EINVAL,
455                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
456                                   NULL, "invalid feature name");
457 }
458
459 /**
460  * Check extensive flow metadata register support.
461  *
462  * @param dev
463  *   Pointer to rte_eth_dev structure.
464  *
465  * @return
466  *   True if device supports extensive flow metadata register, otherwise false.
467  */
468 bool
469 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
470 {
471         struct mlx5_priv *priv = dev->data->dev_private;
472         struct mlx5_dev_config *config = &priv->config;
473
474         /*
475          * Having available reg_c can be regarded inclusively as supporting
476          * extensive flow metadata register, which could mean,
477          * - metadata register copy action by modify header.
478          * - 16 modify header actions is supported.
479          * - reg_c's are preserved across different domain (FDB and NIC) on
480          *   packet loopback by flow lookup miss.
481          */
482         return config->flow_mreg_c[2] != REG_NONE;
483 }
484
485 /**
486  * Discover the maximum number of priority available.
487  *
488  * @param[in] dev
489  *   Pointer to the Ethernet device structure.
490  *
491  * @return
492  *   number of supported flow priority on success, a negative errno
493  *   value otherwise and rte_errno is set.
494  */
495 int
496 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
497 {
498         struct mlx5_priv *priv = dev->data->dev_private;
499         struct {
500                 struct ibv_flow_attr attr;
501                 struct ibv_flow_spec_eth eth;
502                 struct ibv_flow_spec_action_drop drop;
503         } flow_attr = {
504                 .attr = {
505                         .num_of_specs = 2,
506                         .port = (uint8_t)priv->dev_port,
507                 },
508                 .eth = {
509                         .type = IBV_FLOW_SPEC_ETH,
510                         .size = sizeof(struct ibv_flow_spec_eth),
511                 },
512                 .drop = {
513                         .size = sizeof(struct ibv_flow_spec_action_drop),
514                         .type = IBV_FLOW_SPEC_ACTION_DROP,
515                 },
516         };
517         struct ibv_flow *flow;
518         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
519         uint16_t vprio[] = { 8, 16 };
520         int i;
521         int priority = 0;
522
523         if (!drop) {
524                 rte_errno = ENOTSUP;
525                 return -rte_errno;
526         }
527         for (i = 0; i != RTE_DIM(vprio); i++) {
528                 flow_attr.attr.priority = vprio[i] - 1;
529                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
530                 if (!flow)
531                         break;
532                 claim_zero(mlx5_glue->destroy_flow(flow));
533                 priority = vprio[i];
534         }
535         mlx5_hrxq_drop_release(dev);
536         switch (priority) {
537         case 8:
538                 priority = RTE_DIM(priority_map_3);
539                 break;
540         case 16:
541                 priority = RTE_DIM(priority_map_5);
542                 break;
543         default:
544                 rte_errno = ENOTSUP;
545                 DRV_LOG(ERR,
546                         "port %u verbs maximum priority: %d expected 8/16",
547                         dev->data->port_id, priority);
548                 return -rte_errno;
549         }
550         DRV_LOG(INFO, "port %u flow maximum priority: %d",
551                 dev->data->port_id, priority);
552         return priority;
553 }
554
555 /**
556  * Adjust flow priority based on the highest layer and the request priority.
557  *
558  * @param[in] dev
559  *   Pointer to the Ethernet device structure.
560  * @param[in] priority
561  *   The rule base priority.
562  * @param[in] subpriority
563  *   The priority based on the items.
564  *
565  * @return
566  *   The new priority.
567  */
568 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
569                                    uint32_t subpriority)
570 {
571         uint32_t res = 0;
572         struct mlx5_priv *priv = dev->data->dev_private;
573
574         switch (priv->config.flow_prio) {
575         case RTE_DIM(priority_map_3):
576                 res = priority_map_3[priority][subpriority];
577                 break;
578         case RTE_DIM(priority_map_5):
579                 res = priority_map_5[priority][subpriority];
580                 break;
581         }
582         return  res;
583 }
584
585 /**
586  * Verify the @p item specifications (spec, last, mask) are compatible with the
587  * NIC capabilities.
588  *
589  * @param[in] item
590  *   Item specification.
591  * @param[in] mask
592  *   @p item->mask or flow default bit-masks.
593  * @param[in] nic_mask
594  *   Bit-masks covering supported fields by the NIC to compare with user mask.
595  * @param[in] size
596  *   Bit-masks size in bytes.
597  * @param[out] error
598  *   Pointer to error structure.
599  *
600  * @return
601  *   0 on success, a negative errno value otherwise and rte_errno is set.
602  */
603 int
604 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
605                           const uint8_t *mask,
606                           const uint8_t *nic_mask,
607                           unsigned int size,
608                           struct rte_flow_error *error)
609 {
610         unsigned int i;
611
612         MLX5_ASSERT(nic_mask);
613         for (i = 0; i < size; ++i)
614                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
615                         return rte_flow_error_set(error, ENOTSUP,
616                                                   RTE_FLOW_ERROR_TYPE_ITEM,
617                                                   item,
618                                                   "mask enables non supported"
619                                                   " bits");
620         if (!item->spec && (item->mask || item->last))
621                 return rte_flow_error_set(error, EINVAL,
622                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
623                                           "mask/last without a spec is not"
624                                           " supported");
625         if (item->spec && item->last) {
626                 uint8_t spec[size];
627                 uint8_t last[size];
628                 unsigned int i;
629                 int ret;
630
631                 for (i = 0; i < size; ++i) {
632                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
633                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
634                 }
635                 ret = memcmp(spec, last, size);
636                 if (ret != 0)
637                         return rte_flow_error_set(error, EINVAL,
638                                                   RTE_FLOW_ERROR_TYPE_ITEM,
639                                                   item,
640                                                   "range is not valid");
641         }
642         return 0;
643 }
644
645 /**
646  * Adjust the hash fields according to the @p flow information.
647  *
648  * @param[in] dev_flow.
649  *   Pointer to the mlx5_flow.
650  * @param[in] tunnel
651  *   1 when the hash field is for a tunnel item.
652  * @param[in] layer_types
653  *   ETH_RSS_* types.
654  * @param[in] hash_fields
655  *   Item hash fields.
656  *
657  * @return
658  *   The hash fields that should be used.
659  */
660 uint64_t
661 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
662                             int tunnel __rte_unused, uint64_t layer_types,
663                             uint64_t hash_fields)
664 {
665 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
666         int rss_request_inner = rss_desc->level >= 2;
667
668         /* Check RSS hash level for tunnel. */
669         if (tunnel && rss_request_inner)
670                 hash_fields |= IBV_RX_HASH_INNER;
671         else if (tunnel || rss_request_inner)
672                 return 0;
673 #endif
674         /* Check if requested layer matches RSS hash fields. */
675         if (!(rss_desc->types & layer_types))
676                 return 0;
677         return hash_fields;
678 }
679
680 /**
681  * Lookup and set the ptype in the data Rx part.  A single Ptype can be used,
682  * if several tunnel rules are used on this queue, the tunnel ptype will be
683  * cleared.
684  *
685  * @param rxq_ctrl
686  *   Rx queue to update.
687  */
688 static void
689 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
690 {
691         unsigned int i;
692         uint32_t tunnel_ptype = 0;
693
694         /* Look up for the ptype to use. */
695         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
696                 if (!rxq_ctrl->flow_tunnels_n[i])
697                         continue;
698                 if (!tunnel_ptype) {
699                         tunnel_ptype = tunnels_info[i].ptype;
700                 } else {
701                         tunnel_ptype = 0;
702                         break;
703                 }
704         }
705         rxq_ctrl->rxq.tunnel = tunnel_ptype;
706 }
707
708 /**
709  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the devive
710  * flow.
711  *
712  * @param[in] dev
713  *   Pointer to the Ethernet device structure.
714  * @param[in] dev_handle
715  *   Pointer to device flow handle structure.
716  */
717 static void
718 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
719                        struct mlx5_flow_handle *dev_handle)
720 {
721         struct mlx5_priv *priv = dev->data->dev_private;
722         const int mark = dev_handle->mark;
723         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
724         struct mlx5_hrxq *hrxq;
725         unsigned int i;
726
727         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
728                 return;
729         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
730                               dev_handle->rix_hrxq);
731         if (!hrxq)
732                 return;
733         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
734                 int idx = hrxq->ind_table->queues[i];
735                 struct mlx5_rxq_ctrl *rxq_ctrl =
736                         container_of((*priv->rxqs)[idx],
737                                      struct mlx5_rxq_ctrl, rxq);
738
739                 /*
740                  * To support metadata register copy on Tx loopback,
741                  * this must be always enabled (metadata may arive
742                  * from other port - not from local flows only.
743                  */
744                 if (priv->config.dv_flow_en &&
745                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
746                     mlx5_flow_ext_mreg_supported(dev)) {
747                         rxq_ctrl->rxq.mark = 1;
748                         rxq_ctrl->flow_mark_n = 1;
749                 } else if (mark) {
750                         rxq_ctrl->rxq.mark = 1;
751                         rxq_ctrl->flow_mark_n++;
752                 }
753                 if (tunnel) {
754                         unsigned int j;
755
756                         /* Increase the counter matching the flow. */
757                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
758                                 if ((tunnels_info[j].tunnel &
759                                      dev_handle->layers) ==
760                                     tunnels_info[j].tunnel) {
761                                         rxq_ctrl->flow_tunnels_n[j]++;
762                                         break;
763                                 }
764                         }
765                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
766                 }
767         }
768 }
769
770 /**
771  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
772  *
773  * @param[in] dev
774  *   Pointer to the Ethernet device structure.
775  * @param[in] flow
776  *   Pointer to flow structure.
777  */
778 static void
779 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
780 {
781         struct mlx5_priv *priv = dev->data->dev_private;
782         uint32_t handle_idx;
783         struct mlx5_flow_handle *dev_handle;
784
785         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
786                        handle_idx, dev_handle, next)
787                 flow_drv_rxq_flags_set(dev, dev_handle);
788 }
789
790 /**
791  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
792  * device flow if no other flow uses it with the same kind of request.
793  *
794  * @param dev
795  *   Pointer to Ethernet device.
796  * @param[in] dev_handle
797  *   Pointer to the device flow handle structure.
798  */
799 static void
800 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
801                         struct mlx5_flow_handle *dev_handle)
802 {
803         struct mlx5_priv *priv = dev->data->dev_private;
804         const int mark = dev_handle->mark;
805         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
806         struct mlx5_hrxq *hrxq;
807         unsigned int i;
808
809         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
810                 return;
811         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
812                               dev_handle->rix_hrxq);
813         if (!hrxq)
814                 return;
815         MLX5_ASSERT(dev->data->dev_started);
816         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
817                 int idx = hrxq->ind_table->queues[i];
818                 struct mlx5_rxq_ctrl *rxq_ctrl =
819                         container_of((*priv->rxqs)[idx],
820                                      struct mlx5_rxq_ctrl, rxq);
821
822                 if (priv->config.dv_flow_en &&
823                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
824                     mlx5_flow_ext_mreg_supported(dev)) {
825                         rxq_ctrl->rxq.mark = 1;
826                         rxq_ctrl->flow_mark_n = 1;
827                 } else if (mark) {
828                         rxq_ctrl->flow_mark_n--;
829                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
830                 }
831                 if (tunnel) {
832                         unsigned int j;
833
834                         /* Decrease the counter matching the flow. */
835                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
836                                 if ((tunnels_info[j].tunnel &
837                                      dev_handle->layers) ==
838                                     tunnels_info[j].tunnel) {
839                                         rxq_ctrl->flow_tunnels_n[j]--;
840                                         break;
841                                 }
842                         }
843                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
844                 }
845         }
846 }
847
848 /**
849  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
850  * @p flow if no other flow uses it with the same kind of request.
851  *
852  * @param dev
853  *   Pointer to Ethernet device.
854  * @param[in] flow
855  *   Pointer to the flow.
856  */
857 static void
858 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
859 {
860         struct mlx5_priv *priv = dev->data->dev_private;
861         uint32_t handle_idx;
862         struct mlx5_flow_handle *dev_handle;
863
864         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
865                        handle_idx, dev_handle, next)
866                 flow_drv_rxq_flags_trim(dev, dev_handle);
867 }
868
869 /**
870  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
871  *
872  * @param dev
873  *   Pointer to Ethernet device.
874  */
875 static void
876 flow_rxq_flags_clear(struct rte_eth_dev *dev)
877 {
878         struct mlx5_priv *priv = dev->data->dev_private;
879         unsigned int i;
880
881         for (i = 0; i != priv->rxqs_n; ++i) {
882                 struct mlx5_rxq_ctrl *rxq_ctrl;
883                 unsigned int j;
884
885                 if (!(*priv->rxqs)[i])
886                         continue;
887                 rxq_ctrl = container_of((*priv->rxqs)[i],
888                                         struct mlx5_rxq_ctrl, rxq);
889                 rxq_ctrl->flow_mark_n = 0;
890                 rxq_ctrl->rxq.mark = 0;
891                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
892                         rxq_ctrl->flow_tunnels_n[j] = 0;
893                 rxq_ctrl->rxq.tunnel = 0;
894         }
895 }
896
897 /**
898  * Set the Rx queue dynamic metadata (mask and offset) for a flow
899  *
900  * @param[in] dev
901  *   Pointer to the Ethernet device structure.
902  */
903 void
904 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
905 {
906         struct mlx5_priv *priv = dev->data->dev_private;
907         struct mlx5_rxq_data *data;
908         unsigned int i;
909
910         for (i = 0; i != priv->rxqs_n; ++i) {
911                 if (!(*priv->rxqs)[i])
912                         continue;
913                 data = (*priv->rxqs)[i];
914                 if (!rte_flow_dynf_metadata_avail()) {
915                         data->dynf_meta = 0;
916                         data->flow_meta_mask = 0;
917                         data->flow_meta_offset = -1;
918                 } else {
919                         data->dynf_meta = 1;
920                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
921                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
922                 }
923         }
924 }
925
926 /*
927  * return a pointer to the desired action in the list of actions.
928  *
929  * @param[in] actions
930  *   The list of actions to search the action in.
931  * @param[in] action
932  *   The action to find.
933  *
934  * @return
935  *   Pointer to the action in the list, if found. NULL otherwise.
936  */
937 const struct rte_flow_action *
938 mlx5_flow_find_action(const struct rte_flow_action *actions,
939                       enum rte_flow_action_type action)
940 {
941         if (actions == NULL)
942                 return NULL;
943         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
944                 if (actions->type == action)
945                         return actions;
946         return NULL;
947 }
948
949 /*
950  * Validate the flag action.
951  *
952  * @param[in] action_flags
953  *   Bit-fields that holds the actions detected until now.
954  * @param[in] attr
955  *   Attributes of flow that includes this action.
956  * @param[out] error
957  *   Pointer to error structure.
958  *
959  * @return
960  *   0 on success, a negative errno value otherwise and rte_errno is set.
961  */
962 int
963 mlx5_flow_validate_action_flag(uint64_t action_flags,
964                                const struct rte_flow_attr *attr,
965                                struct rte_flow_error *error)
966 {
967         if (action_flags & MLX5_FLOW_ACTION_MARK)
968                 return rte_flow_error_set(error, EINVAL,
969                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
970                                           "can't mark and flag in same flow");
971         if (action_flags & MLX5_FLOW_ACTION_FLAG)
972                 return rte_flow_error_set(error, EINVAL,
973                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
974                                           "can't have 2 flag"
975                                           " actions in same flow");
976         if (attr->egress)
977                 return rte_flow_error_set(error, ENOTSUP,
978                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
979                                           "flag action not supported for "
980                                           "egress");
981         return 0;
982 }
983
984 /*
985  * Validate the mark action.
986  *
987  * @param[in] action
988  *   Pointer to the queue action.
989  * @param[in] action_flags
990  *   Bit-fields that holds the actions detected until now.
991  * @param[in] attr
992  *   Attributes of flow that includes this action.
993  * @param[out] error
994  *   Pointer to error structure.
995  *
996  * @return
997  *   0 on success, a negative errno value otherwise and rte_errno is set.
998  */
999 int
1000 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
1001                                uint64_t action_flags,
1002                                const struct rte_flow_attr *attr,
1003                                struct rte_flow_error *error)
1004 {
1005         const struct rte_flow_action_mark *mark = action->conf;
1006
1007         if (!mark)
1008                 return rte_flow_error_set(error, EINVAL,
1009                                           RTE_FLOW_ERROR_TYPE_ACTION,
1010                                           action,
1011                                           "configuration cannot be null");
1012         if (mark->id >= MLX5_FLOW_MARK_MAX)
1013                 return rte_flow_error_set(error, EINVAL,
1014                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1015                                           &mark->id,
1016                                           "mark id must in 0 <= id < "
1017                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1018         if (action_flags & MLX5_FLOW_ACTION_FLAG)
1019                 return rte_flow_error_set(error, EINVAL,
1020                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1021                                           "can't flag and mark in same flow");
1022         if (action_flags & MLX5_FLOW_ACTION_MARK)
1023                 return rte_flow_error_set(error, EINVAL,
1024                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1025                                           "can't have 2 mark actions in same"
1026                                           " flow");
1027         if (attr->egress)
1028                 return rte_flow_error_set(error, ENOTSUP,
1029                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1030                                           "mark action not supported for "
1031                                           "egress");
1032         return 0;
1033 }
1034
1035 /*
1036  * Validate the drop action.
1037  *
1038  * @param[in] action_flags
1039  *   Bit-fields that holds the actions detected until now.
1040  * @param[in] attr
1041  *   Attributes of flow that includes this action.
1042  * @param[out] error
1043  *   Pointer to error structure.
1044  *
1045  * @return
1046  *   0 on success, a negative errno value otherwise and rte_errno is set.
1047  */
1048 int
1049 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
1050                                const struct rte_flow_attr *attr,
1051                                struct rte_flow_error *error)
1052 {
1053         if (attr->egress)
1054                 return rte_flow_error_set(error, ENOTSUP,
1055                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1056                                           "drop action not supported for "
1057                                           "egress");
1058         return 0;
1059 }
1060
1061 /*
1062  * Validate the queue action.
1063  *
1064  * @param[in] action
1065  *   Pointer to the queue action.
1066  * @param[in] action_flags
1067  *   Bit-fields that holds the actions detected until now.
1068  * @param[in] dev
1069  *   Pointer to the Ethernet device structure.
1070  * @param[in] attr
1071  *   Attributes of flow that includes this action.
1072  * @param[out] error
1073  *   Pointer to error structure.
1074  *
1075  * @return
1076  *   0 on success, a negative errno value otherwise and rte_errno is set.
1077  */
1078 int
1079 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
1080                                 uint64_t action_flags,
1081                                 struct rte_eth_dev *dev,
1082                                 const struct rte_flow_attr *attr,
1083                                 struct rte_flow_error *error)
1084 {
1085         struct mlx5_priv *priv = dev->data->dev_private;
1086         const struct rte_flow_action_queue *queue = action->conf;
1087
1088         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1089                 return rte_flow_error_set(error, EINVAL,
1090                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1091                                           "can't have 2 fate actions in"
1092                                           " same flow");
1093         if (!priv->rxqs_n)
1094                 return rte_flow_error_set(error, EINVAL,
1095                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1096                                           NULL, "No Rx queues configured");
1097         if (queue->index >= priv->rxqs_n)
1098                 return rte_flow_error_set(error, EINVAL,
1099                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1100                                           &queue->index,
1101                                           "queue index out of range");
1102         if (!(*priv->rxqs)[queue->index])
1103                 return rte_flow_error_set(error, EINVAL,
1104                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1105                                           &queue->index,
1106                                           "queue is not configured");
1107         if (attr->egress)
1108                 return rte_flow_error_set(error, ENOTSUP,
1109                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1110                                           "queue action not supported for "
1111                                           "egress");
1112         return 0;
1113 }
1114
1115 /*
1116  * Validate the rss action.
1117  *
1118  * @param[in] action
1119  *   Pointer to the queue action.
1120  * @param[in] action_flags
1121  *   Bit-fields that holds the actions detected until now.
1122  * @param[in] dev
1123  *   Pointer to the Ethernet device structure.
1124  * @param[in] attr
1125  *   Attributes of flow that includes this action.
1126  * @param[in] item_flags
1127  *   Items that were detected.
1128  * @param[out] error
1129  *   Pointer to error structure.
1130  *
1131  * @return
1132  *   0 on success, a negative errno value otherwise and rte_errno is set.
1133  */
1134 int
1135 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1136                               uint64_t action_flags,
1137                               struct rte_eth_dev *dev,
1138                               const struct rte_flow_attr *attr,
1139                               uint64_t item_flags,
1140                               struct rte_flow_error *error)
1141 {
1142         struct mlx5_priv *priv = dev->data->dev_private;
1143         const struct rte_flow_action_rss *rss = action->conf;
1144         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1145         unsigned int i;
1146
1147         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1148                 return rte_flow_error_set(error, EINVAL,
1149                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1150                                           "can't have 2 fate actions"
1151                                           " in same flow");
1152         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1153             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1154                 return rte_flow_error_set(error, ENOTSUP,
1155                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1156                                           &rss->func,
1157                                           "RSS hash function not supported");
1158 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1159         if (rss->level > 2)
1160 #else
1161         if (rss->level > 1)
1162 #endif
1163                 return rte_flow_error_set(error, ENOTSUP,
1164                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1165                                           &rss->level,
1166                                           "tunnel RSS is not supported");
1167         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1168         if (rss->key_len == 0 && rss->key != NULL)
1169                 return rte_flow_error_set(error, ENOTSUP,
1170                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1171                                           &rss->key_len,
1172                                           "RSS hash key length 0");
1173         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1174                 return rte_flow_error_set(error, ENOTSUP,
1175                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1176                                           &rss->key_len,
1177                                           "RSS hash key too small");
1178         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1179                 return rte_flow_error_set(error, ENOTSUP,
1180                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1181                                           &rss->key_len,
1182                                           "RSS hash key too large");
1183         if (rss->queue_num > priv->config.ind_table_max_size)
1184                 return rte_flow_error_set(error, ENOTSUP,
1185                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1186                                           &rss->queue_num,
1187                                           "number of queues too large");
1188         if (rss->types & MLX5_RSS_HF_MASK)
1189                 return rte_flow_error_set(error, ENOTSUP,
1190                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1191                                           &rss->types,
1192                                           "some RSS protocols are not"
1193                                           " supported");
1194         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1195             !(rss->types & ETH_RSS_IP))
1196                 return rte_flow_error_set(error, EINVAL,
1197                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1198                                           "L3 partial RSS requested but L3 RSS"
1199                                           " type not specified");
1200         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1201             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1202                 return rte_flow_error_set(error, EINVAL,
1203                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1204                                           "L4 partial RSS requested but L4 RSS"
1205                                           " type not specified");
1206         if (!priv->rxqs_n)
1207                 return rte_flow_error_set(error, EINVAL,
1208                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1209                                           NULL, "No Rx queues configured");
1210         if (!rss->queue_num)
1211                 return rte_flow_error_set(error, EINVAL,
1212                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1213                                           NULL, "No queues configured");
1214         for (i = 0; i != rss->queue_num; ++i) {
1215                 if (rss->queue[i] >= priv->rxqs_n)
1216                         return rte_flow_error_set
1217                                 (error, EINVAL,
1218                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1219                                  &rss->queue[i], "queue index out of range");
1220                 if (!(*priv->rxqs)[rss->queue[i]])
1221                         return rte_flow_error_set
1222                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1223                                  &rss->queue[i], "queue is not configured");
1224         }
1225         if (attr->egress)
1226                 return rte_flow_error_set(error, ENOTSUP,
1227                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1228                                           "rss action not supported for "
1229                                           "egress");
1230         if (rss->level > 1 && !tunnel)
1231                 return rte_flow_error_set(error, EINVAL,
1232                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1233                                           "inner RSS is not supported for "
1234                                           "non-tunnel flows");
1235         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1236             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1237                 return rte_flow_error_set(error, EINVAL,
1238                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1239                                           "RSS on eCPRI is not supported now");
1240         }
1241         return 0;
1242 }
1243
1244 /*
1245  * Validate the default miss action.
1246  *
1247  * @param[in] action_flags
1248  *   Bit-fields that holds the actions detected until now.
1249  * @param[out] error
1250  *   Pointer to error structure.
1251  *
1252  * @return
1253  *   0 on success, a negative errno value otherwise and rte_errno is set.
1254  */
1255 int
1256 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1257                                 const struct rte_flow_attr *attr,
1258                                 struct rte_flow_error *error)
1259 {
1260         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1261                 return rte_flow_error_set(error, EINVAL,
1262                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1263                                           "can't have 2 fate actions in"
1264                                           " same flow");
1265         if (attr->egress)
1266                 return rte_flow_error_set(error, ENOTSUP,
1267                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1268                                           "default miss action not supported "
1269                                           "for egress");
1270         if (attr->group)
1271                 return rte_flow_error_set(error, ENOTSUP,
1272                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1273                                           "only group 0 is supported");
1274         if (attr->transfer)
1275                 return rte_flow_error_set(error, ENOTSUP,
1276                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1277                                           NULL, "transfer is not supported");
1278         return 0;
1279 }
1280
1281 /*
1282  * Validate the count action.
1283  *
1284  * @param[in] dev
1285  *   Pointer to the Ethernet device structure.
1286  * @param[in] attr
1287  *   Attributes of flow that includes this action.
1288  * @param[out] error
1289  *   Pointer to error structure.
1290  *
1291  * @return
1292  *   0 on success, a negative errno value otherwise and rte_errno is set.
1293  */
1294 int
1295 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1296                                 const struct rte_flow_attr *attr,
1297                                 struct rte_flow_error *error)
1298 {
1299         if (attr->egress)
1300                 return rte_flow_error_set(error, ENOTSUP,
1301                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1302                                           "count action not supported for "
1303                                           "egress");
1304         return 0;
1305 }
1306
1307 /**
1308  * Verify the @p attributes will be correctly understood by the NIC and store
1309  * them in the @p flow if everything is correct.
1310  *
1311  * @param[in] dev
1312  *   Pointer to the Ethernet device structure.
1313  * @param[in] attributes
1314  *   Pointer to flow attributes
1315  * @param[out] error
1316  *   Pointer to error structure.
1317  *
1318  * @return
1319  *   0 on success, a negative errno value otherwise and rte_errno is set.
1320  */
1321 int
1322 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1323                               const struct rte_flow_attr *attributes,
1324                               struct rte_flow_error *error)
1325 {
1326         struct mlx5_priv *priv = dev->data->dev_private;
1327         uint32_t priority_max = priv->config.flow_prio - 1;
1328
1329         if (attributes->group)
1330                 return rte_flow_error_set(error, ENOTSUP,
1331                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1332                                           NULL, "groups is not supported");
1333         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1334             attributes->priority >= priority_max)
1335                 return rte_flow_error_set(error, ENOTSUP,
1336                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1337                                           NULL, "priority out of range");
1338         if (attributes->egress)
1339                 return rte_flow_error_set(error, ENOTSUP,
1340                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1341                                           "egress is not supported");
1342         if (attributes->transfer && !priv->config.dv_esw_en)
1343                 return rte_flow_error_set(error, ENOTSUP,
1344                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1345                                           NULL, "transfer is not supported");
1346         if (!attributes->ingress)
1347                 return rte_flow_error_set(error, EINVAL,
1348                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1349                                           NULL,
1350                                           "ingress attribute is mandatory");
1351         return 0;
1352 }
1353
1354 /**
1355  * Validate ICMP6 item.
1356  *
1357  * @param[in] item
1358  *   Item specification.
1359  * @param[in] item_flags
1360  *   Bit-fields that holds the items detected until now.
1361  * @param[out] error
1362  *   Pointer to error structure.
1363  *
1364  * @return
1365  *   0 on success, a negative errno value otherwise and rte_errno is set.
1366  */
1367 int
1368 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1369                                uint64_t item_flags,
1370                                uint8_t target_protocol,
1371                                struct rte_flow_error *error)
1372 {
1373         const struct rte_flow_item_icmp6 *mask = item->mask;
1374         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1375         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1376                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1377         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1378                                       MLX5_FLOW_LAYER_OUTER_L4;
1379         int ret;
1380
1381         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1382                 return rte_flow_error_set(error, EINVAL,
1383                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1384                                           "protocol filtering not compatible"
1385                                           " with ICMP6 layer");
1386         if (!(item_flags & l3m))
1387                 return rte_flow_error_set(error, EINVAL,
1388                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1389                                           "IPv6 is mandatory to filter on"
1390                                           " ICMP6");
1391         if (item_flags & l4m)
1392                 return rte_flow_error_set(error, EINVAL,
1393                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1394                                           "multiple L4 layers not supported");
1395         if (!mask)
1396                 mask = &rte_flow_item_icmp6_mask;
1397         ret = mlx5_flow_item_acceptable
1398                 (item, (const uint8_t *)mask,
1399                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1400                  sizeof(struct rte_flow_item_icmp6), error);
1401         if (ret < 0)
1402                 return ret;
1403         return 0;
1404 }
1405
1406 /**
1407  * Validate ICMP item.
1408  *
1409  * @param[in] item
1410  *   Item specification.
1411  * @param[in] item_flags
1412  *   Bit-fields that holds the items detected until now.
1413  * @param[out] error
1414  *   Pointer to error structure.
1415  *
1416  * @return
1417  *   0 on success, a negative errno value otherwise and rte_errno is set.
1418  */
1419 int
1420 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1421                              uint64_t item_flags,
1422                              uint8_t target_protocol,
1423                              struct rte_flow_error *error)
1424 {
1425         const struct rte_flow_item_icmp *mask = item->mask;
1426         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1427         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1428                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1429         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1430                                       MLX5_FLOW_LAYER_OUTER_L4;
1431         int ret;
1432
1433         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1434                 return rte_flow_error_set(error, EINVAL,
1435                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1436                                           "protocol filtering not compatible"
1437                                           " with ICMP layer");
1438         if (!(item_flags & l3m))
1439                 return rte_flow_error_set(error, EINVAL,
1440                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1441                                           "IPv4 is mandatory to filter"
1442                                           " on ICMP");
1443         if (item_flags & l4m)
1444                 return rte_flow_error_set(error, EINVAL,
1445                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1446                                           "multiple L4 layers not supported");
1447         if (!mask)
1448                 mask = &rte_flow_item_icmp_mask;
1449         ret = mlx5_flow_item_acceptable
1450                 (item, (const uint8_t *)mask,
1451                  (const uint8_t *)&rte_flow_item_icmp_mask,
1452                  sizeof(struct rte_flow_item_icmp), error);
1453         if (ret < 0)
1454                 return ret;
1455         return 0;
1456 }
1457
1458 /**
1459  * Validate Ethernet item.
1460  *
1461  * @param[in] item
1462  *   Item specification.
1463  * @param[in] item_flags
1464  *   Bit-fields that holds the items detected until now.
1465  * @param[out] error
1466  *   Pointer to error structure.
1467  *
1468  * @return
1469  *   0 on success, a negative errno value otherwise and rte_errno is set.
1470  */
1471 int
1472 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1473                             uint64_t item_flags,
1474                             struct rte_flow_error *error)
1475 {
1476         const struct rte_flow_item_eth *mask = item->mask;
1477         const struct rte_flow_item_eth nic_mask = {
1478                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1479                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1480                 .type = RTE_BE16(0xffff),
1481         };
1482         int ret;
1483         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1484         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1485                                        MLX5_FLOW_LAYER_OUTER_L2;
1486
1487         if (item_flags & ethm)
1488                 return rte_flow_error_set(error, ENOTSUP,
1489                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1490                                           "multiple L2 layers not supported");
1491         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1492             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1493                 return rte_flow_error_set(error, EINVAL,
1494                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1495                                           "L2 layer should not follow "
1496                                           "L3 layers");
1497         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1498             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1499                 return rte_flow_error_set(error, EINVAL,
1500                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1501                                           "L2 layer should not follow VLAN");
1502         if (!mask)
1503                 mask = &rte_flow_item_eth_mask;
1504         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1505                                         (const uint8_t *)&nic_mask,
1506                                         sizeof(struct rte_flow_item_eth),
1507                                         error);
1508         return ret;
1509 }
1510
1511 /**
1512  * Validate VLAN item.
1513  *
1514  * @param[in] item
1515  *   Item specification.
1516  * @param[in] item_flags
1517  *   Bit-fields that holds the items detected until now.
1518  * @param[in] dev
1519  *   Ethernet device flow is being created on.
1520  * @param[out] error
1521  *   Pointer to error structure.
1522  *
1523  * @return
1524  *   0 on success, a negative errno value otherwise and rte_errno is set.
1525  */
1526 int
1527 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1528                              uint64_t item_flags,
1529                              struct rte_eth_dev *dev,
1530                              struct rte_flow_error *error)
1531 {
1532         const struct rte_flow_item_vlan *spec = item->spec;
1533         const struct rte_flow_item_vlan *mask = item->mask;
1534         const struct rte_flow_item_vlan nic_mask = {
1535                 .tci = RTE_BE16(UINT16_MAX),
1536                 .inner_type = RTE_BE16(UINT16_MAX),
1537         };
1538         uint16_t vlan_tag = 0;
1539         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1540         int ret;
1541         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1542                                         MLX5_FLOW_LAYER_INNER_L4) :
1543                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1544                                         MLX5_FLOW_LAYER_OUTER_L4);
1545         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1546                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1547
1548         if (item_flags & vlanm)
1549                 return rte_flow_error_set(error, EINVAL,
1550                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1551                                           "multiple VLAN layers not supported");
1552         else if ((item_flags & l34m) != 0)
1553                 return rte_flow_error_set(error, EINVAL,
1554                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1555                                           "VLAN cannot follow L3/L4 layer");
1556         if (!mask)
1557                 mask = &rte_flow_item_vlan_mask;
1558         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1559                                         (const uint8_t *)&nic_mask,
1560                                         sizeof(struct rte_flow_item_vlan),
1561                                         error);
1562         if (ret)
1563                 return ret;
1564         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1565                 struct mlx5_priv *priv = dev->data->dev_private;
1566
1567                 if (priv->vmwa_context) {
1568                         /*
1569                          * Non-NULL context means we have a virtual machine
1570                          * and SR-IOV enabled, we have to create VLAN interface
1571                          * to make hypervisor to setup E-Switch vport
1572                          * context correctly. We avoid creating the multiple
1573                          * VLAN interfaces, so we cannot support VLAN tag mask.
1574                          */
1575                         return rte_flow_error_set(error, EINVAL,
1576                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1577                                                   item,
1578                                                   "VLAN tag mask is not"
1579                                                   " supported in virtual"
1580                                                   " environment");
1581                 }
1582         }
1583         if (spec) {
1584                 vlan_tag = spec->tci;
1585                 vlan_tag &= mask->tci;
1586         }
1587         /*
1588          * From verbs perspective an empty VLAN is equivalent
1589          * to a packet without VLAN layer.
1590          */
1591         if (!vlan_tag)
1592                 return rte_flow_error_set(error, EINVAL,
1593                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1594                                           item->spec,
1595                                           "VLAN cannot be empty");
1596         return 0;
1597 }
1598
1599 /**
1600  * Validate IPV4 item.
1601  *
1602  * @param[in] item
1603  *   Item specification.
1604  * @param[in] item_flags
1605  *   Bit-fields that holds the items detected until now.
1606  * @param[in] last_item
1607  *   Previous validated item in the pattern items.
1608  * @param[in] ether_type
1609  *   Type in the ethernet layer header (including dot1q).
1610  * @param[in] acc_mask
1611  *   Acceptable mask, if NULL default internal default mask
1612  *   will be used to check whether item fields are supported.
1613  * @param[out] error
1614  *   Pointer to error structure.
1615  *
1616  * @return
1617  *   0 on success, a negative errno value otherwise and rte_errno is set.
1618  */
1619 int
1620 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1621                              uint64_t item_flags,
1622                              uint64_t last_item,
1623                              uint16_t ether_type,
1624                              const struct rte_flow_item_ipv4 *acc_mask,
1625                              struct rte_flow_error *error)
1626 {
1627         const struct rte_flow_item_ipv4 *mask = item->mask;
1628         const struct rte_flow_item_ipv4 *spec = item->spec;
1629         const struct rte_flow_item_ipv4 nic_mask = {
1630                 .hdr = {
1631                         .src_addr = RTE_BE32(0xffffffff),
1632                         .dst_addr = RTE_BE32(0xffffffff),
1633                         .type_of_service = 0xff,
1634                         .next_proto_id = 0xff,
1635                 },
1636         };
1637         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1638         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1639                                       MLX5_FLOW_LAYER_OUTER_L3;
1640         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1641                                       MLX5_FLOW_LAYER_OUTER_L4;
1642         int ret;
1643         uint8_t next_proto = 0xFF;
1644         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1645                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1646                                   MLX5_FLOW_LAYER_INNER_VLAN);
1647
1648         if ((last_item & l2_vlan) && ether_type &&
1649             ether_type != RTE_ETHER_TYPE_IPV4)
1650                 return rte_flow_error_set(error, EINVAL,
1651                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1652                                           "IPv4 cannot follow L2/VLAN layer "
1653                                           "which ether type is not IPv4");
1654         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1655                 if (mask && spec)
1656                         next_proto = mask->hdr.next_proto_id &
1657                                      spec->hdr.next_proto_id;
1658                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1659                         return rte_flow_error_set(error, EINVAL,
1660                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1661                                                   item,
1662                                                   "multiple tunnel "
1663                                                   "not supported");
1664         }
1665         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1666                 return rte_flow_error_set(error, EINVAL,
1667                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1668                                           "wrong tunnel type - IPv6 specified "
1669                                           "but IPv4 item provided");
1670         if (item_flags & l3m)
1671                 return rte_flow_error_set(error, ENOTSUP,
1672                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1673                                           "multiple L3 layers not supported");
1674         else if (item_flags & l4m)
1675                 return rte_flow_error_set(error, EINVAL,
1676                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1677                                           "L3 cannot follow an L4 layer.");
1678         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1679                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1680                 return rte_flow_error_set(error, EINVAL,
1681                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1682                                           "L3 cannot follow an NVGRE layer.");
1683         if (!mask)
1684                 mask = &rte_flow_item_ipv4_mask;
1685         else if (mask->hdr.next_proto_id != 0 &&
1686                  mask->hdr.next_proto_id != 0xff)
1687                 return rte_flow_error_set(error, EINVAL,
1688                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1689                                           "partial mask is not supported"
1690                                           " for protocol");
1691         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1692                                         acc_mask ? (const uint8_t *)acc_mask
1693                                                  : (const uint8_t *)&nic_mask,
1694                                         sizeof(struct rte_flow_item_ipv4),
1695                                         error);
1696         if (ret < 0)
1697                 return ret;
1698         return 0;
1699 }
1700
1701 /**
1702  * Validate IPV6 item.
1703  *
1704  * @param[in] item
1705  *   Item specification.
1706  * @param[in] item_flags
1707  *   Bit-fields that holds the items detected until now.
1708  * @param[in] last_item
1709  *   Previous validated item in the pattern items.
1710  * @param[in] ether_type
1711  *   Type in the ethernet layer header (including dot1q).
1712  * @param[in] acc_mask
1713  *   Acceptable mask, if NULL default internal default mask
1714  *   will be used to check whether item fields are supported.
1715  * @param[out] error
1716  *   Pointer to error structure.
1717  *
1718  * @return
1719  *   0 on success, a negative errno value otherwise and rte_errno is set.
1720  */
1721 int
1722 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1723                              uint64_t item_flags,
1724                              uint64_t last_item,
1725                              uint16_t ether_type,
1726                              const struct rte_flow_item_ipv6 *acc_mask,
1727                              struct rte_flow_error *error)
1728 {
1729         const struct rte_flow_item_ipv6 *mask = item->mask;
1730         const struct rte_flow_item_ipv6 *spec = item->spec;
1731         const struct rte_flow_item_ipv6 nic_mask = {
1732                 .hdr = {
1733                         .src_addr =
1734                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1735                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1736                         .dst_addr =
1737                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1738                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1739                         .vtc_flow = RTE_BE32(0xffffffff),
1740                         .proto = 0xff,
1741                 },
1742         };
1743         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1744         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1745                                       MLX5_FLOW_LAYER_OUTER_L3;
1746         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1747                                       MLX5_FLOW_LAYER_OUTER_L4;
1748         int ret;
1749         uint8_t next_proto = 0xFF;
1750         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1751                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1752                                   MLX5_FLOW_LAYER_INNER_VLAN);
1753
1754         if ((last_item & l2_vlan) && ether_type &&
1755             ether_type != RTE_ETHER_TYPE_IPV6)
1756                 return rte_flow_error_set(error, EINVAL,
1757                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1758                                           "IPv6 cannot follow L2/VLAN layer "
1759                                           "which ether type is not IPv6");
1760         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1761                 if (mask && spec)
1762                         next_proto = mask->hdr.proto & spec->hdr.proto;
1763                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1764                         return rte_flow_error_set(error, EINVAL,
1765                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1766                                                   item,
1767                                                   "multiple tunnel "
1768                                                   "not supported");
1769         }
1770         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1771                 return rte_flow_error_set(error, EINVAL,
1772                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1773                                           "wrong tunnel type - IPv4 specified "
1774                                           "but IPv6 item provided");
1775         if (item_flags & l3m)
1776                 return rte_flow_error_set(error, ENOTSUP,
1777                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1778                                           "multiple L3 layers not supported");
1779         else if (item_flags & l4m)
1780                 return rte_flow_error_set(error, EINVAL,
1781                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1782                                           "L3 cannot follow an L4 layer.");
1783         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1784                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1785                 return rte_flow_error_set(error, EINVAL,
1786                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1787                                           "L3 cannot follow an NVGRE layer.");
1788         if (!mask)
1789                 mask = &rte_flow_item_ipv6_mask;
1790         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1791                                         acc_mask ? (const uint8_t *)acc_mask
1792                                                  : (const uint8_t *)&nic_mask,
1793                                         sizeof(struct rte_flow_item_ipv6),
1794                                         error);
1795         if (ret < 0)
1796                 return ret;
1797         return 0;
1798 }
1799
1800 /**
1801  * Validate UDP item.
1802  *
1803  * @param[in] item
1804  *   Item specification.
1805  * @param[in] item_flags
1806  *   Bit-fields that holds the items detected until now.
1807  * @param[in] target_protocol
1808  *   The next protocol in the previous item.
1809  * @param[in] flow_mask
1810  *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
1811  * @param[out] error
1812  *   Pointer to error structure.
1813  *
1814  * @return
1815  *   0 on success, a negative errno value otherwise and rte_errno is set.
1816  */
1817 int
1818 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1819                             uint64_t item_flags,
1820                             uint8_t target_protocol,
1821                             struct rte_flow_error *error)
1822 {
1823         const struct rte_flow_item_udp *mask = item->mask;
1824         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1825         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1826                                       MLX5_FLOW_LAYER_OUTER_L3;
1827         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1828                                       MLX5_FLOW_LAYER_OUTER_L4;
1829         int ret;
1830
1831         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1832                 return rte_flow_error_set(error, EINVAL,
1833                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1834                                           "protocol filtering not compatible"
1835                                           " with UDP layer");
1836         if (!(item_flags & l3m))
1837                 return rte_flow_error_set(error, EINVAL,
1838                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1839                                           "L3 is mandatory to filter on L4");
1840         if (item_flags & l4m)
1841                 return rte_flow_error_set(error, EINVAL,
1842                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1843                                           "multiple L4 layers not supported");
1844         if (!mask)
1845                 mask = &rte_flow_item_udp_mask;
1846         ret = mlx5_flow_item_acceptable
1847                 (item, (const uint8_t *)mask,
1848                  (const uint8_t *)&rte_flow_item_udp_mask,
1849                  sizeof(struct rte_flow_item_udp), error);
1850         if (ret < 0)
1851                 return ret;
1852         return 0;
1853 }
1854
1855 /**
1856  * Validate TCP item.
1857  *
1858  * @param[in] item
1859  *   Item specification.
1860  * @param[in] item_flags
1861  *   Bit-fields that holds the items detected until now.
1862  * @param[in] target_protocol
1863  *   The next protocol in the previous item.
1864  * @param[out] error
1865  *   Pointer to error structure.
1866  *
1867  * @return
1868  *   0 on success, a negative errno value otherwise and rte_errno is set.
1869  */
1870 int
1871 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1872                             uint64_t item_flags,
1873                             uint8_t target_protocol,
1874                             const struct rte_flow_item_tcp *flow_mask,
1875                             struct rte_flow_error *error)
1876 {
1877         const struct rte_flow_item_tcp *mask = item->mask;
1878         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1879         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1880                                       MLX5_FLOW_LAYER_OUTER_L3;
1881         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1882                                       MLX5_FLOW_LAYER_OUTER_L4;
1883         int ret;
1884
1885         MLX5_ASSERT(flow_mask);
1886         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1887                 return rte_flow_error_set(error, EINVAL,
1888                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1889                                           "protocol filtering not compatible"
1890                                           " with TCP layer");
1891         if (!(item_flags & l3m))
1892                 return rte_flow_error_set(error, EINVAL,
1893                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1894                                           "L3 is mandatory to filter on L4");
1895         if (item_flags & l4m)
1896                 return rte_flow_error_set(error, EINVAL,
1897                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1898                                           "multiple L4 layers not supported");
1899         if (!mask)
1900                 mask = &rte_flow_item_tcp_mask;
1901         ret = mlx5_flow_item_acceptable
1902                 (item, (const uint8_t *)mask,
1903                  (const uint8_t *)flow_mask,
1904                  sizeof(struct rte_flow_item_tcp), error);
1905         if (ret < 0)
1906                 return ret;
1907         return 0;
1908 }
1909
1910 /**
1911  * Validate VXLAN item.
1912  *
1913  * @param[in] item
1914  *   Item specification.
1915  * @param[in] item_flags
1916  *   Bit-fields that holds the items detected until now.
1917  * @param[in] target_protocol
1918  *   The next protocol in the previous item.
1919  * @param[out] error
1920  *   Pointer to error structure.
1921  *
1922  * @return
1923  *   0 on success, a negative errno value otherwise and rte_errno is set.
1924  */
1925 int
1926 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1927                               uint64_t item_flags,
1928                               struct rte_flow_error *error)
1929 {
1930         const struct rte_flow_item_vxlan *spec = item->spec;
1931         const struct rte_flow_item_vxlan *mask = item->mask;
1932         int ret;
1933         union vni {
1934                 uint32_t vlan_id;
1935                 uint8_t vni[4];
1936         } id = { .vlan_id = 0, };
1937
1938
1939         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1940                 return rte_flow_error_set(error, ENOTSUP,
1941                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1942                                           "multiple tunnel layers not"
1943                                           " supported");
1944         /*
1945          * Verify only UDPv4 is present as defined in
1946          * https://tools.ietf.org/html/rfc7348
1947          */
1948         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1949                 return rte_flow_error_set(error, EINVAL,
1950                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1951                                           "no outer UDP layer found");
1952         if (!mask)
1953                 mask = &rte_flow_item_vxlan_mask;
1954         ret = mlx5_flow_item_acceptable
1955                 (item, (const uint8_t *)mask,
1956                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1957                  sizeof(struct rte_flow_item_vxlan),
1958                  error);
1959         if (ret < 0)
1960                 return ret;
1961         if (spec) {
1962                 memcpy(&id.vni[1], spec->vni, 3);
1963                 memcpy(&id.vni[1], mask->vni, 3);
1964         }
1965         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1966                 return rte_flow_error_set(error, ENOTSUP,
1967                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1968                                           "VXLAN tunnel must be fully defined");
1969         return 0;
1970 }
1971
1972 /**
1973  * Validate VXLAN_GPE item.
1974  *
1975  * @param[in] item
1976  *   Item specification.
1977  * @param[in] item_flags
1978  *   Bit-fields that holds the items detected until now.
1979  * @param[in] priv
1980  *   Pointer to the private data structure.
1981  * @param[in] target_protocol
1982  *   The next protocol in the previous item.
1983  * @param[out] error
1984  *   Pointer to error structure.
1985  *
1986  * @return
1987  *   0 on success, a negative errno value otherwise and rte_errno is set.
1988  */
1989 int
1990 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1991                                   uint64_t item_flags,
1992                                   struct rte_eth_dev *dev,
1993                                   struct rte_flow_error *error)
1994 {
1995         struct mlx5_priv *priv = dev->data->dev_private;
1996         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1997         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1998         int ret;
1999         union vni {
2000                 uint32_t vlan_id;
2001                 uint8_t vni[4];
2002         } id = { .vlan_id = 0, };
2003
2004         if (!priv->config.l3_vxlan_en)
2005                 return rte_flow_error_set(error, ENOTSUP,
2006                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2007                                           "L3 VXLAN is not enabled by device"
2008                                           " parameter and/or not configured in"
2009                                           " firmware");
2010         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2011                 return rte_flow_error_set(error, ENOTSUP,
2012                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2013                                           "multiple tunnel layers not"
2014                                           " supported");
2015         /*
2016          * Verify only UDPv4 is present as defined in
2017          * https://tools.ietf.org/html/rfc7348
2018          */
2019         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2020                 return rte_flow_error_set(error, EINVAL,
2021                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2022                                           "no outer UDP layer found");
2023         if (!mask)
2024                 mask = &rte_flow_item_vxlan_gpe_mask;
2025         ret = mlx5_flow_item_acceptable
2026                 (item, (const uint8_t *)mask,
2027                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
2028                  sizeof(struct rte_flow_item_vxlan_gpe),
2029                  error);
2030         if (ret < 0)
2031                 return ret;
2032         if (spec) {
2033                 if (spec->protocol)
2034                         return rte_flow_error_set(error, ENOTSUP,
2035                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2036                                                   item,
2037                                                   "VxLAN-GPE protocol"
2038                                                   " not supported");
2039                 memcpy(&id.vni[1], spec->vni, 3);
2040                 memcpy(&id.vni[1], mask->vni, 3);
2041         }
2042         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2043                 return rte_flow_error_set(error, ENOTSUP,
2044                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2045                                           "VXLAN-GPE tunnel must be fully"
2046                                           " defined");
2047         return 0;
2048 }
2049 /**
2050  * Validate GRE Key item.
2051  *
2052  * @param[in] item
2053  *   Item specification.
2054  * @param[in] item_flags
2055  *   Bit flags to mark detected items.
2056  * @param[in] gre_item
2057  *   Pointer to gre_item
2058  * @param[out] error
2059  *   Pointer to error structure.
2060  *
2061  * @return
2062  *   0 on success, a negative errno value otherwise and rte_errno is set.
2063  */
2064 int
2065 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
2066                                 uint64_t item_flags,
2067                                 const struct rte_flow_item *gre_item,
2068                                 struct rte_flow_error *error)
2069 {
2070         const rte_be32_t *mask = item->mask;
2071         int ret = 0;
2072         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2073         const struct rte_flow_item_gre *gre_spec;
2074         const struct rte_flow_item_gre *gre_mask;
2075
2076         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2077                 return rte_flow_error_set(error, ENOTSUP,
2078                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2079                                           "Multiple GRE key not support");
2080         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2081                 return rte_flow_error_set(error, ENOTSUP,
2082                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2083                                           "No preceding GRE header");
2084         if (item_flags & MLX5_FLOW_LAYER_INNER)
2085                 return rte_flow_error_set(error, ENOTSUP,
2086                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2087                                           "GRE key following a wrong item");
2088         gre_mask = gre_item->mask;
2089         if (!gre_mask)
2090                 gre_mask = &rte_flow_item_gre_mask;
2091         gre_spec = gre_item->spec;
2092         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2093                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2094                 return rte_flow_error_set(error, EINVAL,
2095                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2096                                           "Key bit must be on");
2097
2098         if (!mask)
2099                 mask = &gre_key_default_mask;
2100         ret = mlx5_flow_item_acceptable
2101                 (item, (const uint8_t *)mask,
2102                  (const uint8_t *)&gre_key_default_mask,
2103                  sizeof(rte_be32_t), error);
2104         return ret;
2105 }
2106
2107 /**
2108  * Validate GRE item.
2109  *
2110  * @param[in] item
2111  *   Item specification.
2112  * @param[in] item_flags
2113  *   Bit flags to mark detected items.
2114  * @param[in] target_protocol
2115  *   The next protocol in the previous item.
2116  * @param[out] error
2117  *   Pointer to error structure.
2118  *
2119  * @return
2120  *   0 on success, a negative errno value otherwise and rte_errno is set.
2121  */
2122 int
2123 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2124                             uint64_t item_flags,
2125                             uint8_t target_protocol,
2126                             struct rte_flow_error *error)
2127 {
2128         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2129         const struct rte_flow_item_gre *mask = item->mask;
2130         int ret;
2131         const struct rte_flow_item_gre nic_mask = {
2132                 .c_rsvd0_ver = RTE_BE16(0xB000),
2133                 .protocol = RTE_BE16(UINT16_MAX),
2134         };
2135
2136         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2137                 return rte_flow_error_set(error, EINVAL,
2138                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2139                                           "protocol filtering not compatible"
2140                                           " with this GRE layer");
2141         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2142                 return rte_flow_error_set(error, ENOTSUP,
2143                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2144                                           "multiple tunnel layers not"
2145                                           " supported");
2146         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2147                 return rte_flow_error_set(error, ENOTSUP,
2148                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2149                                           "L3 Layer is missing");
2150         if (!mask)
2151                 mask = &rte_flow_item_gre_mask;
2152         ret = mlx5_flow_item_acceptable
2153                 (item, (const uint8_t *)mask,
2154                  (const uint8_t *)&nic_mask,
2155                  sizeof(struct rte_flow_item_gre), error);
2156         if (ret < 0)
2157                 return ret;
2158 #ifndef HAVE_MLX5DV_DR
2159 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2160         if (spec && (spec->protocol & mask->protocol))
2161                 return rte_flow_error_set(error, ENOTSUP,
2162                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2163                                           "without MPLS support the"
2164                                           " specification cannot be used for"
2165                                           " filtering");
2166 #endif
2167 #endif
2168         return 0;
2169 }
2170
2171 /**
2172  * Validate Geneve item.
2173  *
2174  * @param[in] item
2175  *   Item specification.
2176  * @param[in] itemFlags
2177  *   Bit-fields that holds the items detected until now.
2178  * @param[in] enPriv
2179  *   Pointer to the private data structure.
2180  * @param[out] error
2181  *   Pointer to error structure.
2182  *
2183  * @return
2184  *   0 on success, a negative errno value otherwise and rte_errno is set.
2185  */
2186
2187 int
2188 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2189                                uint64_t item_flags,
2190                                struct rte_eth_dev *dev,
2191                                struct rte_flow_error *error)
2192 {
2193         struct mlx5_priv *priv = dev->data->dev_private;
2194         const struct rte_flow_item_geneve *spec = item->spec;
2195         const struct rte_flow_item_geneve *mask = item->mask;
2196         int ret;
2197         uint16_t gbhdr;
2198         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2199                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2200         const struct rte_flow_item_geneve nic_mask = {
2201                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2202                 .vni = "\xff\xff\xff",
2203                 .protocol = RTE_BE16(UINT16_MAX),
2204         };
2205
2206         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2207                 return rte_flow_error_set(error, ENOTSUP,
2208                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2209                                           "L3 Geneve is not enabled by device"
2210                                           " parameter and/or not configured in"
2211                                           " firmware");
2212         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2213                 return rte_flow_error_set(error, ENOTSUP,
2214                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2215                                           "multiple tunnel layers not"
2216                                           " supported");
2217         /*
2218          * Verify only UDPv4 is present as defined in
2219          * https://tools.ietf.org/html/rfc7348
2220          */
2221         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2222                 return rte_flow_error_set(error, EINVAL,
2223                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2224                                           "no outer UDP layer found");
2225         if (!mask)
2226                 mask = &rte_flow_item_geneve_mask;
2227         ret = mlx5_flow_item_acceptable
2228                                   (item, (const uint8_t *)mask,
2229                                    (const uint8_t *)&nic_mask,
2230                                    sizeof(struct rte_flow_item_geneve), error);
2231         if (ret)
2232                 return ret;
2233         if (spec) {
2234                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2235                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2236                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2237                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2238                         return rte_flow_error_set(error, ENOTSUP,
2239                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2240                                                   item,
2241                                                   "Geneve protocol unsupported"
2242                                                   " fields are being used");
2243                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2244                         return rte_flow_error_set
2245                                         (error, ENOTSUP,
2246                                          RTE_FLOW_ERROR_TYPE_ITEM,
2247                                          item,
2248                                          "Unsupported Geneve options length");
2249         }
2250         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2251                 return rte_flow_error_set
2252                                     (error, ENOTSUP,
2253                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2254                                      "Geneve tunnel must be fully defined");
2255         return 0;
2256 }
2257
2258 /**
2259  * Validate MPLS item.
2260  *
2261  * @param[in] dev
2262  *   Pointer to the rte_eth_dev structure.
2263  * @param[in] item
2264  *   Item specification.
2265  * @param[in] item_flags
2266  *   Bit-fields that holds the items detected until now.
2267  * @param[in] prev_layer
2268  *   The protocol layer indicated in previous item.
2269  * @param[out] error
2270  *   Pointer to error structure.
2271  *
2272  * @return
2273  *   0 on success, a negative errno value otherwise and rte_errno is set.
2274  */
2275 int
2276 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2277                              const struct rte_flow_item *item __rte_unused,
2278                              uint64_t item_flags __rte_unused,
2279                              uint64_t prev_layer __rte_unused,
2280                              struct rte_flow_error *error)
2281 {
2282 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2283         const struct rte_flow_item_mpls *mask = item->mask;
2284         struct mlx5_priv *priv = dev->data->dev_private;
2285         int ret;
2286
2287         if (!priv->config.mpls_en)
2288                 return rte_flow_error_set(error, ENOTSUP,
2289                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2290                                           "MPLS not supported or"
2291                                           " disabled in firmware"
2292                                           " configuration.");
2293         /* MPLS over IP, UDP, GRE is allowed */
2294         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2295                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2296                             MLX5_FLOW_LAYER_GRE)))
2297                 return rte_flow_error_set(error, EINVAL,
2298                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2299                                           "protocol filtering not compatible"
2300                                           " with MPLS layer");
2301         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2302         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2303             !(item_flags & MLX5_FLOW_LAYER_GRE))
2304                 return rte_flow_error_set(error, ENOTSUP,
2305                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2306                                           "multiple tunnel layers not"
2307                                           " supported");
2308         if (!mask)
2309                 mask = &rte_flow_item_mpls_mask;
2310         ret = mlx5_flow_item_acceptable
2311                 (item, (const uint8_t *)mask,
2312                  (const uint8_t *)&rte_flow_item_mpls_mask,
2313                  sizeof(struct rte_flow_item_mpls), error);
2314         if (ret < 0)
2315                 return ret;
2316         return 0;
2317 #else
2318         return rte_flow_error_set(error, ENOTSUP,
2319                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2320                                   "MPLS is not supported by Verbs, please"
2321                                   " update.");
2322 #endif
2323 }
2324
2325 /**
2326  * Validate NVGRE item.
2327  *
2328  * @param[in] item
2329  *   Item specification.
2330  * @param[in] item_flags
2331  *   Bit flags to mark detected items.
2332  * @param[in] target_protocol
2333  *   The next protocol in the previous item.
2334  * @param[out] error
2335  *   Pointer to error structure.
2336  *
2337  * @return
2338  *   0 on success, a negative errno value otherwise and rte_errno is set.
2339  */
2340 int
2341 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2342                               uint64_t item_flags,
2343                               uint8_t target_protocol,
2344                               struct rte_flow_error *error)
2345 {
2346         const struct rte_flow_item_nvgre *mask = item->mask;
2347         int ret;
2348
2349         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2350                 return rte_flow_error_set(error, EINVAL,
2351                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2352                                           "protocol filtering not compatible"
2353                                           " with this GRE layer");
2354         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2355                 return rte_flow_error_set(error, ENOTSUP,
2356                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2357                                           "multiple tunnel layers not"
2358                                           " supported");
2359         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2360                 return rte_flow_error_set(error, ENOTSUP,
2361                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2362                                           "L3 Layer is missing");
2363         if (!mask)
2364                 mask = &rte_flow_item_nvgre_mask;
2365         ret = mlx5_flow_item_acceptable
2366                 (item, (const uint8_t *)mask,
2367                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2368                  sizeof(struct rte_flow_item_nvgre), error);
2369         if (ret < 0)
2370                 return ret;
2371         return 0;
2372 }
2373
2374 /**
2375  * Validate eCPRI item.
2376  *
2377  * @param[in] item
2378  *   Item specification.
2379  * @param[in] item_flags
2380  *   Bit-fields that holds the items detected until now.
2381  * @param[in] last_item
2382  *   Previous validated item in the pattern items.
2383  * @param[in] ether_type
2384  *   Type in the ethernet layer header (including dot1q).
2385  * @param[in] acc_mask
2386  *   Acceptable mask, if NULL default internal default mask
2387  *   will be used to check whether item fields are supported.
2388  * @param[out] error
2389  *   Pointer to error structure.
2390  *
2391  * @return
2392  *   0 on success, a negative errno value otherwise and rte_errno is set.
2393  */
2394 int
2395 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2396                               uint64_t item_flags,
2397                               uint64_t last_item,
2398                               uint16_t ether_type,
2399                               const struct rte_flow_item_ecpri *acc_mask,
2400                               struct rte_flow_error *error)
2401 {
2402         const struct rte_flow_item_ecpri *mask = item->mask;
2403         const struct rte_flow_item_ecpri nic_mask = {
2404                 .hdr = {
2405                         .common = {
2406                                 .u32 =
2407                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2408                                         .type = 0xFF,
2409                                         }).u32),
2410                         },
2411                         .dummy[0] = 0xFFFFFFFF,
2412                 },
2413         };
2414         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2415                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2416         struct rte_flow_item_ecpri mask_lo;
2417
2418         if ((last_item & outer_l2_vlan) && ether_type &&
2419             ether_type != RTE_ETHER_TYPE_ECPRI)
2420                 return rte_flow_error_set(error, EINVAL,
2421                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2422                                           "eCPRI cannot follow L2/VLAN layer "
2423                                           "which ether type is not 0xAEFE.");
2424         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2425                 return rte_flow_error_set(error, EINVAL,
2426                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2427                                           "eCPRI with tunnel is not supported "
2428                                           "right now.");
2429         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2430                 return rte_flow_error_set(error, ENOTSUP,
2431                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2432                                           "multiple L3 layers not supported");
2433         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
2434                 return rte_flow_error_set(error, EINVAL,
2435                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2436                                           "eCPRI cannot follow a TCP layer.");
2437         /* In specification, eCPRI could be over UDP layer. */
2438         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
2439                 return rte_flow_error_set(error, EINVAL,
2440                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2441                                           "eCPRI over UDP layer is not yet "
2442                                           "supported right now.");
2443         /* Mask for type field in common header could be zero. */
2444         if (!mask)
2445                 mask = &rte_flow_item_ecpri_mask;
2446         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
2447         /* Input mask is in big-endian format. */
2448         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
2449                 return rte_flow_error_set(error, EINVAL,
2450                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2451                                           "partial mask is not supported "
2452                                           "for protocol");
2453         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
2454                 return rte_flow_error_set(error, EINVAL,
2455                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2456                                           "message header mask must be after "
2457                                           "a type mask");
2458         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2459                                          acc_mask ? (const uint8_t *)acc_mask
2460                                                   : (const uint8_t *)&nic_mask,
2461                                          sizeof(struct rte_flow_item_ecpri),
2462                                          error);
2463 }
2464
2465 /* Allocate unique ID for the split Q/RSS subflows. */
2466 static uint32_t
2467 flow_qrss_get_id(struct rte_eth_dev *dev)
2468 {
2469         struct mlx5_priv *priv = dev->data->dev_private;
2470         uint32_t qrss_id, ret;
2471
2472         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2473         if (ret)
2474                 return 0;
2475         MLX5_ASSERT(qrss_id);
2476         return qrss_id;
2477 }
2478
2479 /* Free unique ID for the split Q/RSS subflows. */
2480 static void
2481 flow_qrss_free_id(struct rte_eth_dev *dev,  uint32_t qrss_id)
2482 {
2483         struct mlx5_priv *priv = dev->data->dev_private;
2484
2485         if (qrss_id)
2486                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2487 }
2488
2489 /**
2490  * Release resource related QUEUE/RSS action split.
2491  *
2492  * @param dev
2493  *   Pointer to Ethernet device.
2494  * @param flow
2495  *   Flow to release id's from.
2496  */
2497 static void
2498 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2499                              struct rte_flow *flow)
2500 {
2501         struct mlx5_priv *priv = dev->data->dev_private;
2502         uint32_t handle_idx;
2503         struct mlx5_flow_handle *dev_handle;
2504
2505         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2506                        handle_idx, dev_handle, next)
2507                 if (dev_handle->split_flow_id)
2508                         flow_qrss_free_id(dev, dev_handle->split_flow_id);
2509 }
2510
2511 static int
2512 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2513                    const struct rte_flow_attr *attr __rte_unused,
2514                    const struct rte_flow_item items[] __rte_unused,
2515                    const struct rte_flow_action actions[] __rte_unused,
2516                    bool external __rte_unused,
2517                    int hairpin __rte_unused,
2518                    struct rte_flow_error *error)
2519 {
2520         return rte_flow_error_set(error, ENOTSUP,
2521                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2522 }
2523
2524 static struct mlx5_flow *
2525 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2526                   const struct rte_flow_attr *attr __rte_unused,
2527                   const struct rte_flow_item items[] __rte_unused,
2528                   const struct rte_flow_action actions[] __rte_unused,
2529                   struct rte_flow_error *error)
2530 {
2531         rte_flow_error_set(error, ENOTSUP,
2532                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2533         return NULL;
2534 }
2535
2536 static int
2537 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2538                     struct mlx5_flow *dev_flow __rte_unused,
2539                     const struct rte_flow_attr *attr __rte_unused,
2540                     const struct rte_flow_item items[] __rte_unused,
2541                     const struct rte_flow_action actions[] __rte_unused,
2542                     struct rte_flow_error *error)
2543 {
2544         return rte_flow_error_set(error, ENOTSUP,
2545                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2546 }
2547
2548 static int
2549 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2550                 struct rte_flow *flow __rte_unused,
2551                 struct rte_flow_error *error)
2552 {
2553         return rte_flow_error_set(error, ENOTSUP,
2554                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2555 }
2556
2557 static void
2558 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2559                  struct rte_flow *flow __rte_unused)
2560 {
2561 }
2562
2563 static void
2564 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2565                   struct rte_flow *flow __rte_unused)
2566 {
2567 }
2568
2569 static int
2570 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2571                 struct rte_flow *flow __rte_unused,
2572                 const struct rte_flow_action *actions __rte_unused,
2573                 void *data __rte_unused,
2574                 struct rte_flow_error *error)
2575 {
2576         return rte_flow_error_set(error, ENOTSUP,
2577                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2578 }
2579
2580 /* Void driver to protect from null pointer reference. */
2581 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2582         .validate = flow_null_validate,
2583         .prepare = flow_null_prepare,
2584         .translate = flow_null_translate,
2585         .apply = flow_null_apply,
2586         .remove = flow_null_remove,
2587         .destroy = flow_null_destroy,
2588         .query = flow_null_query,
2589 };
2590
2591 /**
2592  * Select flow driver type according to flow attributes and device
2593  * configuration.
2594  *
2595  * @param[in] dev
2596  *   Pointer to the dev structure.
2597  * @param[in] attr
2598  *   Pointer to the flow attributes.
2599  *
2600  * @return
2601  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
2602  */
2603 static enum mlx5_flow_drv_type
2604 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2605 {
2606         struct mlx5_priv *priv = dev->data->dev_private;
2607         /* The OS can determine first a specific flow type (DV, VERBS) */
2608         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
2609
2610         if (type != MLX5_FLOW_TYPE_MAX)
2611                 return type;
2612         /* If no OS specific type - continue with DV/VERBS selection */
2613         if (attr->transfer && priv->config.dv_esw_en)
2614                 type = MLX5_FLOW_TYPE_DV;
2615         if (!attr->transfer)
2616                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2617                                                  MLX5_FLOW_TYPE_VERBS;
2618         return type;
2619 }
2620
2621 #define flow_get_drv_ops(type) flow_drv_ops[type]
2622
2623 /**
2624  * Flow driver validation API. This abstracts calling driver specific functions.
2625  * The type of flow driver is determined according to flow attributes.
2626  *
2627  * @param[in] dev
2628  *   Pointer to the dev structure.
2629  * @param[in] attr
2630  *   Pointer to the flow attributes.
2631  * @param[in] items
2632  *   Pointer to the list of items.
2633  * @param[in] actions
2634  *   Pointer to the list of actions.
2635  * @param[in] external
2636  *   This flow rule is created by request external to PMD.
2637  * @param[in] hairpin
2638  *   Number of hairpin TX actions, 0 means classic flow.
2639  * @param[out] error
2640  *   Pointer to the error structure.
2641  *
2642  * @return
2643  *   0 on success, a negative errno value otherwise and rte_errno is set.
2644  */
2645 static inline int
2646 flow_drv_validate(struct rte_eth_dev *dev,
2647                   const struct rte_flow_attr *attr,
2648                   const struct rte_flow_item items[],
2649                   const struct rte_flow_action actions[],
2650                   bool external, int hairpin, struct rte_flow_error *error)
2651 {
2652         const struct mlx5_flow_driver_ops *fops;
2653         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2654
2655         fops = flow_get_drv_ops(type);
2656         return fops->validate(dev, attr, items, actions, external,
2657                               hairpin, error);
2658 }
2659
2660 /**
2661  * Flow driver preparation API. This abstracts calling driver specific
2662  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2663  * calculates the size of memory required for device flow, allocates the memory,
2664  * initializes the device flow and returns the pointer.
2665  *
2666  * @note
2667  *   This function initializes device flow structure such as dv or verbs in
2668  *   struct mlx5_flow. However, it is caller's responsibility to initialize the
2669  *   rest. For example, adding returning device flow to flow->dev_flow list and
2670  *   setting backward reference to the flow should be done out of this function.
2671  *   layers field is not filled either.
2672  *
2673  * @param[in] dev
2674  *   Pointer to the dev structure.
2675  * @param[in] attr
2676  *   Pointer to the flow attributes.
2677  * @param[in] items
2678  *   Pointer to the list of items.
2679  * @param[in] actions
2680  *   Pointer to the list of actions.
2681  * @param[in] flow_idx
2682  *   This memory pool index to the flow.
2683  * @param[out] error
2684  *   Pointer to the error structure.
2685  *
2686  * @return
2687  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2688  */
2689 static inline struct mlx5_flow *
2690 flow_drv_prepare(struct rte_eth_dev *dev,
2691                  const struct rte_flow *flow,
2692                  const struct rte_flow_attr *attr,
2693                  const struct rte_flow_item items[],
2694                  const struct rte_flow_action actions[],
2695                  uint32_t flow_idx,
2696                  struct rte_flow_error *error)
2697 {
2698         const struct mlx5_flow_driver_ops *fops;
2699         enum mlx5_flow_drv_type type = flow->drv_type;
2700         struct mlx5_flow *mlx5_flow = NULL;
2701
2702         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2703         fops = flow_get_drv_ops(type);
2704         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
2705         if (mlx5_flow)
2706                 mlx5_flow->flow_idx = flow_idx;
2707         return mlx5_flow;
2708 }
2709
2710 /**
2711  * Flow driver translation API. This abstracts calling driver specific
2712  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2713  * translates a generic flow into a driver flow. flow_drv_prepare() must
2714  * precede.
2715  *
2716  * @note
2717  *   dev_flow->layers could be filled as a result of parsing during translation
2718  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2719  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2720  *   flow->actions could be overwritten even though all the expanded dev_flows
2721  *   have the same actions.
2722  *
2723  * @param[in] dev
2724  *   Pointer to the rte dev structure.
2725  * @param[in, out] dev_flow
2726  *   Pointer to the mlx5 flow.
2727  * @param[in] attr
2728  *   Pointer to the flow attributes.
2729  * @param[in] items
2730  *   Pointer to the list of items.
2731  * @param[in] actions
2732  *   Pointer to the list of actions.
2733  * @param[out] error
2734  *   Pointer to the error structure.
2735  *
2736  * @return
2737  *   0 on success, a negative errno value otherwise and rte_errno is set.
2738  */
2739 static inline int
2740 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2741                    const struct rte_flow_attr *attr,
2742                    const struct rte_flow_item items[],
2743                    const struct rte_flow_action actions[],
2744                    struct rte_flow_error *error)
2745 {
2746         const struct mlx5_flow_driver_ops *fops;
2747         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2748
2749         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2750         fops = flow_get_drv_ops(type);
2751         return fops->translate(dev, dev_flow, attr, items, actions, error);
2752 }
2753
2754 /**
2755  * Flow driver apply API. This abstracts calling driver specific functions.
2756  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2757  * translated driver flows on to device. flow_drv_translate() must precede.
2758  *
2759  * @param[in] dev
2760  *   Pointer to Ethernet device structure.
2761  * @param[in, out] flow
2762  *   Pointer to flow structure.
2763  * @param[out] error
2764  *   Pointer to error structure.
2765  *
2766  * @return
2767  *   0 on success, a negative errno value otherwise and rte_errno is set.
2768  */
2769 static inline int
2770 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2771                struct rte_flow_error *error)
2772 {
2773         const struct mlx5_flow_driver_ops *fops;
2774         enum mlx5_flow_drv_type type = flow->drv_type;
2775
2776         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2777         fops = flow_get_drv_ops(type);
2778         return fops->apply(dev, flow, error);
2779 }
2780
2781 /**
2782  * Flow driver remove API. This abstracts calling driver specific functions.
2783  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2784  * on device. All the resources of the flow should be freed by calling
2785  * flow_drv_destroy().
2786  *
2787  * @param[in] dev
2788  *   Pointer to Ethernet device.
2789  * @param[in, out] flow
2790  *   Pointer to flow structure.
2791  */
2792 static inline void
2793 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2794 {
2795         const struct mlx5_flow_driver_ops *fops;
2796         enum mlx5_flow_drv_type type = flow->drv_type;
2797
2798         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2799         fops = flow_get_drv_ops(type);
2800         fops->remove(dev, flow);
2801 }
2802
2803 /**
2804  * Flow driver destroy API. This abstracts calling driver specific functions.
2805  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2806  * on device and releases resources of the flow.
2807  *
2808  * @param[in] dev
2809  *   Pointer to Ethernet device.
2810  * @param[in, out] flow
2811  *   Pointer to flow structure.
2812  */
2813 static inline void
2814 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2815 {
2816         const struct mlx5_flow_driver_ops *fops;
2817         enum mlx5_flow_drv_type type = flow->drv_type;
2818
2819         flow_mreg_split_qrss_release(dev, flow);
2820         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2821         fops = flow_get_drv_ops(type);
2822         fops->destroy(dev, flow);
2823 }
2824
2825 /**
2826  * Get RSS action from the action list.
2827  *
2828  * @param[in] actions
2829  *   Pointer to the list of actions.
2830  *
2831  * @return
2832  *   Pointer to the RSS action if exist, else return NULL.
2833  */
2834 static const struct rte_flow_action_rss*
2835 flow_get_rss_action(const struct rte_flow_action actions[])
2836 {
2837         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2838                 switch (actions->type) {
2839                 case RTE_FLOW_ACTION_TYPE_RSS:
2840                         return (const struct rte_flow_action_rss *)
2841                                actions->conf;
2842                 default:
2843                         break;
2844                 }
2845         }
2846         return NULL;
2847 }
2848
2849 static unsigned int
2850 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2851 {
2852         const struct rte_flow_item *item;
2853         unsigned int has_vlan = 0;
2854
2855         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2856                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2857                         has_vlan = 1;
2858                         break;
2859                 }
2860         }
2861         if (has_vlan)
2862                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2863                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2864         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2865                                MLX5_EXPANSION_ROOT_OUTER;
2866 }
2867
2868 /**
2869  *  Get layer flags from the prefix flow.
2870  *
2871  *  Some flows may be split to several subflows, the prefix subflow gets the
2872  *  match items and the suffix sub flow gets the actions.
2873  *  Some actions need the user defined match item flags to get the detail for
2874  *  the action.
2875  *  This function helps the suffix flow to get the item layer flags from prefix
2876  *  subflow.
2877  *
2878  * @param[in] dev_flow
2879  *   Pointer the created preifx subflow.
2880  *
2881  * @return
2882  *   The layers get from prefix subflow.
2883  */
2884 static inline uint64_t
2885 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
2886 {
2887         uint64_t layers = 0;
2888
2889         /*
2890          * Layers bits could be localization, but usually the compiler will
2891          * help to do the optimization work for source code.
2892          * If no decap actions, use the layers directly.
2893          */
2894         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
2895                 return dev_flow->handle->layers;
2896         /* Convert L3 layers with decap action. */
2897         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
2898                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2899         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
2900                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2901         /* Convert L4 layers with decap action.  */
2902         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
2903                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2904         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
2905                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2906         return layers;
2907 }
2908
2909 /**
2910  * Get metadata split action information.
2911  *
2912  * @param[in] actions
2913  *   Pointer to the list of actions.
2914  * @param[out] qrss
2915  *   Pointer to the return pointer.
2916  * @param[out] qrss_type
2917  *   Pointer to the action type to return. RTE_FLOW_ACTION_TYPE_END is returned
2918  *   if no QUEUE/RSS is found.
2919  * @param[out] encap_idx
2920  *   Pointer to the index of the encap action if exists, otherwise the last
2921  *   action index.
2922  *
2923  * @return
2924  *   Total number of actions.
2925  */
2926 static int
2927 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
2928                                        const struct rte_flow_action **qrss,
2929                                        int *encap_idx)
2930 {
2931         const struct rte_flow_action_raw_encap *raw_encap;
2932         int actions_n = 0;
2933         int raw_decap_idx = -1;
2934
2935         *encap_idx = -1;
2936         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2937                 switch (actions->type) {
2938                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2939                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2940                         *encap_idx = actions_n;
2941                         break;
2942                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
2943                         raw_decap_idx = actions_n;
2944                         break;
2945                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2946                         raw_encap = actions->conf;
2947                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
2948                                 *encap_idx = raw_decap_idx != -1 ?
2949                                                       raw_decap_idx : actions_n;
2950                         break;
2951                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2952                 case RTE_FLOW_ACTION_TYPE_RSS:
2953                         *qrss = actions;
2954                         break;
2955                 default:
2956                         break;
2957                 }
2958                 actions_n++;
2959         }
2960         if (*encap_idx == -1)
2961                 *encap_idx = actions_n;
2962         /* Count RTE_FLOW_ACTION_TYPE_END. */
2963         return actions_n + 1;
2964 }
2965
2966 /**
2967  * Check meter action from the action list.
2968  *
2969  * @param[in] actions
2970  *   Pointer to the list of actions.
2971  * @param[out] mtr
2972  *   Pointer to the meter exist flag.
2973  *
2974  * @return
2975  *   Total number of actions.
2976  */
2977 static int
2978 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
2979 {
2980         int actions_n = 0;
2981
2982         MLX5_ASSERT(mtr);
2983         *mtr = 0;
2984         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2985                 switch (actions->type) {
2986                 case RTE_FLOW_ACTION_TYPE_METER:
2987                         *mtr = 1;
2988                         break;
2989                 default:
2990                         break;
2991                 }
2992                 actions_n++;
2993         }
2994         /* Count RTE_FLOW_ACTION_TYPE_END. */
2995         return actions_n + 1;
2996 }
2997
2998 /**
2999  * Check if the flow should be splited due to hairpin.
3000  * The reason for the split is that in current HW we can't
3001  * support encap on Rx, so if a flow have encap we move it
3002  * to Tx.
3003  *
3004  * @param dev
3005  *   Pointer to Ethernet device.
3006  * @param[in] attr
3007  *   Flow rule attributes.
3008  * @param[in] actions
3009  *   Associated actions (list terminated by the END action).
3010  *
3011  * @return
3012  *   > 0 the number of actions and the flow should be split,
3013  *   0 when no split required.
3014  */
3015 static int
3016 flow_check_hairpin_split(struct rte_eth_dev *dev,
3017                          const struct rte_flow_attr *attr,
3018                          const struct rte_flow_action actions[])
3019 {
3020         int queue_action = 0;
3021         int action_n = 0;
3022         int encap = 0;
3023         const struct rte_flow_action_queue *queue;
3024         const struct rte_flow_action_rss *rss;
3025         const struct rte_flow_action_raw_encap *raw_encap;
3026
3027         if (!attr->ingress)
3028                 return 0;
3029         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3030                 switch (actions->type) {
3031                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3032                         queue = actions->conf;
3033                         if (queue == NULL)
3034                                 return 0;
3035                         if (mlx5_rxq_get_type(dev, queue->index) !=
3036                             MLX5_RXQ_TYPE_HAIRPIN)
3037                                 return 0;
3038                         queue_action = 1;
3039                         action_n++;
3040                         break;
3041                 case RTE_FLOW_ACTION_TYPE_RSS:
3042                         rss = actions->conf;
3043                         if (rss == NULL || rss->queue_num == 0)
3044                                 return 0;
3045                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
3046                             MLX5_RXQ_TYPE_HAIRPIN)
3047                                 return 0;
3048                         queue_action = 1;
3049                         action_n++;
3050                         break;
3051                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3052                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3053                         encap = 1;
3054                         action_n++;
3055                         break;
3056                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3057                         raw_encap = actions->conf;
3058                         if (raw_encap->size >
3059                             (sizeof(struct rte_flow_item_eth) +
3060                              sizeof(struct rte_flow_item_ipv4)))
3061                                 encap = 1;
3062                         action_n++;
3063                         break;
3064                 default:
3065                         action_n++;
3066                         break;
3067                 }
3068         }
3069         if (encap == 1 && queue_action)
3070                 return action_n;
3071         return 0;
3072 }
3073
3074 /* Declare flow create/destroy prototype in advance. */
3075 static uint32_t
3076 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
3077                  const struct rte_flow_attr *attr,
3078                  const struct rte_flow_item items[],
3079                  const struct rte_flow_action actions[],
3080                  bool external, struct rte_flow_error *error);
3081
3082 static void
3083 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
3084                   uint32_t flow_idx);
3085
3086 /**
3087  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3088  *
3089  * As mark_id is unique, if there's already a registered flow for the mark_id,
3090  * return by increasing the reference counter of the resource. Otherwise, create
3091  * the resource (mcp_res) and flow.
3092  *
3093  * Flow looks like,
3094  *   - If ingress port is ANY and reg_c[1] is mark_id,
3095  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3096  *
3097  * For default flow (zero mark_id), flow is like,
3098  *   - If ingress port is ANY,
3099  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
3100  *
3101  * @param dev
3102  *   Pointer to Ethernet device.
3103  * @param mark_id
3104  *   ID of MARK action, zero means default flow for META.
3105  * @param[out] error
3106  *   Perform verbose error reporting if not NULL.
3107  *
3108  * @return
3109  *   Associated resource on success, NULL otherwise and rte_errno is set.
3110  */
3111 static struct mlx5_flow_mreg_copy_resource *
3112 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3113                           struct rte_flow_error *error)
3114 {
3115         struct mlx5_priv *priv = dev->data->dev_private;
3116         struct rte_flow_attr attr = {
3117                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3118                 .ingress = 1,
3119         };
3120         struct mlx5_rte_flow_item_tag tag_spec = {
3121                 .data = mark_id,
3122         };
3123         struct rte_flow_item items[] = {
3124                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3125         };
3126         struct rte_flow_action_mark ftag = {
3127                 .id = mark_id,
3128         };
3129         struct mlx5_flow_action_copy_mreg cp_mreg = {
3130                 .dst = REG_B,
3131                 .src = 0,
3132         };
3133         struct rte_flow_action_jump jump = {
3134                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3135         };
3136         struct rte_flow_action actions[] = {
3137                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3138         };
3139         struct mlx5_flow_mreg_copy_resource *mcp_res;
3140         uint32_t idx = 0;
3141         int ret;
3142
3143         /* Fill the register fileds in the flow. */
3144         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3145         if (ret < 0)
3146                 return NULL;
3147         tag_spec.id = ret;
3148         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3149         if (ret < 0)
3150                 return NULL;
3151         cp_mreg.src = ret;
3152         /* Check if already registered. */
3153         MLX5_ASSERT(priv->mreg_cp_tbl);
3154         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
3155         if (mcp_res) {
3156                 /* For non-default rule. */
3157                 if (mark_id != MLX5_DEFAULT_COPY_ID)
3158                         mcp_res->refcnt++;
3159                 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
3160                             mcp_res->refcnt == 1);
3161                 return mcp_res;
3162         }
3163         /* Provide the full width of FLAG specific value. */
3164         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3165                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3166         /* Build a new flow. */
3167         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3168                 items[0] = (struct rte_flow_item){
3169                         .type = (enum rte_flow_item_type)
3170                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3171                         .spec = &tag_spec,
3172                 };
3173                 items[1] = (struct rte_flow_item){
3174                         .type = RTE_FLOW_ITEM_TYPE_END,
3175                 };
3176                 actions[0] = (struct rte_flow_action){
3177                         .type = (enum rte_flow_action_type)
3178                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3179                         .conf = &ftag,
3180                 };
3181                 actions[1] = (struct rte_flow_action){
3182                         .type = (enum rte_flow_action_type)
3183                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3184                         .conf = &cp_mreg,
3185                 };
3186                 actions[2] = (struct rte_flow_action){
3187                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3188                         .conf = &jump,
3189                 };
3190                 actions[3] = (struct rte_flow_action){
3191                         .type = RTE_FLOW_ACTION_TYPE_END,
3192                 };
3193         } else {
3194                 /* Default rule, wildcard match. */
3195                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3196                 items[0] = (struct rte_flow_item){
3197                         .type = RTE_FLOW_ITEM_TYPE_END,
3198                 };
3199                 actions[0] = (struct rte_flow_action){
3200                         .type = (enum rte_flow_action_type)
3201                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3202                         .conf = &cp_mreg,
3203                 };
3204                 actions[1] = (struct rte_flow_action){
3205                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3206                         .conf = &jump,
3207                 };
3208                 actions[2] = (struct rte_flow_action){
3209                         .type = RTE_FLOW_ACTION_TYPE_END,
3210                 };
3211         }
3212         /* Build a new entry. */
3213         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3214         if (!mcp_res) {
3215                 rte_errno = ENOMEM;
3216                 return NULL;
3217         }
3218         mcp_res->idx = idx;
3219         /*
3220          * The copy Flows are not included in any list. There
3221          * ones are referenced from other Flows and can not
3222          * be applied, removed, deleted in ardbitrary order
3223          * by list traversing.
3224          */
3225         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3226                                          actions, false, error);
3227         if (!mcp_res->rix_flow)
3228                 goto error;
3229         mcp_res->refcnt++;
3230         mcp_res->hlist_ent.key = mark_id;
3231         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3232                                 &mcp_res->hlist_ent);
3233         MLX5_ASSERT(!ret);
3234         if (ret)
3235                 goto error;
3236         return mcp_res;
3237 error:
3238         if (mcp_res->rix_flow)
3239                 flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3240         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3241         return NULL;
3242 }
3243
3244 /**
3245  * Release flow in RX_CP_TBL.
3246  *
3247  * @param dev
3248  *   Pointer to Ethernet device.
3249  * @flow
3250  *   Parent flow for wich copying is provided.
3251  */
3252 static void
3253 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3254                           struct rte_flow *flow)
3255 {
3256         struct mlx5_flow_mreg_copy_resource *mcp_res;
3257         struct mlx5_priv *priv = dev->data->dev_private;
3258
3259         if (!flow->rix_mreg_copy)
3260                 return;
3261         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3262                                  flow->rix_mreg_copy);
3263         if (!mcp_res || !priv->mreg_cp_tbl)
3264                 return;
3265         if (flow->copy_applied) {
3266                 MLX5_ASSERT(mcp_res->appcnt);
3267                 flow->copy_applied = 0;
3268                 --mcp_res->appcnt;
3269                 if (!mcp_res->appcnt) {
3270                         struct rte_flow *mcp_flow = mlx5_ipool_get
3271                                         (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3272                                         mcp_res->rix_flow);
3273
3274                         if (mcp_flow)
3275                                 flow_drv_remove(dev, mcp_flow);
3276                 }
3277         }
3278         /*
3279          * We do not check availability of metadata registers here,
3280          * because copy resources are not allocated in this case.
3281          */
3282         if (--mcp_res->refcnt)
3283                 return;
3284         MLX5_ASSERT(mcp_res->rix_flow);
3285         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3286         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3287         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3288         flow->rix_mreg_copy = 0;
3289 }
3290
3291 /**
3292  * Start flow in RX_CP_TBL.
3293  *
3294  * @param dev
3295  *   Pointer to Ethernet device.
3296  * @flow
3297  *   Parent flow for wich copying is provided.
3298  *
3299  * @return
3300  *   0 on success, a negative errno value otherwise and rte_errno is set.
3301  */
3302 static int
3303 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3304                             struct rte_flow *flow)
3305 {
3306         struct mlx5_flow_mreg_copy_resource *mcp_res;
3307         struct mlx5_priv *priv = dev->data->dev_private;
3308         int ret;
3309
3310         if (!flow->rix_mreg_copy || flow->copy_applied)
3311                 return 0;
3312         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3313                                  flow->rix_mreg_copy);
3314         if (!mcp_res)
3315                 return 0;
3316         if (!mcp_res->appcnt) {
3317                 struct rte_flow *mcp_flow = mlx5_ipool_get
3318                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3319                                 mcp_res->rix_flow);
3320
3321                 if (mcp_flow) {
3322                         ret = flow_drv_apply(dev, mcp_flow, NULL);
3323                         if (ret)
3324                                 return ret;
3325                 }
3326         }
3327         ++mcp_res->appcnt;
3328         flow->copy_applied = 1;
3329         return 0;
3330 }
3331
3332 /**
3333  * Stop flow in RX_CP_TBL.
3334  *
3335  * @param dev
3336  *   Pointer to Ethernet device.
3337  * @flow
3338  *   Parent flow for wich copying is provided.
3339  */
3340 static void
3341 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3342                            struct rte_flow *flow)
3343 {
3344         struct mlx5_flow_mreg_copy_resource *mcp_res;
3345         struct mlx5_priv *priv = dev->data->dev_private;
3346
3347         if (!flow->rix_mreg_copy || !flow->copy_applied)
3348                 return;
3349         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3350                                  flow->rix_mreg_copy);
3351         if (!mcp_res)
3352                 return;
3353         MLX5_ASSERT(mcp_res->appcnt);
3354         --mcp_res->appcnt;
3355         flow->copy_applied = 0;
3356         if (!mcp_res->appcnt) {
3357                 struct rte_flow *mcp_flow = mlx5_ipool_get
3358                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3359                                 mcp_res->rix_flow);
3360
3361                 if (mcp_flow)
3362                         flow_drv_remove(dev, mcp_flow);
3363         }
3364 }
3365
3366 /**
3367  * Remove the default copy action from RX_CP_TBL.
3368  *
3369  * @param dev
3370  *   Pointer to Ethernet device.
3371  */
3372 static void
3373 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3374 {
3375         struct mlx5_flow_mreg_copy_resource *mcp_res;
3376         struct mlx5_priv *priv = dev->data->dev_private;
3377
3378         /* Check if default flow is registered. */
3379         if (!priv->mreg_cp_tbl)
3380                 return;
3381         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3382                                             MLX5_DEFAULT_COPY_ID);
3383         if (!mcp_res)
3384                 return;
3385         MLX5_ASSERT(mcp_res->rix_flow);
3386         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3387         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3388         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3389 }
3390
3391 /**
3392  * Add the default copy action in in RX_CP_TBL.
3393  *
3394  * @param dev
3395  *   Pointer to Ethernet device.
3396  * @param[out] error
3397  *   Perform verbose error reporting if not NULL.
3398  *
3399  * @return
3400  *   0 for success, negative value otherwise and rte_errno is set.
3401  */
3402 static int
3403 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3404                                   struct rte_flow_error *error)
3405 {
3406         struct mlx5_priv *priv = dev->data->dev_private;
3407         struct mlx5_flow_mreg_copy_resource *mcp_res;
3408
3409         /* Check whether extensive metadata feature is engaged. */
3410         if (!priv->config.dv_flow_en ||
3411             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3412             !mlx5_flow_ext_mreg_supported(dev) ||
3413             !priv->sh->dv_regc0_mask)
3414                 return 0;
3415         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3416         if (!mcp_res)
3417                 return -rte_errno;
3418         return 0;
3419 }
3420
3421 /**
3422  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3423  *
3424  * All the flow having Q/RSS action should be split by
3425  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3426  * performs the following,
3427  *   - CQE->flow_tag := reg_c[1] (MARK)
3428  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3429  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3430  * but there should be a flow per each MARK ID set by MARK action.
3431  *
3432  * For the aforementioned reason, if there's a MARK action in flow's action
3433  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3434  * the MARK ID to CQE's flow_tag like,
3435  *   - If reg_c[1] is mark_id,
3436  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3437  *
3438  * For SET_META action which stores value in reg_c[0], as the destination is
3439  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3440  * MARK ID means the default flow. The default flow looks like,
3441  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3442  *
3443  * @param dev
3444  *   Pointer to Ethernet device.
3445  * @param flow
3446  *   Pointer to flow structure.
3447  * @param[in] actions
3448  *   Pointer to the list of actions.
3449  * @param[out] error
3450  *   Perform verbose error reporting if not NULL.
3451  *
3452  * @return
3453  *   0 on success, negative value otherwise and rte_errno is set.
3454  */
3455 static int
3456 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3457                             struct rte_flow *flow,
3458                             const struct rte_flow_action *actions,
3459                             struct rte_flow_error *error)
3460 {
3461         struct mlx5_priv *priv = dev->data->dev_private;
3462         struct mlx5_dev_config *config = &priv->config;
3463         struct mlx5_flow_mreg_copy_resource *mcp_res;
3464         const struct rte_flow_action_mark *mark;
3465
3466         /* Check whether extensive metadata feature is engaged. */
3467         if (!config->dv_flow_en ||
3468             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3469             !mlx5_flow_ext_mreg_supported(dev) ||
3470             !priv->sh->dv_regc0_mask)
3471                 return 0;
3472         /* Find MARK action. */
3473         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3474                 switch (actions->type) {
3475                 case RTE_FLOW_ACTION_TYPE_FLAG:
3476                         mcp_res = flow_mreg_add_copy_action
3477                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3478                         if (!mcp_res)
3479                                 return -rte_errno;
3480                         flow->rix_mreg_copy = mcp_res->idx;
3481                         if (dev->data->dev_started) {
3482                                 mcp_res->appcnt++;
3483                                 flow->copy_applied = 1;
3484                         }
3485                         return 0;
3486                 case RTE_FLOW_ACTION_TYPE_MARK:
3487                         mark = (const struct rte_flow_action_mark *)
3488                                 actions->conf;
3489                         mcp_res =
3490                                 flow_mreg_add_copy_action(dev, mark->id, error);
3491                         if (!mcp_res)
3492                                 return -rte_errno;
3493                         flow->rix_mreg_copy = mcp_res->idx;
3494                         if (dev->data->dev_started) {
3495                                 mcp_res->appcnt++;
3496                                 flow->copy_applied = 1;
3497                         }
3498                         return 0;
3499                 default:
3500                         break;
3501                 }
3502         }
3503         return 0;
3504 }
3505
3506 #define MLX5_MAX_SPLIT_ACTIONS 24
3507 #define MLX5_MAX_SPLIT_ITEMS 24
3508
3509 /**
3510  * Split the hairpin flow.
3511  * Since HW can't support encap on Rx we move the encap to Tx.
3512  * If the count action is after the encap then we also
3513  * move the count action. in this case the count will also measure
3514  * the outer bytes.
3515  *
3516  * @param dev
3517  *   Pointer to Ethernet device.
3518  * @param[in] actions
3519  *   Associated actions (list terminated by the END action).
3520  * @param[out] actions_rx
3521  *   Rx flow actions.
3522  * @param[out] actions_tx
3523  *   Tx flow actions..
3524  * @param[out] pattern_tx
3525  *   The pattern items for the Tx flow.
3526  * @param[out] flow_id
3527  *   The flow ID connected to this flow.
3528  *
3529  * @return
3530  *   0 on success.
3531  */
3532 static int
3533 flow_hairpin_split(struct rte_eth_dev *dev,
3534                    const struct rte_flow_action actions[],
3535                    struct rte_flow_action actions_rx[],
3536                    struct rte_flow_action actions_tx[],
3537                    struct rte_flow_item pattern_tx[],
3538                    uint32_t *flow_id)
3539 {
3540         struct mlx5_priv *priv = dev->data->dev_private;
3541         const struct rte_flow_action_raw_encap *raw_encap;
3542         const struct rte_flow_action_raw_decap *raw_decap;
3543         struct mlx5_rte_flow_action_set_tag *set_tag;
3544         struct rte_flow_action *tag_action;
3545         struct mlx5_rte_flow_item_tag *tag_item;
3546         struct rte_flow_item *item;
3547         char *addr;
3548         int encap = 0;
3549
3550         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3551         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3552                 switch (actions->type) {
3553                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3554                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3555                         rte_memcpy(actions_tx, actions,
3556                                sizeof(struct rte_flow_action));
3557                         actions_tx++;
3558                         break;
3559                 case RTE_FLOW_ACTION_TYPE_COUNT:
3560                         if (encap) {
3561                                 rte_memcpy(actions_tx, actions,
3562                                            sizeof(struct rte_flow_action));
3563                                 actions_tx++;
3564                         } else {
3565                                 rte_memcpy(actions_rx, actions,
3566                                            sizeof(struct rte_flow_action));
3567                                 actions_rx++;
3568                         }
3569                         break;
3570                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3571                         raw_encap = actions->conf;
3572                         if (raw_encap->size >
3573                             (sizeof(struct rte_flow_item_eth) +
3574                              sizeof(struct rte_flow_item_ipv4))) {
3575                                 memcpy(actions_tx, actions,
3576                                        sizeof(struct rte_flow_action));
3577                                 actions_tx++;
3578                                 encap = 1;
3579                         } else {
3580                                 rte_memcpy(actions_rx, actions,
3581                                            sizeof(struct rte_flow_action));
3582                                 actions_rx++;
3583                         }
3584                         break;
3585                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3586                         raw_decap = actions->conf;
3587                         if (raw_decap->size <
3588                             (sizeof(struct rte_flow_item_eth) +
3589                              sizeof(struct rte_flow_item_ipv4))) {
3590                                 memcpy(actions_tx, actions,
3591                                        sizeof(struct rte_flow_action));
3592                                 actions_tx++;
3593                         } else {
3594                                 rte_memcpy(actions_rx, actions,
3595                                            sizeof(struct rte_flow_action));
3596                                 actions_rx++;
3597                         }
3598                         break;
3599                 default:
3600                         rte_memcpy(actions_rx, actions,
3601                                    sizeof(struct rte_flow_action));
3602                         actions_rx++;
3603                         break;
3604                 }
3605         }
3606         /* Add set meta action and end action for the Rx flow. */
3607         tag_action = actions_rx;
3608         tag_action->type = (enum rte_flow_action_type)
3609                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3610         actions_rx++;
3611         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3612         actions_rx++;
3613         set_tag = (void *)actions_rx;
3614         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3615         MLX5_ASSERT(set_tag->id > REG_NONE);
3616         set_tag->data = *flow_id;
3617         tag_action->conf = set_tag;
3618         /* Create Tx item list. */
3619         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3620         addr = (void *)&pattern_tx[2];
3621         item = pattern_tx;
3622         item->type = (enum rte_flow_item_type)
3623                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3624         tag_item = (void *)addr;
3625         tag_item->data = *flow_id;
3626         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3627         MLX5_ASSERT(set_tag->id > REG_NONE);
3628         item->spec = tag_item;
3629         addr += sizeof(struct mlx5_rte_flow_item_tag);
3630         tag_item = (void *)addr;
3631         tag_item->data = UINT32_MAX;
3632         tag_item->id = UINT16_MAX;
3633         item->mask = tag_item;
3634         addr += sizeof(struct mlx5_rte_flow_item_tag);
3635         item->last = NULL;
3636         item++;
3637         item->type = RTE_FLOW_ITEM_TYPE_END;
3638         return 0;
3639 }
3640
3641 /**
3642  * The last stage of splitting chain, just creates the subflow
3643  * without any modification.
3644  *
3645  * @param[in] dev
3646  *   Pointer to Ethernet device.
3647  * @param[in] flow
3648  *   Parent flow structure pointer.
3649  * @param[in, out] sub_flow
3650  *   Pointer to return the created subflow, may be NULL.
3651  * @param[in] prefix_layers
3652  *   Prefix subflow layers, may be 0.
3653  * @param[in] attr
3654  *   Flow rule attributes.
3655  * @param[in] items
3656  *   Pattern specification (list terminated by the END pattern item).
3657  * @param[in] actions
3658  *   Associated actions (list terminated by the END action).
3659  * @param[in] external
3660  *   This flow rule is created by request external to PMD.
3661  * @param[in] flow_idx
3662  *   This memory pool index to the flow.
3663  * @param[out] error
3664  *   Perform verbose error reporting if not NULL.
3665  * @return
3666  *   0 on success, negative value otherwise
3667  */
3668 static int
3669 flow_create_split_inner(struct rte_eth_dev *dev,
3670                         struct rte_flow *flow,
3671                         struct mlx5_flow **sub_flow,
3672                         uint64_t prefix_layers,
3673                         const struct rte_flow_attr *attr,
3674                         const struct rte_flow_item items[],
3675                         const struct rte_flow_action actions[],
3676                         bool external, uint32_t flow_idx,
3677                         struct rte_flow_error *error)
3678 {
3679         struct mlx5_flow *dev_flow;
3680
3681         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
3682                 flow_idx, error);
3683         if (!dev_flow)
3684                 return -rte_errno;
3685         dev_flow->flow = flow;
3686         dev_flow->external = external;
3687         /* Subflow object was created, we must include one in the list. */
3688         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
3689                       dev_flow->handle, next);
3690         /*
3691          * If dev_flow is as one of the suffix flow, some actions in suffix
3692          * flow may need some user defined item layer flags.
3693          */
3694         if (prefix_layers)
3695                 dev_flow->handle->layers = prefix_layers;
3696         if (sub_flow)
3697                 *sub_flow = dev_flow;
3698         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3699 }
3700
3701 /**
3702  * Split the meter flow.
3703  *
3704  * As meter flow will split to three sub flow, other than meter
3705  * action, the other actions make sense to only meter accepts
3706  * the packet. If it need to be dropped, no other additional
3707  * actions should be take.
3708  *
3709  * One kind of special action which decapsulates the L3 tunnel
3710  * header will be in the prefix sub flow, as not to take the
3711  * L3 tunnel header into account.
3712  *
3713  * @param dev
3714  *   Pointer to Ethernet device.
3715  * @param[in] items
3716  *   Pattern specification (list terminated by the END pattern item).
3717  * @param[out] sfx_items
3718  *   Suffix flow match items (list terminated by the END pattern item).
3719  * @param[in] actions
3720  *   Associated actions (list terminated by the END action).
3721  * @param[out] actions_sfx
3722  *   Suffix flow actions.
3723  * @param[out] actions_pre
3724  *   Prefix flow actions.
3725  * @param[out] pattern_sfx
3726  *   The pattern items for the suffix flow.
3727  * @param[out] tag_sfx
3728  *   Pointer to suffix flow tag.
3729  *
3730  * @return
3731  *   0 on success.
3732  */
3733 static int
3734 flow_meter_split_prep(struct rte_eth_dev *dev,
3735                  const struct rte_flow_item items[],
3736                  struct rte_flow_item sfx_items[],
3737                  const struct rte_flow_action actions[],
3738                  struct rte_flow_action actions_sfx[],
3739                  struct rte_flow_action actions_pre[])
3740 {
3741         struct rte_flow_action *tag_action = NULL;
3742         struct rte_flow_item *tag_item;
3743         struct mlx5_rte_flow_action_set_tag *set_tag;
3744         struct rte_flow_error error;
3745         const struct rte_flow_action_raw_encap *raw_encap;
3746         const struct rte_flow_action_raw_decap *raw_decap;
3747         struct mlx5_rte_flow_item_tag *tag_spec;
3748         struct mlx5_rte_flow_item_tag *tag_mask;
3749         uint32_t tag_id;
3750         bool copy_vlan = false;
3751
3752         /* Prepare the actions for prefix and suffix flow. */
3753         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3754                 struct rte_flow_action **action_cur = NULL;
3755
3756                 switch (actions->type) {
3757                 case RTE_FLOW_ACTION_TYPE_METER:
3758                         /* Add the extra tag action first. */
3759                         tag_action = actions_pre;
3760                         tag_action->type = (enum rte_flow_action_type)
3761                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3762                         actions_pre++;
3763                         action_cur = &actions_pre;
3764                         break;
3765                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3766                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3767                         action_cur = &actions_pre;
3768                         break;
3769                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3770                         raw_encap = actions->conf;
3771                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
3772                                 action_cur = &actions_pre;
3773                         break;
3774                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3775                         raw_decap = actions->conf;
3776                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3777                                 action_cur = &actions_pre;
3778                         break;
3779                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3780                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3781                         copy_vlan = true;
3782                         break;
3783                 default:
3784                         break;
3785                 }
3786                 if (!action_cur)
3787                         action_cur = &actions_sfx;
3788                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
3789                 (*action_cur)++;
3790         }
3791         /* Add end action to the actions. */
3792         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
3793         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
3794         actions_pre++;
3795         /* Set the tag. */
3796         set_tag = (void *)actions_pre;
3797         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3798         /*
3799          * Get the id from the qrss_pool to make qrss share the id with meter.
3800          */
3801         tag_id = flow_qrss_get_id(dev);
3802         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
3803         assert(tag_action);
3804         tag_action->conf = set_tag;
3805         /* Prepare the suffix subflow items. */
3806         tag_item = sfx_items++;
3807         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3808                 int item_type = items->type;
3809
3810                 switch (item_type) {
3811                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
3812                         memcpy(sfx_items, items, sizeof(*sfx_items));
3813                         sfx_items++;
3814                         break;
3815                 case RTE_FLOW_ITEM_TYPE_VLAN:
3816                         if (copy_vlan) {
3817                                 memcpy(sfx_items, items, sizeof(*sfx_items));
3818                                 /*
3819                                  * Convert to internal match item, it is used
3820                                  * for vlan push and set vid.
3821                                  */
3822                                 sfx_items->type = (enum rte_flow_item_type)
3823                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
3824                                 sfx_items++;
3825                         }
3826                         break;
3827                 default:
3828                         break;
3829                 }
3830         }
3831         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
3832         sfx_items++;
3833         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
3834         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
3835         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3836         tag_mask = tag_spec + 1;
3837         tag_mask->data = 0xffffff00;
3838         tag_item->type = (enum rte_flow_item_type)
3839                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3840         tag_item->spec = tag_spec;
3841         tag_item->last = NULL;
3842         tag_item->mask = tag_mask;
3843         return tag_id;
3844 }
3845
3846 /**
3847  * Split action list having QUEUE/RSS for metadata register copy.
3848  *
3849  * Once Q/RSS action is detected in user's action list, the flow action
3850  * should be split in order to copy metadata registers, which will happen in
3851  * RX_CP_TBL like,
3852  *   - CQE->flow_tag := reg_c[1] (MARK)
3853  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3854  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
3855  * This is because the last action of each flow must be a terminal action
3856  * (QUEUE, RSS or DROP).
3857  *
3858  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
3859  * stored and kept in the mlx5_flow structure per each sub_flow.
3860  *
3861  * The Q/RSS action is replaced with,
3862  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
3863  * And the following JUMP action is added at the end,
3864  *   - JUMP, to RX_CP_TBL.
3865  *
3866  * A flow to perform remained Q/RSS action will be created in RX_ACT_TBL by
3867  * flow_create_split_metadata() routine. The flow will look like,
3868  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
3869  *
3870  * @param dev
3871  *   Pointer to Ethernet device.
3872  * @param[out] split_actions
3873  *   Pointer to store split actions to jump to CP_TBL.
3874  * @param[in] actions
3875  *   Pointer to the list of original flow actions.
3876  * @param[in] qrss
3877  *   Pointer to the Q/RSS action.
3878  * @param[in] actions_n
3879  *   Number of original actions.
3880  * @param[out] error
3881  *   Perform verbose error reporting if not NULL.
3882  *
3883  * @return
3884  *   non-zero unique flow_id on success, otherwise 0 and
3885  *   error/rte_error are set.
3886  */
3887 static uint32_t
3888 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
3889                           struct rte_flow_action *split_actions,
3890                           const struct rte_flow_action *actions,
3891                           const struct rte_flow_action *qrss,
3892                           int actions_n, struct rte_flow_error *error)
3893 {
3894         struct mlx5_rte_flow_action_set_tag *set_tag;
3895         struct rte_flow_action_jump *jump;
3896         const int qrss_idx = qrss - actions;
3897         uint32_t flow_id = 0;
3898         int ret = 0;
3899
3900         /*
3901          * Given actions will be split
3902          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
3903          * - Add jump to mreg CP_TBL.
3904          * As a result, there will be one more action.
3905          */
3906         ++actions_n;
3907         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
3908         set_tag = (void *)(split_actions + actions_n);
3909         /*
3910          * If tag action is not set to void(it means we are not the meter
3911          * suffix flow), add the tag action. Since meter suffix flow already
3912          * has the tag added.
3913          */
3914         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
3915                 /*
3916                  * Allocate the new subflow ID. This one is unique within
3917                  * device and not shared with representors. Otherwise,
3918                  * we would have to resolve multi-thread access synch
3919                  * issue. Each flow on the shared device is appended
3920                  * with source vport identifier, so the resulting
3921                  * flows will be unique in the shared (by master and
3922                  * representors) domain even if they have coinciding
3923                  * IDs.
3924                  */
3925                 flow_id = flow_qrss_get_id(dev);
3926                 if (!flow_id)
3927                         return rte_flow_error_set(error, ENOMEM,
3928                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3929                                                   NULL, "can't allocate id "
3930                                                   "for split Q/RSS subflow");
3931                 /* Internal SET_TAG action to set flow ID. */
3932                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3933                         .data = flow_id,
3934                 };
3935                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3936                 if (ret < 0)
3937                         return ret;
3938                 set_tag->id = ret;
3939                 /* Construct new actions array. */
3940                 /* Replace QUEUE/RSS action. */
3941                 split_actions[qrss_idx] = (struct rte_flow_action){
3942                         .type = (enum rte_flow_action_type)
3943                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3944                         .conf = set_tag,
3945                 };
3946         }
3947         /* JUMP action to jump to mreg copy table (CP_TBL). */
3948         jump = (void *)(set_tag + 1);
3949         *jump = (struct rte_flow_action_jump){
3950                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3951         };
3952         split_actions[actions_n - 2] = (struct rte_flow_action){
3953                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3954                 .conf = jump,
3955         };
3956         split_actions[actions_n - 1] = (struct rte_flow_action){
3957                 .type = RTE_FLOW_ACTION_TYPE_END,
3958         };
3959         return flow_id;
3960 }
3961
3962 /**
3963  * Extend the given action list for Tx metadata copy.
3964  *
3965  * Copy the given action list to the ext_actions and add flow metadata register
3966  * copy action in order to copy reg_a set by WQE to reg_c[0].
3967  *
3968  * @param[out] ext_actions
3969  *   Pointer to the extended action list.
3970  * @param[in] actions
3971  *   Pointer to the list of actions.
3972  * @param[in] actions_n
3973  *   Number of actions in the list.
3974  * @param[out] error
3975  *   Perform verbose error reporting if not NULL.
3976  * @param[in] encap_idx
3977  *   The encap action inndex.
3978  *
3979  * @return
3980  *   0 on success, negative value otherwise
3981  */
3982 static int
3983 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
3984                        struct rte_flow_action *ext_actions,
3985                        const struct rte_flow_action *actions,
3986                        int actions_n, struct rte_flow_error *error,
3987                        int encap_idx)
3988 {
3989         struct mlx5_flow_action_copy_mreg *cp_mreg =
3990                 (struct mlx5_flow_action_copy_mreg *)
3991                         (ext_actions + actions_n + 1);
3992         int ret;
3993
3994         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3995         if (ret < 0)
3996                 return ret;
3997         cp_mreg->dst = ret;
3998         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
3999         if (ret < 0)
4000                 return ret;
4001         cp_mreg->src = ret;
4002         if (encap_idx != 0)
4003                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
4004         if (encap_idx == actions_n - 1) {
4005                 ext_actions[actions_n - 1] = (struct rte_flow_action){
4006                         .type = (enum rte_flow_action_type)
4007                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4008                         .conf = cp_mreg,
4009                 };
4010                 ext_actions[actions_n] = (struct rte_flow_action){
4011                         .type = RTE_FLOW_ACTION_TYPE_END,
4012                 };
4013         } else {
4014                 ext_actions[encap_idx] = (struct rte_flow_action){
4015                         .type = (enum rte_flow_action_type)
4016                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
4017                         .conf = cp_mreg,
4018                 };
4019                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
4020                                 sizeof(*ext_actions) * (actions_n - encap_idx));
4021         }
4022         return 0;
4023 }
4024
4025 /**
4026  * The splitting for metadata feature.
4027  *
4028  * - Q/RSS action on NIC Rx should be split in order to pass by
4029  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
4030  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
4031  *
4032  * - All the actions on NIC Tx should have a mreg copy action to
4033  *   copy reg_a from WQE to reg_c[0].
4034  *
4035  * @param dev
4036  *   Pointer to Ethernet device.
4037  * @param[in] flow
4038  *   Parent flow structure pointer.
4039  * @param[in] prefix_layers
4040  *   Prefix flow layer flags.
4041  * @param[in] attr
4042  *   Flow rule attributes.
4043  * @param[in] items
4044  *   Pattern specification (list terminated by the END pattern item).
4045  * @param[in] actions
4046  *   Associated actions (list terminated by the END action).
4047  * @param[in] external
4048  *   This flow rule is created by request external to PMD.
4049  * @param[in] flow_idx
4050  *   This memory pool index to the flow.
4051  * @param[out] error
4052  *   Perform verbose error reporting if not NULL.
4053  * @return
4054  *   0 on success, negative value otherwise
4055  */
4056 static int
4057 flow_create_split_metadata(struct rte_eth_dev *dev,
4058                            struct rte_flow *flow,
4059                            uint64_t prefix_layers,
4060                            const struct rte_flow_attr *attr,
4061                            const struct rte_flow_item items[],
4062                            const struct rte_flow_action actions[],
4063                            bool external, uint32_t flow_idx,
4064                            struct rte_flow_error *error)
4065 {
4066         struct mlx5_priv *priv = dev->data->dev_private;
4067         struct mlx5_dev_config *config = &priv->config;
4068         const struct rte_flow_action *qrss = NULL;
4069         struct rte_flow_action *ext_actions = NULL;
4070         struct mlx5_flow *dev_flow = NULL;
4071         uint32_t qrss_id = 0;
4072         int mtr_sfx = 0;
4073         size_t act_size;
4074         int actions_n;
4075         int encap_idx;
4076         int ret;
4077
4078         /* Check whether extensive metadata feature is engaged. */
4079         if (!config->dv_flow_en ||
4080             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
4081             !mlx5_flow_ext_mreg_supported(dev))
4082                 return flow_create_split_inner(dev, flow, NULL, prefix_layers,
4083                                                attr, items, actions, external,
4084                                                flow_idx, error);
4085         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
4086                                                            &encap_idx);
4087         if (qrss) {
4088                 /* Exclude hairpin flows from splitting. */
4089                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
4090                         const struct rte_flow_action_queue *queue;
4091
4092                         queue = qrss->conf;
4093                         if (mlx5_rxq_get_type(dev, queue->index) ==
4094                             MLX5_RXQ_TYPE_HAIRPIN)
4095                                 qrss = NULL;
4096                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
4097                         const struct rte_flow_action_rss *rss;
4098
4099                         rss = qrss->conf;
4100                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
4101                             MLX5_RXQ_TYPE_HAIRPIN)
4102                                 qrss = NULL;
4103                 }
4104         }
4105         if (qrss) {
4106                 /* Check if it is in meter suffix table. */
4107                 mtr_sfx = attr->group == (attr->transfer ?
4108                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4109                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
4110                 /*
4111                  * Q/RSS action on NIC Rx should be split in order to pass by
4112                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
4113                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
4114                  */
4115                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4116                            sizeof(struct rte_flow_action_set_tag) +
4117                            sizeof(struct rte_flow_action_jump);
4118                 ext_actions = rte_zmalloc(__func__, act_size, 0);
4119                 if (!ext_actions)
4120                         return rte_flow_error_set(error, ENOMEM,
4121                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4122                                                   NULL, "no memory to split "
4123                                                   "metadata flow");
4124                 /*
4125                  * If we are the suffix flow of meter, tag already exist.
4126                  * Set the tag action to void.
4127                  */
4128                 if (mtr_sfx)
4129                         ext_actions[qrss - actions].type =
4130                                                 RTE_FLOW_ACTION_TYPE_VOID;
4131                 else
4132                         ext_actions[qrss - actions].type =
4133                                                 (enum rte_flow_action_type)
4134                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4135                 /*
4136                  * Create the new actions list with removed Q/RSS action
4137                  * and appended set tag and jump to register copy table
4138                  * (RX_CP_TBL). We should preallocate unique tag ID here
4139                  * in advance, because it is needed for set tag action.
4140                  */
4141                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
4142                                                     qrss, actions_n, error);
4143                 if (!mtr_sfx && !qrss_id) {
4144                         ret = -rte_errno;
4145                         goto exit;
4146                 }
4147         } else if (attr->egress && !attr->transfer) {
4148                 /*
4149                  * All the actions on NIC Tx should have a metadata register
4150                  * copy action to copy reg_a from WQE to reg_c[meta]
4151                  */
4152                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4153                            sizeof(struct mlx5_flow_action_copy_mreg);
4154                 ext_actions = rte_zmalloc(__func__, act_size, 0);
4155                 if (!ext_actions)
4156                         return rte_flow_error_set(error, ENOMEM,
4157                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4158                                                   NULL, "no memory to split "
4159                                                   "metadata flow");
4160                 /* Create the action list appended with copy register. */
4161                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
4162                                              actions_n, error, encap_idx);
4163                 if (ret < 0)
4164                         goto exit;
4165         }
4166         /* Add the unmodified original or prefix subflow. */
4167         ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
4168                                       items, ext_actions ? ext_actions :
4169                                       actions, external, flow_idx, error);
4170         if (ret < 0)
4171                 goto exit;
4172         MLX5_ASSERT(dev_flow);
4173         if (qrss) {
4174                 const struct rte_flow_attr q_attr = {
4175                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4176                         .ingress = 1,
4177                 };
4178                 /* Internal PMD action to set register. */
4179                 struct mlx5_rte_flow_item_tag q_tag_spec = {
4180                         .data = qrss_id,
4181                         .id = 0,
4182                 };
4183                 struct rte_flow_item q_items[] = {
4184                         {
4185                                 .type = (enum rte_flow_item_type)
4186                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4187                                 .spec = &q_tag_spec,
4188                                 .last = NULL,
4189                                 .mask = NULL,
4190                         },
4191                         {
4192                                 .type = RTE_FLOW_ITEM_TYPE_END,
4193                         },
4194                 };
4195                 struct rte_flow_action q_actions[] = {
4196                         {
4197                                 .type = qrss->type,
4198                                 .conf = qrss->conf,
4199                         },
4200                         {
4201                                 .type = RTE_FLOW_ACTION_TYPE_END,
4202                         },
4203                 };
4204                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4205
4206                 /*
4207                  * Configure the tag item only if there is no meter subflow.
4208                  * Since tag is already marked in the meter suffix subflow
4209                  * we can just use the meter suffix items as is.
4210                  */
4211                 if (qrss_id) {
4212                         /* Not meter subflow. */
4213                         MLX5_ASSERT(!mtr_sfx);
4214                         /*
4215                          * Put unique id in prefix flow due to it is destroyed
4216                          * after suffix flow and id will be freed after there
4217                          * is no actual flows with this id and identifier
4218                          * reallocation becomes possible (for example, for
4219                          * other flows in other threads).
4220                          */
4221                         dev_flow->handle->split_flow_id = qrss_id;
4222                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4223                                                    error);
4224                         if (ret < 0)
4225                                 goto exit;
4226                         q_tag_spec.id = ret;
4227                 }
4228                 dev_flow = NULL;
4229                 /* Add suffix subflow to execute Q/RSS. */
4230                 ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
4231                                               &q_attr, mtr_sfx ? items :
4232                                               q_items, q_actions,
4233                                               external, flow_idx, error);
4234                 if (ret < 0)
4235                         goto exit;
4236                 /* qrss ID should be freed if failed. */
4237                 qrss_id = 0;
4238                 MLX5_ASSERT(dev_flow);
4239         }
4240
4241 exit:
4242         /*
4243          * We do not destroy the partially created sub_flows in case of error.
4244          * These ones are included into parent flow list and will be destroyed
4245          * by flow_drv_destroy.
4246          */
4247         flow_qrss_free_id(dev, qrss_id);
4248         rte_free(ext_actions);
4249         return ret;
4250 }
4251
4252 /**
4253  * The splitting for meter feature.
4254  *
4255  * - The meter flow will be split to two flows as prefix and
4256  *   suffix flow. The packets make sense only it pass the prefix
4257  *   meter action.
4258  *
4259  * - Reg_C_5 is used for the packet to match betweend prefix and
4260  *   suffix flow.
4261  *
4262  * @param dev
4263  *   Pointer to Ethernet device.
4264  * @param[in] flow
4265  *   Parent flow structure pointer.
4266  * @param[in] attr
4267  *   Flow rule attributes.
4268  * @param[in] items
4269  *   Pattern specification (list terminated by the END pattern item).
4270  * @param[in] actions
4271  *   Associated actions (list terminated by the END action).
4272  * @param[in] external
4273  *   This flow rule is created by request external to PMD.
4274  * @param[in] flow_idx
4275  *   This memory pool index to the flow.
4276  * @param[out] error
4277  *   Perform verbose error reporting if not NULL.
4278  * @return
4279  *   0 on success, negative value otherwise
4280  */
4281 static int
4282 flow_create_split_meter(struct rte_eth_dev *dev,
4283                            struct rte_flow *flow,
4284                            const struct rte_flow_attr *attr,
4285                            const struct rte_flow_item items[],
4286                            const struct rte_flow_action actions[],
4287                            bool external, uint32_t flow_idx,
4288                            struct rte_flow_error *error)
4289 {
4290         struct mlx5_priv *priv = dev->data->dev_private;
4291         struct rte_flow_action *sfx_actions = NULL;
4292         struct rte_flow_action *pre_actions = NULL;
4293         struct rte_flow_item *sfx_items = NULL;
4294         struct mlx5_flow *dev_flow = NULL;
4295         struct rte_flow_attr sfx_attr = *attr;
4296         uint32_t mtr = 0;
4297         uint32_t mtr_tag_id = 0;
4298         size_t act_size;
4299         size_t item_size;
4300         int actions_n = 0;
4301         int ret;
4302
4303         if (priv->mtr_en)
4304                 actions_n = flow_check_meter_action(actions, &mtr);
4305         if (mtr) {
4306                 /* The five prefix actions: meter, decap, encap, tag, end. */
4307                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4308                            sizeof(struct mlx5_rte_flow_action_set_tag);
4309                 /* tag, vlan, port id, end. */
4310 #define METER_SUFFIX_ITEM 4
4311                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4312                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4313                 sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0);
4314                 if (!sfx_actions)
4315                         return rte_flow_error_set(error, ENOMEM,
4316                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4317                                                   NULL, "no memory to split "
4318                                                   "meter flow");
4319                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4320                              act_size);
4321                 pre_actions = sfx_actions + actions_n;
4322                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4323                                                    actions, sfx_actions,
4324                                                    pre_actions);
4325                 if (!mtr_tag_id) {
4326                         ret = -rte_errno;
4327                         goto exit;
4328                 }
4329                 /* Add the prefix subflow. */
4330                 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
4331                                               items, pre_actions, external,
4332                                               flow_idx, error);
4333                 if (ret) {
4334                         ret = -rte_errno;
4335                         goto exit;
4336                 }
4337                 dev_flow->handle->split_flow_id = mtr_tag_id;
4338                 /* Setting the sfx group atrr. */
4339                 sfx_attr.group = sfx_attr.transfer ?
4340                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4341                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4342         }
4343         /* Add the prefix subflow. */
4344         ret = flow_create_split_metadata(dev, flow, dev_flow ?
4345                                          flow_get_prefix_layer_flags(dev_flow) :
4346                                          0, &sfx_attr,
4347                                          sfx_items ? sfx_items : items,
4348                                          sfx_actions ? sfx_actions : actions,
4349                                          external, flow_idx, error);
4350 exit:
4351         if (sfx_actions)
4352                 rte_free(sfx_actions);
4353         return ret;
4354 }
4355
4356 /**
4357  * Split the flow to subflow set. The splitters might be linked
4358  * in the chain, like this:
4359  * flow_create_split_outer() calls:
4360  *   flow_create_split_meter() calls:
4361  *     flow_create_split_metadata(meter_subflow_0) calls:
4362  *       flow_create_split_inner(metadata_subflow_0)
4363  *       flow_create_split_inner(metadata_subflow_1)
4364  *       flow_create_split_inner(metadata_subflow_2)
4365  *     flow_create_split_metadata(meter_subflow_1) calls:
4366  *       flow_create_split_inner(metadata_subflow_0)
4367  *       flow_create_split_inner(metadata_subflow_1)
4368  *       flow_create_split_inner(metadata_subflow_2)
4369  *
4370  * This provide flexible way to add new levels of flow splitting.
4371  * The all of successfully created subflows are included to the
4372  * parent flow dev_flow list.
4373  *
4374  * @param dev
4375  *   Pointer to Ethernet device.
4376  * @param[in] flow
4377  *   Parent flow structure pointer.
4378  * @param[in] attr
4379  *   Flow rule attributes.
4380  * @param[in] items
4381  *   Pattern specification (list terminated by the END pattern item).
4382  * @param[in] actions
4383  *   Associated actions (list terminated by the END action).
4384  * @param[in] external
4385  *   This flow rule is created by request external to PMD.
4386  * @param[in] flow_idx
4387  *   This memory pool index to the flow.
4388  * @param[out] error
4389  *   Perform verbose error reporting if not NULL.
4390  * @return
4391  *   0 on success, negative value otherwise
4392  */
4393 static int
4394 flow_create_split_outer(struct rte_eth_dev *dev,
4395                         struct rte_flow *flow,
4396                         const struct rte_flow_attr *attr,
4397                         const struct rte_flow_item items[],
4398                         const struct rte_flow_action actions[],
4399                         bool external, uint32_t flow_idx,
4400                         struct rte_flow_error *error)
4401 {
4402         int ret;
4403
4404         ret = flow_create_split_meter(dev, flow, attr, items,
4405                                          actions, external, flow_idx, error);
4406         MLX5_ASSERT(ret <= 0);
4407         return ret;
4408 }
4409
4410 /**
4411  * Create a flow and add it to @p list.
4412  *
4413  * @param dev
4414  *   Pointer to Ethernet device.
4415  * @param list
4416  *   Pointer to a TAILQ flow list. If this parameter NULL,
4417  *   no list insertion occurred, flow is just created,
4418  *   this is caller's responsibility to track the
4419  *   created flow.
4420  * @param[in] attr
4421  *   Flow rule attributes.
4422  * @param[in] items
4423  *   Pattern specification (list terminated by the END pattern item).
4424  * @param[in] actions
4425  *   Associated actions (list terminated by the END action).
4426  * @param[in] external
4427  *   This flow rule is created by request external to PMD.
4428  * @param[out] error
4429  *   Perform verbose error reporting if not NULL.
4430  *
4431  * @return
4432  *   A flow index on success, 0 otherwise and rte_errno is set.
4433  */
4434 static uint32_t
4435 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
4436                  const struct rte_flow_attr *attr,
4437                  const struct rte_flow_item items[],
4438                  const struct rte_flow_action actions[],
4439                  bool external, struct rte_flow_error *error)
4440 {
4441         struct mlx5_priv *priv = dev->data->dev_private;
4442         struct rte_flow *flow = NULL;
4443         struct mlx5_flow *dev_flow;
4444         const struct rte_flow_action_rss *rss;
4445         union {
4446                 struct rte_flow_expand_rss buf;
4447                 uint8_t buffer[2048];
4448         } expand_buffer;
4449         union {
4450                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4451                 uint8_t buffer[2048];
4452         } actions_rx;
4453         union {
4454                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4455                 uint8_t buffer[2048];
4456         } actions_hairpin_tx;
4457         union {
4458                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
4459                 uint8_t buffer[2048];
4460         } items_tx;
4461         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
4462         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
4463                                               priv->rss_desc)[!!priv->flow_idx];
4464         const struct rte_flow_action *p_actions_rx = actions;
4465         uint32_t i;
4466         uint32_t idx = 0;
4467         int hairpin_flow;
4468         uint32_t hairpin_id = 0;
4469         struct rte_flow_attr attr_tx = { .priority = 0 };
4470         int ret;
4471
4472         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4473         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
4474                                 external, hairpin_flow, error);
4475         if (ret < 0)
4476                 return 0;
4477         if (hairpin_flow > 0) {
4478                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
4479                         rte_errno = EINVAL;
4480                         return 0;
4481                 }
4482                 flow_hairpin_split(dev, actions, actions_rx.actions,
4483                                    actions_hairpin_tx.actions, items_tx.items,
4484                                    &hairpin_id);
4485                 p_actions_rx = actions_rx.actions;
4486         }
4487         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
4488         if (!flow) {
4489                 rte_errno = ENOMEM;
4490                 goto error_before_flow;
4491         }
4492         flow->drv_type = flow_get_drv_type(dev, attr);
4493         if (hairpin_id != 0)
4494                 flow->hairpin_flow_id = hairpin_id;
4495         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
4496                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
4497         memset(rss_desc, 0, sizeof(*rss_desc));
4498         rss = flow_get_rss_action(p_actions_rx);
4499         if (rss) {
4500                 /*
4501                  * The following information is required by
4502                  * mlx5_flow_hashfields_adjust() in advance.
4503                  */
4504                 rss_desc->level = rss->level;
4505                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
4506                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
4507         }
4508         flow->dev_handles = 0;
4509         if (rss && rss->types) {
4510                 unsigned int graph_root;
4511
4512                 graph_root = find_graph_root(items, rss->level);
4513                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
4514                                           items, rss->types,
4515                                           mlx5_support_expansion,
4516                                           graph_root);
4517                 MLX5_ASSERT(ret > 0 &&
4518                        (unsigned int)ret < sizeof(expand_buffer.buffer));
4519         } else {
4520                 buf->entries = 1;
4521                 buf->entry[0].pattern = (void *)(uintptr_t)items;
4522         }
4523         /*
4524          * Record the start index when there is a nested call. All sub-flows
4525          * need to be translated before another calling.
4526          * No need to use ping-pong buffer to save memory here.
4527          */
4528         if (priv->flow_idx) {
4529                 MLX5_ASSERT(!priv->flow_nested_idx);
4530                 priv->flow_nested_idx = priv->flow_idx;
4531         }
4532         for (i = 0; i < buf->entries; ++i) {
4533                 /*
4534                  * The splitter may create multiple dev_flows,
4535                  * depending on configuration. In the simplest
4536                  * case it just creates unmodified original flow.
4537                  */
4538                 ret = flow_create_split_outer(dev, flow, attr,
4539                                               buf->entry[i].pattern,
4540                                               p_actions_rx, external, idx,
4541                                               error);
4542                 if (ret < 0)
4543                         goto error;
4544         }
4545         /* Create the tx flow. */
4546         if (hairpin_flow) {
4547                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
4548                 attr_tx.ingress = 0;
4549                 attr_tx.egress = 1;
4550                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
4551                                          actions_hairpin_tx.actions,
4552                                          idx, error);
4553                 if (!dev_flow)
4554                         goto error;
4555                 dev_flow->flow = flow;
4556                 dev_flow->external = 0;
4557                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4558                               dev_flow->handle, next);
4559                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
4560                                          items_tx.items,
4561                                          actions_hairpin_tx.actions, error);
4562                 if (ret < 0)
4563                         goto error;
4564         }
4565         /*
4566          * Update the metadata register copy table. If extensive
4567          * metadata feature is enabled and registers are supported
4568          * we might create the extra rte_flow for each unique
4569          * MARK/FLAG action ID.
4570          *
4571          * The table is updated for ingress Flows only, because
4572          * the egress Flows belong to the different device and
4573          * copy table should be updated in peer NIC Rx domain.
4574          */
4575         if (attr->ingress &&
4576             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
4577                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
4578                 if (ret)
4579                         goto error;
4580         }
4581         /*
4582          * If the flow is external (from application) OR device is started, then
4583          * the flow will be applied immediately.
4584          */
4585         if (external || dev->data->dev_started) {
4586                 ret = flow_drv_apply(dev, flow, error);
4587                 if (ret < 0)
4588                         goto error;
4589         }
4590         if (list)
4591                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
4592                              flow, next);
4593         flow_rxq_flags_set(dev, flow);
4594         /* Nested flow creation index recovery. */
4595         priv->flow_idx = priv->flow_nested_idx;
4596         if (priv->flow_nested_idx)
4597                 priv->flow_nested_idx = 0;
4598         return idx;
4599 error:
4600         MLX5_ASSERT(flow);
4601         ret = rte_errno; /* Save rte_errno before cleanup. */
4602         flow_mreg_del_copy_action(dev, flow);
4603         flow_drv_destroy(dev, flow);
4604         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
4605         rte_errno = ret; /* Restore rte_errno. */
4606 error_before_flow:
4607         ret = rte_errno;
4608         if (hairpin_id)
4609                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4610                                      hairpin_id);
4611         rte_errno = ret;
4612         priv->flow_idx = priv->flow_nested_idx;
4613         if (priv->flow_nested_idx)
4614                 priv->flow_nested_idx = 0;
4615         return 0;
4616 }
4617
4618 /**
4619  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
4620  * incoming packets to table 1.
4621  *
4622  * Other flow rules, requested for group n, will be created in
4623  * e-switch table n+1.
4624  * Jump action to e-switch group n will be created to group n+1.
4625  *
4626  * Used when working in switchdev mode, to utilise advantages of table 1
4627  * and above.
4628  *
4629  * @param dev
4630  *   Pointer to Ethernet device.
4631  *
4632  * @return
4633  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
4634  */
4635 struct rte_flow *
4636 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4637 {
4638         const struct rte_flow_attr attr = {
4639                 .group = 0,
4640                 .priority = 0,
4641                 .ingress = 1,
4642                 .egress = 0,
4643                 .transfer = 1,
4644         };
4645         const struct rte_flow_item pattern = {
4646                 .type = RTE_FLOW_ITEM_TYPE_END,
4647         };
4648         struct rte_flow_action_jump jump = {
4649                 .group = 1,
4650         };
4651         const struct rte_flow_action actions[] = {
4652                 {
4653                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4654                         .conf = &jump,
4655                 },
4656                 {
4657                         .type = RTE_FLOW_ACTION_TYPE_END,
4658                 },
4659         };
4660         struct mlx5_priv *priv = dev->data->dev_private;
4661         struct rte_flow_error error;
4662
4663         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
4664                                                    &attr, &pattern,
4665                                                    actions, false, &error);
4666 }
4667
4668 /**
4669  * Validate a flow supported by the NIC.
4670  *
4671  * @see rte_flow_validate()
4672  * @see rte_flow_ops
4673  */
4674 int
4675 mlx5_flow_validate(struct rte_eth_dev *dev,
4676                    const struct rte_flow_attr *attr,
4677                    const struct rte_flow_item items[],
4678                    const struct rte_flow_action actions[],
4679                    struct rte_flow_error *error)
4680 {
4681         int hairpin_flow;
4682
4683         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4684         return flow_drv_validate(dev, attr, items, actions,
4685                                 true, hairpin_flow, error);
4686 }
4687
4688 /**
4689  * Create a flow.
4690  *
4691  * @see rte_flow_create()
4692  * @see rte_flow_ops
4693  */
4694 struct rte_flow *
4695 mlx5_flow_create(struct rte_eth_dev *dev,
4696                  const struct rte_flow_attr *attr,
4697                  const struct rte_flow_item items[],
4698                  const struct rte_flow_action actions[],
4699                  struct rte_flow_error *error)
4700 {
4701         struct mlx5_priv *priv = dev->data->dev_private;
4702
4703         /*
4704          * If the device is not started yet, it is not allowed to created a
4705          * flow from application. PMD default flows and traffic control flows
4706          * are not affected.
4707          */
4708         if (unlikely(!dev->data->dev_started)) {
4709                 DRV_LOG(DEBUG, "port %u is not started when "
4710                         "inserting a flow", dev->data->port_id);
4711                 rte_flow_error_set(error, ENODEV,
4712                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4713                                    NULL,
4714                                    "port not started");
4715                 return NULL;
4716         }
4717         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
4718                                   attr, items, actions, true, error);
4719 }
4720
4721 /**
4722  * Destroy a flow in a list.
4723  *
4724  * @param dev
4725  *   Pointer to Ethernet device.
4726  * @param list
4727  *   Pointer to the Indexed flow list. If this parameter NULL,
4728  *   there is no flow removal from the list. Be noted that as
4729  *   flow is add to the indexed list, memory of the indexed
4730  *   list points to maybe changed as flow destroyed.
4731  * @param[in] flow_idx
4732  *   Index of flow to destroy.
4733  */
4734 static void
4735 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
4736                   uint32_t flow_idx)
4737 {
4738         struct mlx5_priv *priv = dev->data->dev_private;
4739         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
4740         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
4741                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
4742
4743         if (!flow)
4744                 return;
4745         /*
4746          * Update RX queue flags only if port is started, otherwise it is
4747          * already clean.
4748          */
4749         if (dev->data->dev_started)
4750                 flow_rxq_flags_trim(dev, flow);
4751         if (flow->hairpin_flow_id)
4752                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4753                                      flow->hairpin_flow_id);
4754         flow_drv_destroy(dev, flow);
4755         if (list)
4756                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
4757                              flow_idx, flow, next);
4758         flow_mreg_del_copy_action(dev, flow);
4759         if (flow->fdir) {
4760                 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
4761                         if (priv_fdir_flow->rix_flow == flow_idx)
4762                                 break;
4763                 }
4764                 if (priv_fdir_flow) {
4765                         LIST_REMOVE(priv_fdir_flow, next);
4766                         rte_free(priv_fdir_flow->fdir);
4767                         rte_free(priv_fdir_flow);
4768                 }
4769         }
4770         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
4771 }
4772
4773 /**
4774  * Destroy all flows.
4775  *
4776  * @param dev
4777  *   Pointer to Ethernet device.
4778  * @param list
4779  *   Pointer to the Indexed flow list.
4780  * @param active
4781  *   If flushing is called avtively.
4782  */
4783 void
4784 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
4785 {
4786         uint32_t num_flushed = 0;
4787
4788         while (*list) {
4789                 flow_list_destroy(dev, list, *list);
4790                 num_flushed++;
4791         }
4792         if (active) {
4793                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
4794                         dev->data->port_id, num_flushed);
4795         }
4796 }
4797
4798 /**
4799  * Remove all flows.
4800  *
4801  * @param dev
4802  *   Pointer to Ethernet device.
4803  * @param list
4804  *   Pointer to the Indexed flow list.
4805  */
4806 void
4807 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
4808 {
4809         struct mlx5_priv *priv = dev->data->dev_private;
4810         struct rte_flow *flow = NULL;
4811         uint32_t idx;
4812
4813         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4814                       flow, next) {
4815                 flow_drv_remove(dev, flow);
4816                 flow_mreg_stop_copy_action(dev, flow);
4817         }
4818         flow_mreg_del_default_copy_action(dev);
4819         flow_rxq_flags_clear(dev);
4820 }
4821
4822 /**
4823  * Add all flows.
4824  *
4825  * @param dev
4826  *   Pointer to Ethernet device.
4827  * @param list
4828  *   Pointer to the Indexed flow list.
4829  *
4830  * @return
4831  *   0 on success, a negative errno value otherwise and rte_errno is set.
4832  */
4833 int
4834 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
4835 {
4836         struct mlx5_priv *priv = dev->data->dev_private;
4837         struct rte_flow *flow = NULL;
4838         struct rte_flow_error error;
4839         uint32_t idx;
4840         int ret = 0;
4841
4842         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4843         ret = flow_mreg_add_default_copy_action(dev, &error);
4844         if (ret < 0)
4845                 return -rte_errno;
4846         /* Apply Flows created by application. */
4847         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4848                       flow, next) {
4849                 ret = flow_mreg_start_copy_action(dev, flow);
4850                 if (ret < 0)
4851                         goto error;
4852                 ret = flow_drv_apply(dev, flow, &error);
4853                 if (ret < 0)
4854                         goto error;
4855                 flow_rxq_flags_set(dev, flow);
4856         }
4857         return 0;
4858 error:
4859         ret = rte_errno; /* Save rte_errno before cleanup. */
4860         mlx5_flow_stop(dev, list);
4861         rte_errno = ret; /* Restore rte_errno. */
4862         return -rte_errno;
4863 }
4864
4865 /**
4866  * Stop all default actions for flows.
4867  *
4868  * @param dev
4869  *   Pointer to Ethernet device.
4870  */
4871 void
4872 mlx5_flow_stop_default(struct rte_eth_dev *dev)
4873 {
4874         flow_mreg_del_default_copy_action(dev);
4875         flow_rxq_flags_clear(dev);
4876 }
4877
4878 /**
4879  * Start all default actions for flows.
4880  *
4881  * @param dev
4882  *   Pointer to Ethernet device.
4883  * @return
4884  *   0 on success, a negative errno value otherwise and rte_errno is set.
4885  */
4886 int
4887 mlx5_flow_start_default(struct rte_eth_dev *dev)
4888 {
4889         struct rte_flow_error error;
4890
4891         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4892         return flow_mreg_add_default_copy_action(dev, &error);
4893 }
4894
4895 /**
4896  * Allocate intermediate resources for flow creation.
4897  *
4898  * @param dev
4899  *   Pointer to Ethernet device.
4900  */
4901 void
4902 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
4903 {
4904         struct mlx5_priv *priv = dev->data->dev_private;
4905
4906         if (!priv->inter_flows) {
4907                 priv->inter_flows = rte_calloc(__func__, 1,
4908                                     MLX5_NUM_MAX_DEV_FLOWS *
4909                                     sizeof(struct mlx5_flow) +
4910                                     (sizeof(struct mlx5_flow_rss_desc) +
4911                                     sizeof(uint16_t) * UINT16_MAX) * 2, 0);
4912                 if (!priv->inter_flows) {
4913                         DRV_LOG(ERR, "can't allocate intermediate memory.");
4914                         return;
4915                 }
4916         }
4917         priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
4918                          [MLX5_NUM_MAX_DEV_FLOWS];
4919         /* Reset the index. */
4920         priv->flow_idx = 0;
4921         priv->flow_nested_idx = 0;
4922 }
4923
4924 /**
4925  * Free intermediate resources for flows.
4926  *
4927  * @param dev
4928  *   Pointer to Ethernet device.
4929  */
4930 void
4931 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
4932 {
4933         struct mlx5_priv *priv = dev->data->dev_private;
4934
4935         rte_free(priv->inter_flows);
4936         priv->inter_flows = NULL;
4937 }
4938
4939 /**
4940  * Verify the flow list is empty
4941  *
4942  * @param dev
4943  *  Pointer to Ethernet device.
4944  *
4945  * @return the number of flows not released.
4946  */
4947 int
4948 mlx5_flow_verify(struct rte_eth_dev *dev)
4949 {
4950         struct mlx5_priv *priv = dev->data->dev_private;
4951         struct rte_flow *flow;
4952         uint32_t idx;
4953         int ret = 0;
4954
4955         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
4956                       flow, next) {
4957                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
4958                         dev->data->port_id, (void *)flow);
4959                 ++ret;
4960         }
4961         return ret;
4962 }
4963
4964 /**
4965  * Enable default hairpin egress flow.
4966  *
4967  * @param dev
4968  *   Pointer to Ethernet device.
4969  * @param queue
4970  *   The queue index.
4971  *
4972  * @return
4973  *   0 on success, a negative errno value otherwise and rte_errno is set.
4974  */
4975 int
4976 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
4977                             uint32_t queue)
4978 {
4979         struct mlx5_priv *priv = dev->data->dev_private;
4980         const struct rte_flow_attr attr = {
4981                 .egress = 1,
4982                 .priority = 0,
4983         };
4984         struct mlx5_rte_flow_item_tx_queue queue_spec = {
4985                 .queue = queue,
4986         };
4987         struct mlx5_rte_flow_item_tx_queue queue_mask = {
4988                 .queue = UINT32_MAX,
4989         };
4990         struct rte_flow_item items[] = {
4991                 {
4992                         .type = (enum rte_flow_item_type)
4993                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
4994                         .spec = &queue_spec,
4995                         .last = NULL,
4996                         .mask = &queue_mask,
4997                 },
4998                 {
4999                         .type = RTE_FLOW_ITEM_TYPE_END,
5000                 },
5001         };
5002         struct rte_flow_action_jump jump = {
5003                 .group = MLX5_HAIRPIN_TX_TABLE,
5004         };
5005         struct rte_flow_action actions[2];
5006         uint32_t flow_idx;
5007         struct rte_flow_error error;
5008
5009         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
5010         actions[0].conf = &jump;
5011         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
5012         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5013                                 &attr, items, actions, false, &error);
5014         if (!flow_idx) {
5015                 DRV_LOG(DEBUG,
5016                         "Failed to create ctrl flow: rte_errno(%d),"
5017                         " type(%d), message(%s)",
5018                         rte_errno, error.type,
5019                         error.message ? error.message : " (no stated reason)");
5020                 return -rte_errno;
5021         }
5022         return 0;
5023 }
5024
5025 /**
5026  * Enable a control flow configured from the control plane.
5027  *
5028  * @param dev
5029  *   Pointer to Ethernet device.
5030  * @param eth_spec
5031  *   An Ethernet flow spec to apply.
5032  * @param eth_mask
5033  *   An Ethernet flow mask to apply.
5034  * @param vlan_spec
5035  *   A VLAN flow spec to apply.
5036  * @param vlan_mask
5037  *   A VLAN flow mask to apply.
5038  *
5039  * @return
5040  *   0 on success, a negative errno value otherwise and rte_errno is set.
5041  */
5042 int
5043 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
5044                     struct rte_flow_item_eth *eth_spec,
5045                     struct rte_flow_item_eth *eth_mask,
5046                     struct rte_flow_item_vlan *vlan_spec,
5047                     struct rte_flow_item_vlan *vlan_mask)
5048 {
5049         struct mlx5_priv *priv = dev->data->dev_private;
5050         const struct rte_flow_attr attr = {
5051                 .ingress = 1,
5052                 .priority = MLX5_FLOW_PRIO_RSVD,
5053         };
5054         struct rte_flow_item items[] = {
5055                 {
5056                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5057                         .spec = eth_spec,
5058                         .last = NULL,
5059                         .mask = eth_mask,
5060                 },
5061                 {
5062                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
5063                                               RTE_FLOW_ITEM_TYPE_END,
5064                         .spec = vlan_spec,
5065                         .last = NULL,
5066                         .mask = vlan_mask,
5067                 },
5068                 {
5069                         .type = RTE_FLOW_ITEM_TYPE_END,
5070                 },
5071         };
5072         uint16_t queue[priv->reta_idx_n];
5073         struct rte_flow_action_rss action_rss = {
5074                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
5075                 .level = 0,
5076                 .types = priv->rss_conf.rss_hf,
5077                 .key_len = priv->rss_conf.rss_key_len,
5078                 .queue_num = priv->reta_idx_n,
5079                 .key = priv->rss_conf.rss_key,
5080                 .queue = queue,
5081         };
5082         struct rte_flow_action actions[] = {
5083                 {
5084                         .type = RTE_FLOW_ACTION_TYPE_RSS,
5085                         .conf = &action_rss,
5086                 },
5087                 {
5088                         .type = RTE_FLOW_ACTION_TYPE_END,
5089                 },
5090         };
5091         uint32_t flow_idx;
5092         struct rte_flow_error error;
5093         unsigned int i;
5094
5095         if (!priv->reta_idx_n || !priv->rxqs_n) {
5096                 return 0;
5097         }
5098         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
5099                 action_rss.types = 0;
5100         for (i = 0; i != priv->reta_idx_n; ++i)
5101                 queue[i] = (*priv->reta_idx)[i];
5102         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5103                                 &attr, items, actions, false, &error);
5104         if (!flow_idx)
5105                 return -rte_errno;
5106         return 0;
5107 }
5108
5109 /**
5110  * Enable a flow control configured from the control plane.
5111  *
5112  * @param dev
5113  *   Pointer to Ethernet device.
5114  * @param eth_spec
5115  *   An Ethernet flow spec to apply.
5116  * @param eth_mask
5117  *   An Ethernet flow mask to apply.
5118  *
5119  * @return
5120  *   0 on success, a negative errno value otherwise and rte_errno is set.
5121  */
5122 int
5123 mlx5_ctrl_flow(struct rte_eth_dev *dev,
5124                struct rte_flow_item_eth *eth_spec,
5125                struct rte_flow_item_eth *eth_mask)
5126 {
5127         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
5128 }
5129
5130 /**
5131  * Create default miss flow rule matching lacp traffic
5132  *
5133  * @param dev
5134  *   Pointer to Ethernet device.
5135  * @param eth_spec
5136  *   An Ethernet flow spec to apply.
5137  *
5138  * @return
5139  *   0 on success, a negative errno value otherwise and rte_errno is set.
5140  */
5141 int
5142 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
5143 {
5144         struct mlx5_priv *priv = dev->data->dev_private;
5145         /*
5146          * The LACP matching is done by only using ether type since using
5147          * a multicast dst mac causes kernel to give low priority to this flow.
5148          */
5149         static const struct rte_flow_item_eth lacp_spec = {
5150                 .type = RTE_BE16(0x8809),
5151         };
5152         static const struct rte_flow_item_eth lacp_mask = {
5153                 .type = 0xffff,
5154         };
5155         const struct rte_flow_attr attr = {
5156                 .ingress = 1,
5157         };
5158         struct rte_flow_item items[] = {
5159                 {
5160                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5161                         .spec = &lacp_spec,
5162                         .mask = &lacp_mask,
5163                 },
5164                 {
5165                         .type = RTE_FLOW_ITEM_TYPE_END,
5166                 },
5167         };
5168         struct rte_flow_action actions[] = {
5169                 {
5170                         .type = (enum rte_flow_action_type)
5171                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
5172                 },
5173                 {
5174                         .type = RTE_FLOW_ACTION_TYPE_END,
5175                 },
5176         };
5177         struct rte_flow_error error;
5178         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5179                                 &attr, items, actions, false, &error);
5180
5181         if (!flow_idx)
5182                 return -rte_errno;
5183         return 0;
5184 }
5185
5186 /**
5187  * Destroy a flow.
5188  *
5189  * @see rte_flow_destroy()
5190  * @see rte_flow_ops
5191  */
5192 int
5193 mlx5_flow_destroy(struct rte_eth_dev *dev,
5194                   struct rte_flow *flow,
5195                   struct rte_flow_error *error __rte_unused)
5196 {
5197         struct mlx5_priv *priv = dev->data->dev_private;
5198
5199         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
5200         return 0;
5201 }
5202
5203 /**
5204  * Destroy all flows.
5205  *
5206  * @see rte_flow_flush()
5207  * @see rte_flow_ops
5208  */
5209 int
5210 mlx5_flow_flush(struct rte_eth_dev *dev,
5211                 struct rte_flow_error *error __rte_unused)
5212 {
5213         struct mlx5_priv *priv = dev->data->dev_private;
5214
5215         mlx5_flow_list_flush(dev, &priv->flows, false);
5216         return 0;
5217 }
5218
5219 /**
5220  * Isolated mode.
5221  *
5222  * @see rte_flow_isolate()
5223  * @see rte_flow_ops
5224  */
5225 int
5226 mlx5_flow_isolate(struct rte_eth_dev *dev,
5227                   int enable,
5228                   struct rte_flow_error *error)
5229 {
5230         struct mlx5_priv *priv = dev->data->dev_private;
5231
5232         if (dev->data->dev_started) {
5233                 rte_flow_error_set(error, EBUSY,
5234                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5235                                    NULL,
5236                                    "port must be stopped first");
5237                 return -rte_errno;
5238         }
5239         priv->isolated = !!enable;
5240         if (enable)
5241                 dev->dev_ops = &mlx5_os_dev_ops_isolate;
5242         else
5243                 dev->dev_ops = &mlx5_os_dev_ops;
5244         return 0;
5245 }
5246
5247 /**
5248  * Query a flow.
5249  *
5250  * @see rte_flow_query()
5251  * @see rte_flow_ops
5252  */
5253 static int
5254 flow_drv_query(struct rte_eth_dev *dev,
5255                uint32_t flow_idx,
5256                const struct rte_flow_action *actions,
5257                void *data,
5258                struct rte_flow_error *error)
5259 {
5260         struct mlx5_priv *priv = dev->data->dev_private;
5261         const struct mlx5_flow_driver_ops *fops;
5262         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5263                                                [MLX5_IPOOL_RTE_FLOW],
5264                                                flow_idx);
5265         enum mlx5_flow_drv_type ftype;
5266
5267         if (!flow) {
5268                 return rte_flow_error_set(error, ENOENT,
5269                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5270                           NULL,
5271                           "invalid flow handle");
5272         }
5273         ftype = flow->drv_type;
5274         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
5275         fops = flow_get_drv_ops(ftype);
5276
5277         return fops->query(dev, flow, actions, data, error);
5278 }
5279
5280 /**
5281  * Query a flow.
5282  *
5283  * @see rte_flow_query()
5284  * @see rte_flow_ops
5285  */
5286 int
5287 mlx5_flow_query(struct rte_eth_dev *dev,
5288                 struct rte_flow *flow,
5289                 const struct rte_flow_action *actions,
5290                 void *data,
5291                 struct rte_flow_error *error)
5292 {
5293         int ret;
5294
5295         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
5296                              error);
5297         if (ret < 0)
5298                 return ret;
5299         return 0;
5300 }
5301
5302 /**
5303  * Convert a flow director filter to a generic flow.
5304  *
5305  * @param dev
5306  *   Pointer to Ethernet device.
5307  * @param fdir_filter
5308  *   Flow director filter to add.
5309  * @param attributes
5310  *   Generic flow parameters structure.
5311  *
5312  * @return
5313  *   0 on success, a negative errno value otherwise and rte_errno is set.
5314  */
5315 static int
5316 flow_fdir_filter_convert(struct rte_eth_dev *dev,
5317                          const struct rte_eth_fdir_filter *fdir_filter,
5318                          struct mlx5_fdir *attributes)
5319 {
5320         struct mlx5_priv *priv = dev->data->dev_private;
5321         const struct rte_eth_fdir_input *input = &fdir_filter->input;
5322         const struct rte_eth_fdir_masks *mask =
5323                 &dev->data->dev_conf.fdir_conf.mask;
5324
5325         /* Validate queue number. */
5326         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
5327                 DRV_LOG(ERR, "port %u invalid queue number %d",
5328                         dev->data->port_id, fdir_filter->action.rx_queue);
5329                 rte_errno = EINVAL;
5330                 return -rte_errno;
5331         }
5332         attributes->attr.ingress = 1;
5333         attributes->items[0] = (struct rte_flow_item) {
5334                 .type = RTE_FLOW_ITEM_TYPE_ETH,
5335                 .spec = &attributes->l2,
5336                 .mask = &attributes->l2_mask,
5337         };
5338         switch (fdir_filter->action.behavior) {
5339         case RTE_ETH_FDIR_ACCEPT:
5340                 attributes->actions[0] = (struct rte_flow_action){
5341                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
5342                         .conf = &attributes->queue,
5343                 };
5344                 break;
5345         case RTE_ETH_FDIR_REJECT:
5346                 attributes->actions[0] = (struct rte_flow_action){
5347                         .type = RTE_FLOW_ACTION_TYPE_DROP,
5348                 };
5349                 break;
5350         default:
5351                 DRV_LOG(ERR, "port %u invalid behavior %d",
5352                         dev->data->port_id,
5353                         fdir_filter->action.behavior);
5354                 rte_errno = ENOTSUP;
5355                 return -rte_errno;
5356         }
5357         attributes->queue.index = fdir_filter->action.rx_queue;
5358         /* Handle L3. */
5359         switch (fdir_filter->input.flow_type) {
5360         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5361         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5362         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5363                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
5364                         .src_addr = input->flow.ip4_flow.src_ip,
5365                         .dst_addr = input->flow.ip4_flow.dst_ip,
5366                         .time_to_live = input->flow.ip4_flow.ttl,
5367                         .type_of_service = input->flow.ip4_flow.tos,
5368                 };
5369                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
5370                         .src_addr = mask->ipv4_mask.src_ip,
5371                         .dst_addr = mask->ipv4_mask.dst_ip,
5372                         .time_to_live = mask->ipv4_mask.ttl,
5373                         .type_of_service = mask->ipv4_mask.tos,
5374                         .next_proto_id = mask->ipv4_mask.proto,
5375                 };
5376                 attributes->items[1] = (struct rte_flow_item){
5377                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
5378                         .spec = &attributes->l3,
5379                         .mask = &attributes->l3_mask,
5380                 };
5381                 break;
5382         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5383         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5384         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5385                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
5386                         .hop_limits = input->flow.ipv6_flow.hop_limits,
5387                         .proto = input->flow.ipv6_flow.proto,
5388                 };
5389
5390                 memcpy(attributes->l3.ipv6.hdr.src_addr,
5391                        input->flow.ipv6_flow.src_ip,
5392                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5393                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
5394                        input->flow.ipv6_flow.dst_ip,
5395                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5396                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
5397                        mask->ipv6_mask.src_ip,
5398                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
5399                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
5400                        mask->ipv6_mask.dst_ip,
5401                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
5402                 attributes->items[1] = (struct rte_flow_item){
5403                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
5404                         .spec = &attributes->l3,
5405                         .mask = &attributes->l3_mask,
5406                 };
5407                 break;
5408         default:
5409                 DRV_LOG(ERR, "port %u invalid flow type%d",
5410                         dev->data->port_id, fdir_filter->input.flow_type);
5411                 rte_errno = ENOTSUP;
5412                 return -rte_errno;
5413         }
5414         /* Handle L4. */
5415         switch (fdir_filter->input.flow_type) {
5416         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5417                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5418                         .src_port = input->flow.udp4_flow.src_port,
5419                         .dst_port = input->flow.udp4_flow.dst_port,
5420                 };
5421                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5422                         .src_port = mask->src_port_mask,
5423                         .dst_port = mask->dst_port_mask,
5424                 };
5425                 attributes->items[2] = (struct rte_flow_item){
5426                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5427                         .spec = &attributes->l4,
5428                         .mask = &attributes->l4_mask,
5429                 };
5430                 break;
5431         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5432                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5433                         .src_port = input->flow.tcp4_flow.src_port,
5434                         .dst_port = input->flow.tcp4_flow.dst_port,
5435                 };
5436                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5437                         .src_port = mask->src_port_mask,
5438                         .dst_port = mask->dst_port_mask,
5439                 };
5440                 attributes->items[2] = (struct rte_flow_item){
5441                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5442                         .spec = &attributes->l4,
5443                         .mask = &attributes->l4_mask,
5444                 };
5445                 break;
5446         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5447                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5448                         .src_port = input->flow.udp6_flow.src_port,
5449                         .dst_port = input->flow.udp6_flow.dst_port,
5450                 };
5451                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5452                         .src_port = mask->src_port_mask,
5453                         .dst_port = mask->dst_port_mask,
5454                 };
5455                 attributes->items[2] = (struct rte_flow_item){
5456                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5457                         .spec = &attributes->l4,
5458                         .mask = &attributes->l4_mask,
5459                 };
5460                 break;
5461         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5462                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5463                         .src_port = input->flow.tcp6_flow.src_port,
5464                         .dst_port = input->flow.tcp6_flow.dst_port,
5465                 };
5466                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5467                         .src_port = mask->src_port_mask,
5468                         .dst_port = mask->dst_port_mask,
5469                 };
5470                 attributes->items[2] = (struct rte_flow_item){
5471                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5472                         .spec = &attributes->l4,
5473                         .mask = &attributes->l4_mask,
5474                 };
5475                 break;
5476         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5477         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5478                 break;
5479         default:
5480                 DRV_LOG(ERR, "port %u invalid flow type%d",
5481                         dev->data->port_id, fdir_filter->input.flow_type);
5482                 rte_errno = ENOTSUP;
5483                 return -rte_errno;
5484         }
5485         return 0;
5486 }
5487
5488 #define FLOW_FDIR_CMP(f1, f2, fld) \
5489         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
5490
5491 /**
5492  * Compare two FDIR flows. If items and actions are identical, the two flows are
5493  * regarded as same.
5494  *
5495  * @param dev
5496  *   Pointer to Ethernet device.
5497  * @param f1
5498  *   FDIR flow to compare.
5499  * @param f2
5500  *   FDIR flow to compare.
5501  *
5502  * @return
5503  *   Zero on match, 1 otherwise.
5504  */
5505 static int
5506 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
5507 {
5508         if (FLOW_FDIR_CMP(f1, f2, attr) ||
5509             FLOW_FDIR_CMP(f1, f2, l2) ||
5510             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
5511             FLOW_FDIR_CMP(f1, f2, l3) ||
5512             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
5513             FLOW_FDIR_CMP(f1, f2, l4) ||
5514             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
5515             FLOW_FDIR_CMP(f1, f2, actions[0].type))
5516                 return 1;
5517         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
5518             FLOW_FDIR_CMP(f1, f2, queue))
5519                 return 1;
5520         return 0;
5521 }
5522
5523 /**
5524  * Search device flow list to find out a matched FDIR flow.
5525  *
5526  * @param dev
5527  *   Pointer to Ethernet device.
5528  * @param fdir_flow
5529  *   FDIR flow to lookup.
5530  *
5531  * @return
5532  *   Index of flow if found, 0 otherwise.
5533  */
5534 static uint32_t
5535 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
5536 {
5537         struct mlx5_priv *priv = dev->data->dev_private;
5538         uint32_t flow_idx = 0;
5539         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5540
5541         MLX5_ASSERT(fdir_flow);
5542         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5543                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) {
5544                         DRV_LOG(DEBUG, "port %u found FDIR flow %u",
5545                                 dev->data->port_id, flow_idx);
5546                         flow_idx = priv_fdir_flow->rix_flow;
5547                         break;
5548                 }
5549         }
5550         return flow_idx;
5551 }
5552
5553 /**
5554  * Add new flow director filter and store it in list.
5555  *
5556  * @param dev
5557  *   Pointer to Ethernet device.
5558  * @param fdir_filter
5559  *   Flow director filter to add.
5560  *
5561  * @return
5562  *   0 on success, a negative errno value otherwise and rte_errno is set.
5563  */
5564 static int
5565 flow_fdir_filter_add(struct rte_eth_dev *dev,
5566                      const struct rte_eth_fdir_filter *fdir_filter)
5567 {
5568         struct mlx5_priv *priv = dev->data->dev_private;
5569         struct mlx5_fdir *fdir_flow;
5570         struct rte_flow *flow;
5571         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5572         uint32_t flow_idx;
5573         int ret;
5574
5575         fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
5576         if (!fdir_flow) {
5577                 rte_errno = ENOMEM;
5578                 return -rte_errno;
5579         }
5580         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
5581         if (ret)
5582                 goto error;
5583         flow_idx = flow_fdir_filter_lookup(dev, fdir_flow);
5584         if (flow_idx) {
5585                 rte_errno = EEXIST;
5586                 goto error;
5587         }
5588         priv_fdir_flow = rte_zmalloc(__func__, sizeof(struct mlx5_fdir_flow),
5589                                      0);
5590         if (!priv_fdir_flow) {
5591                 rte_errno = ENOMEM;
5592                 goto error;
5593         }
5594         flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
5595                                     fdir_flow->items, fdir_flow->actions, true,
5596                                     NULL);
5597         flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5598         if (!flow)
5599                 goto error;
5600         flow->fdir = 1;
5601         priv_fdir_flow->fdir = fdir_flow;
5602         priv_fdir_flow->rix_flow = flow_idx;
5603         LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next);
5604         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
5605                 dev->data->port_id, (void *)flow);
5606         return 0;
5607 error:
5608         rte_free(priv_fdir_flow);
5609         rte_free(fdir_flow);
5610         return -rte_errno;
5611 }
5612
5613 /**
5614  * Delete specific filter.
5615  *
5616  * @param dev
5617  *   Pointer to Ethernet device.
5618  * @param fdir_filter
5619  *   Filter to be deleted.
5620  *
5621  * @return
5622  *   0 on success, a negative errno value otherwise and rte_errno is set.
5623  */
5624 static int
5625 flow_fdir_filter_delete(struct rte_eth_dev *dev,
5626                         const struct rte_eth_fdir_filter *fdir_filter)
5627 {
5628         struct mlx5_priv *priv = dev->data->dev_private;
5629         uint32_t flow_idx;
5630         struct mlx5_fdir fdir_flow = {
5631                 .attr.group = 0,
5632         };
5633         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5634         int ret;
5635
5636         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
5637         if (ret)
5638                 return -rte_errno;
5639         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5640                 /* Find the fdir in priv list */
5641                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
5642                         break;
5643         }
5644         if (!priv_fdir_flow)
5645                 return 0;
5646         LIST_REMOVE(priv_fdir_flow, next);
5647         flow_idx = priv_fdir_flow->rix_flow;
5648         flow_list_destroy(dev, &priv->flows, flow_idx);
5649         rte_free(priv_fdir_flow->fdir);
5650         rte_free(priv_fdir_flow);
5651         DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
5652                 dev->data->port_id, flow_idx);
5653         return 0;
5654 }
5655
5656 /**
5657  * Update queue for specific filter.
5658  *
5659  * @param dev
5660  *   Pointer to Ethernet device.
5661  * @param fdir_filter
5662  *   Filter to be updated.
5663  *
5664  * @return
5665  *   0 on success, a negative errno value otherwise and rte_errno is set.
5666  */
5667 static int
5668 flow_fdir_filter_update(struct rte_eth_dev *dev,
5669                         const struct rte_eth_fdir_filter *fdir_filter)
5670 {
5671         int ret;
5672
5673         ret = flow_fdir_filter_delete(dev, fdir_filter);
5674         if (ret)
5675                 return ret;
5676         return flow_fdir_filter_add(dev, fdir_filter);
5677 }
5678
5679 /**
5680  * Flush all filters.
5681  *
5682  * @param dev
5683  *   Pointer to Ethernet device.
5684  */
5685 static void
5686 flow_fdir_filter_flush(struct rte_eth_dev *dev)
5687 {
5688         struct mlx5_priv *priv = dev->data->dev_private;
5689         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5690
5691         while (!LIST_EMPTY(&priv->fdir_flows)) {
5692                 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
5693                 LIST_REMOVE(priv_fdir_flow, next);
5694                 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
5695                 rte_free(priv_fdir_flow->fdir);
5696                 rte_free(priv_fdir_flow);
5697         }
5698 }
5699
5700 /**
5701  * Get flow director information.
5702  *
5703  * @param dev
5704  *   Pointer to Ethernet device.
5705  * @param[out] fdir_info
5706  *   Resulting flow director information.
5707  */
5708 static void
5709 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
5710 {
5711         struct rte_eth_fdir_masks *mask =
5712                 &dev->data->dev_conf.fdir_conf.mask;
5713
5714         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
5715         fdir_info->guarant_spc = 0;
5716         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
5717         fdir_info->max_flexpayload = 0;
5718         fdir_info->flow_types_mask[0] = 0;
5719         fdir_info->flex_payload_unit = 0;
5720         fdir_info->max_flex_payload_segment_num = 0;
5721         fdir_info->flex_payload_limit = 0;
5722         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
5723 }
5724
5725 /**
5726  * Deal with flow director operations.
5727  *
5728  * @param dev
5729  *   Pointer to Ethernet device.
5730  * @param filter_op
5731  *   Operation to perform.
5732  * @param arg
5733  *   Pointer to operation-specific structure.
5734  *
5735  * @return
5736  *   0 on success, a negative errno value otherwise and rte_errno is set.
5737  */
5738 static int
5739 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
5740                     void *arg)
5741 {
5742         enum rte_fdir_mode fdir_mode =
5743                 dev->data->dev_conf.fdir_conf.mode;
5744
5745         if (filter_op == RTE_ETH_FILTER_NOP)
5746                 return 0;
5747         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
5748             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
5749                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
5750                         dev->data->port_id, fdir_mode);
5751                 rte_errno = EINVAL;
5752                 return -rte_errno;
5753         }
5754         switch (filter_op) {
5755         case RTE_ETH_FILTER_ADD:
5756                 return flow_fdir_filter_add(dev, arg);
5757         case RTE_ETH_FILTER_UPDATE:
5758                 return flow_fdir_filter_update(dev, arg);
5759         case RTE_ETH_FILTER_DELETE:
5760                 return flow_fdir_filter_delete(dev, arg);
5761         case RTE_ETH_FILTER_FLUSH:
5762                 flow_fdir_filter_flush(dev);
5763                 break;
5764         case RTE_ETH_FILTER_INFO:
5765                 flow_fdir_info_get(dev, arg);
5766                 break;
5767         default:
5768                 DRV_LOG(DEBUG, "port %u unknown operation %u",
5769                         dev->data->port_id, filter_op);
5770                 rte_errno = EINVAL;
5771                 return -rte_errno;
5772         }
5773         return 0;
5774 }
5775
5776 /**
5777  * Manage filter operations.
5778  *
5779  * @param dev
5780  *   Pointer to Ethernet device structure.
5781  * @param filter_type
5782  *   Filter type.
5783  * @param filter_op
5784  *   Operation to perform.
5785  * @param arg
5786  *   Pointer to operation-specific structure.
5787  *
5788  * @return
5789  *   0 on success, a negative errno value otherwise and rte_errno is set.
5790  */
5791 int
5792 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
5793                      enum rte_filter_type filter_type,
5794                      enum rte_filter_op filter_op,
5795                      void *arg)
5796 {
5797         switch (filter_type) {
5798         case RTE_ETH_FILTER_GENERIC:
5799                 if (filter_op != RTE_ETH_FILTER_GET) {
5800                         rte_errno = EINVAL;
5801                         return -rte_errno;
5802                 }
5803                 *(const void **)arg = &mlx5_flow_ops;
5804                 return 0;
5805         case RTE_ETH_FILTER_FDIR:
5806                 return flow_fdir_ctrl_func(dev, filter_op, arg);
5807         default:
5808                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
5809                         dev->data->port_id, filter_type);
5810                 rte_errno = ENOTSUP;
5811                 return -rte_errno;
5812         }
5813         return 0;
5814 }
5815
5816 /**
5817  * Create the needed meter and suffix tables.
5818  *
5819  * @param[in] dev
5820  *   Pointer to Ethernet device.
5821  * @param[in] fm
5822  *   Pointer to the flow meter.
5823  *
5824  * @return
5825  *   Pointer to table set on success, NULL otherwise.
5826  */
5827 struct mlx5_meter_domains_infos *
5828 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
5829                           const struct mlx5_flow_meter *fm)
5830 {
5831         const struct mlx5_flow_driver_ops *fops;
5832
5833         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5834         return fops->create_mtr_tbls(dev, fm);
5835 }
5836
5837 /**
5838  * Destroy the meter table set.
5839  *
5840  * @param[in] dev
5841  *   Pointer to Ethernet device.
5842  * @param[in] tbl
5843  *   Pointer to the meter table set.
5844  *
5845  * @return
5846  *   0 on success.
5847  */
5848 int
5849 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
5850                            struct mlx5_meter_domains_infos *tbls)
5851 {
5852         const struct mlx5_flow_driver_ops *fops;
5853
5854         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5855         return fops->destroy_mtr_tbls(dev, tbls);
5856 }
5857
5858 /**
5859  * Create policer rules.
5860  *
5861  * @param[in] dev
5862  *   Pointer to Ethernet device.
5863  * @param[in] fm
5864  *   Pointer to flow meter structure.
5865  * @param[in] attr
5866  *   Pointer to flow attributes.
5867  *
5868  * @return
5869  *   0 on success, -1 otherwise.
5870  */
5871 int
5872 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
5873                                struct mlx5_flow_meter *fm,
5874                                const struct rte_flow_attr *attr)
5875 {
5876         const struct mlx5_flow_driver_ops *fops;
5877
5878         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5879         return fops->create_policer_rules(dev, fm, attr);
5880 }
5881
5882 /**
5883  * Destroy policer rules.
5884  *
5885  * @param[in] fm
5886  *   Pointer to flow meter structure.
5887  * @param[in] attr
5888  *   Pointer to flow attributes.
5889  *
5890  * @return
5891  *   0 on success, -1 otherwise.
5892  */
5893 int
5894 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
5895                                 struct mlx5_flow_meter *fm,
5896                                 const struct rte_flow_attr *attr)
5897 {
5898         const struct mlx5_flow_driver_ops *fops;
5899
5900         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5901         return fops->destroy_policer_rules(dev, fm, attr);
5902 }
5903
5904 /**
5905  * Allocate a counter.
5906  *
5907  * @param[in] dev
5908  *   Pointer to Ethernet device structure.
5909  *
5910  * @return
5911  *   Index to allocated counter  on success, 0 otherwise.
5912  */
5913 uint32_t
5914 mlx5_counter_alloc(struct rte_eth_dev *dev)
5915 {
5916         const struct mlx5_flow_driver_ops *fops;
5917         struct rte_flow_attr attr = { .transfer = 0 };
5918
5919         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5920                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5921                 return fops->counter_alloc(dev);
5922         }
5923         DRV_LOG(ERR,
5924                 "port %u counter allocate is not supported.",
5925                  dev->data->port_id);
5926         return 0;
5927 }
5928
5929 /**
5930  * Free a counter.
5931  *
5932  * @param[in] dev
5933  *   Pointer to Ethernet device structure.
5934  * @param[in] cnt
5935  *   Index to counter to be free.
5936  */
5937 void
5938 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
5939 {
5940         const struct mlx5_flow_driver_ops *fops;
5941         struct rte_flow_attr attr = { .transfer = 0 };
5942
5943         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5944                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5945                 fops->counter_free(dev, cnt);
5946                 return;
5947         }
5948         DRV_LOG(ERR,
5949                 "port %u counter free is not supported.",
5950                  dev->data->port_id);
5951 }
5952
5953 /**
5954  * Query counter statistics.
5955  *
5956  * @param[in] dev
5957  *   Pointer to Ethernet device structure.
5958  * @param[in] cnt
5959  *   Index to counter to query.
5960  * @param[in] clear
5961  *   Set to clear counter statistics.
5962  * @param[out] pkts
5963  *   The counter hits packets number to save.
5964  * @param[out] bytes
5965  *   The counter hits bytes number to save.
5966  *
5967  * @return
5968  *   0 on success, a negative errno value otherwise.
5969  */
5970 int
5971 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
5972                    bool clear, uint64_t *pkts, uint64_t *bytes)
5973 {
5974         const struct mlx5_flow_driver_ops *fops;
5975         struct rte_flow_attr attr = { .transfer = 0 };
5976
5977         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5978                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5979                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
5980         }
5981         DRV_LOG(ERR,
5982                 "port %u counter query is not supported.",
5983                  dev->data->port_id);
5984         return -ENOTSUP;
5985 }
5986
5987 #define MLX5_POOL_QUERY_FREQ_US 1000000
5988
5989 /**
5990  * Get number of all validate pools.
5991  *
5992  * @param[in] sh
5993  *   Pointer to mlx5_dev_ctx_shared object.
5994  *
5995  * @return
5996  *   The number of all validate pools.
5997  */
5998 static uint32_t
5999 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
6000 {
6001         int i;
6002         uint32_t pools_n = 0;
6003
6004         for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
6005                 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
6006         return pools_n;
6007 }
6008
6009 /**
6010  * Set the periodic procedure for triggering asynchronous batch queries for all
6011  * the counter pools.
6012  *
6013  * @param[in] sh
6014  *   Pointer to mlx5_dev_ctx_shared object.
6015  */
6016 void
6017 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
6018 {
6019         uint32_t pools_n, us;
6020
6021         pools_n = mlx5_get_all_valid_pool_count(sh);
6022         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
6023         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
6024         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
6025                 sh->cmng.query_thread_on = 0;
6026                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
6027         } else {
6028                 sh->cmng.query_thread_on = 1;
6029         }
6030 }
6031
6032 /**
6033  * The periodic procedure for triggering asynchronous batch queries for all the
6034  * counter pools. This function is probably called by the host thread.
6035  *
6036  * @param[in] arg
6037  *   The parameter for the alarm process.
6038  */
6039 void
6040 mlx5_flow_query_alarm(void *arg)
6041 {
6042         struct mlx5_dev_ctx_shared *sh = arg;
6043         struct mlx5_devx_obj *dcs;
6044         uint16_t offset;
6045         int ret;
6046         uint8_t batch = sh->cmng.batch;
6047         uint8_t age = sh->cmng.age;
6048         uint16_t pool_index = sh->cmng.pool_index;
6049         struct mlx5_pools_container *cont;
6050         struct mlx5_flow_counter_pool *pool;
6051         int cont_loop = MLX5_CCONT_TYPE_MAX;
6052
6053         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
6054                 goto set_alarm;
6055 next_container:
6056         cont = MLX5_CNT_CONTAINER(sh, batch, age);
6057         rte_spinlock_lock(&cont->resize_sl);
6058         if (!cont->pools) {
6059                 rte_spinlock_unlock(&cont->resize_sl);
6060                 /* Check if all the containers are empty. */
6061                 if (unlikely(--cont_loop == 0))
6062                         goto set_alarm;
6063                 batch ^= 0x1;
6064                 pool_index = 0;
6065                 if (batch == 0 && pool_index == 0) {
6066                         age ^= 0x1;
6067                         sh->cmng.batch = batch;
6068                         sh->cmng.age = age;
6069                 }
6070                 goto next_container;
6071         }
6072         pool = cont->pools[pool_index];
6073         rte_spinlock_unlock(&cont->resize_sl);
6074         if (pool->raw_hw)
6075                 /* There is a pool query in progress. */
6076                 goto set_alarm;
6077         pool->raw_hw =
6078                 LIST_FIRST(&sh->cmng.free_stat_raws);
6079         if (!pool->raw_hw)
6080                 /* No free counter statistics raw memory. */
6081                 goto set_alarm;
6082         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
6083                                                               (&pool->a64_dcs);
6084         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
6085         /*
6086          * Identify the counters released between query trigger and query
6087          * handle more effiecntly. The counter released in this gap period
6088          * should wait for a new round of query as the new arrived packets
6089          * will not be taken into account.
6090          */
6091         pool->query_gen++;
6092         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
6093                                                offset, NULL, NULL,
6094                                                pool->raw_hw->mem_mng->dm->id,
6095                                                (void *)(uintptr_t)
6096                                                (pool->raw_hw->data + offset),
6097                                                sh->devx_comp,
6098                                                (uint64_t)(uintptr_t)pool);
6099         if (ret) {
6100                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
6101                         " %d", pool->min_dcs->id);
6102                 pool->raw_hw = NULL;
6103                 goto set_alarm;
6104         }
6105         pool->raw_hw->min_dcs_id = dcs->id;
6106         LIST_REMOVE(pool->raw_hw, next);
6107         sh->cmng.pending_queries++;
6108         pool_index++;
6109         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
6110                 batch ^= 0x1;
6111                 pool_index = 0;
6112                 if (batch == 0 && pool_index == 0)
6113                         age ^= 0x1;
6114         }
6115 set_alarm:
6116         sh->cmng.batch = batch;
6117         sh->cmng.pool_index = pool_index;
6118         sh->cmng.age = age;
6119         mlx5_set_query_alarm(sh);
6120 }
6121
6122 /**
6123  * Check and callback event for new aged flow in the counter pool
6124  *
6125  * @param[in] sh
6126  *   Pointer to mlx5_dev_ctx_shared object.
6127  * @param[in] pool
6128  *   Pointer to Current counter pool.
6129  */
6130 static void
6131 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6132                    struct mlx5_flow_counter_pool *pool)
6133 {
6134         struct mlx5_priv *priv;
6135         struct mlx5_flow_counter *cnt;
6136         struct mlx5_age_info *age_info;
6137         struct mlx5_age_param *age_param;
6138         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6139         struct mlx5_counter_stats_raw *prev = pool->raw;
6140         uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
6141         uint32_t i;
6142
6143         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6144                 cnt = MLX5_POOL_GET_CNT(pool, i);
6145                 age_param = MLX5_CNT_TO_AGE(cnt);
6146                 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
6147                         continue;
6148                 if (cur->data[i].hits != prev->data[i].hits) {
6149                         age_param->expire = curr + age_param->timeout;
6150                         continue;
6151                 }
6152                 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
6153                         continue;
6154                 /**
6155                  * Hold the lock first, or if between the
6156                  * state AGE_TMOUT and tailq operation the
6157                  * release happened, the release procedure
6158                  * may delete a non-existent tailq node.
6159                  */
6160                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6161                 age_info = GET_PORT_AGE_INFO(priv);
6162                 rte_spinlock_lock(&age_info->aged_sl);
6163                 /* If the cpmset fails, release happens. */
6164                 if (rte_atomic16_cmpset((volatile uint16_t *)
6165                                         &age_param->state,
6166                                         AGE_CANDIDATE,
6167                                         AGE_TMOUT) ==
6168                                         AGE_CANDIDATE) {
6169                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6170                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6171                 }
6172                 rte_spinlock_unlock(&age_info->aged_sl);
6173         }
6174         for (i = 0; i < sh->max_port; i++) {
6175                 age_info = &sh->port[i].age_info;
6176                 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
6177                         continue;
6178                 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
6179                         _rte_eth_dev_callback_process
6180                                 (&rte_eth_devices[sh->port[i].devx_ih_port_id],
6181                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
6182                 age_info->flags = 0;
6183         }
6184 }
6185
6186 /**
6187  * Handler for the HW respond about ready values from an asynchronous batch
6188  * query. This function is probably called by the host thread.
6189  *
6190  * @param[in] sh
6191  *   The pointer to the shared device context.
6192  * @param[in] async_id
6193  *   The Devx async ID.
6194  * @param[in] status
6195  *   The status of the completion.
6196  */
6197 void
6198 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6199                                   uint64_t async_id, int status)
6200 {
6201         struct mlx5_flow_counter_pool *pool =
6202                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6203         struct mlx5_counter_stats_raw *raw_to_free;
6204         uint8_t age = !!IS_AGE_POOL(pool);
6205         uint8_t query_gen = pool->query_gen ^ 1;
6206         struct mlx5_pools_container *cont =
6207                 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
6208
6209         if (unlikely(status)) {
6210                 raw_to_free = pool->raw_hw;
6211         } else {
6212                 raw_to_free = pool->raw;
6213                 if (IS_AGE_POOL(pool))
6214                         mlx5_flow_aging_check(sh, pool);
6215                 rte_spinlock_lock(&pool->sl);
6216                 pool->raw = pool->raw_hw;
6217                 rte_spinlock_unlock(&pool->sl);
6218                 /* Be sure the new raw counters data is updated in memory. */
6219                 rte_cio_wmb();
6220                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6221                         rte_spinlock_lock(&cont->csl);
6222                         TAILQ_CONCAT(&cont->counters,
6223                                      &pool->counters[query_gen], next);
6224                         rte_spinlock_unlock(&cont->csl);
6225                 }
6226         }
6227         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6228         pool->raw_hw = NULL;
6229         sh->cmng.pending_queries--;
6230 }
6231
6232 /**
6233  * Translate the rte_flow group index to HW table value.
6234  *
6235  * @param[in] attributes
6236  *   Pointer to flow attributes
6237  * @param[in] external
6238  *   Value is part of flow rule created by request external to PMD.
6239  * @param[in] group
6240  *   rte_flow group index value.
6241  * @param[out] fdb_def_rule
6242  *   Whether fdb jump to table 1 is configured.
6243  * @param[out] table
6244  *   HW table value.
6245  * @param[out] error
6246  *   Pointer to error structure.
6247  *
6248  * @return
6249  *   0 on success, a negative errno value otherwise and rte_errno is set.
6250  */
6251 int
6252 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
6253                          uint32_t group, bool fdb_def_rule, uint32_t *table,
6254                          struct rte_flow_error *error)
6255 {
6256         if (attributes->transfer && external && fdb_def_rule) {
6257                 if (group == UINT32_MAX)
6258                         return rte_flow_error_set
6259                                                 (error, EINVAL,
6260                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6261                                                  NULL,
6262                                                  "group index not supported");
6263                 *table = group + 1;
6264         } else {
6265                 *table = group;
6266         }
6267         return 0;
6268 }
6269
6270 /**
6271  * Discover availability of metadata reg_c's.
6272  *
6273  * Iteratively use test flows to check availability.
6274  *
6275  * @param[in] dev
6276  *   Pointer to the Ethernet device structure.
6277  *
6278  * @return
6279  *   0 on success, a negative errno value otherwise and rte_errno is set.
6280  */
6281 int
6282 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
6283 {
6284         struct mlx5_priv *priv = dev->data->dev_private;
6285         struct mlx5_dev_config *config = &priv->config;
6286         enum modify_reg idx;
6287         int n = 0;
6288
6289         /* reg_c[0] and reg_c[1] are reserved. */
6290         config->flow_mreg_c[n++] = REG_C_0;
6291         config->flow_mreg_c[n++] = REG_C_1;
6292         /* Discover availability of other reg_c's. */
6293         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6294                 struct rte_flow_attr attr = {
6295                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6296                         .priority = MLX5_FLOW_PRIO_RSVD,
6297                         .ingress = 1,
6298                 };
6299                 struct rte_flow_item items[] = {
6300                         [0] = {
6301                                 .type = RTE_FLOW_ITEM_TYPE_END,
6302                         },
6303                 };
6304                 struct rte_flow_action actions[] = {
6305                         [0] = {
6306                                 .type = (enum rte_flow_action_type)
6307                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6308                                 .conf = &(struct mlx5_flow_action_copy_mreg){
6309                                         .src = REG_C_1,
6310                                         .dst = idx,
6311                                 },
6312                         },
6313                         [1] = {
6314                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6315                                 .conf = &(struct rte_flow_action_jump){
6316                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6317                                 },
6318                         },
6319                         [2] = {
6320                                 .type = RTE_FLOW_ACTION_TYPE_END,
6321                         },
6322                 };
6323                 uint32_t flow_idx;
6324                 struct rte_flow *flow;
6325                 struct rte_flow_error error;
6326
6327                 if (!config->dv_flow_en)
6328                         break;
6329                 /* Create internal flow, validation skips copy action. */
6330                 flow_idx = flow_list_create(dev, NULL, &attr, items,
6331                                             actions, false, &error);
6332                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6333                                       flow_idx);
6334                 if (!flow)
6335                         continue;
6336                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6337                         config->flow_mreg_c[n++] = idx;
6338                 flow_list_destroy(dev, NULL, flow_idx);
6339         }
6340         for (; n < MLX5_MREG_C_NUM; ++n)
6341                 config->flow_mreg_c[n] = REG_NONE;
6342         return 0;
6343 }
6344
6345 /**
6346  * Dump flow raw hw data to file
6347  *
6348  * @param[in] dev
6349  *    The pointer to Ethernet device.
6350  * @param[in] file
6351  *   A pointer to a file for output.
6352  * @param[out] error
6353  *   Perform verbose error reporting if not NULL. PMDs initialize this
6354  *   structure in case of error only.
6355  * @return
6356  *   0 on success, a nagative value otherwise.
6357  */
6358 int
6359 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6360                    FILE *file,
6361                    struct rte_flow_error *error __rte_unused)
6362 {
6363         struct mlx5_priv *priv = dev->data->dev_private;
6364         struct mlx5_dev_ctx_shared *sh = priv->sh;
6365
6366         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6367                                        sh->tx_domain, file);
6368 }
6369
6370 /**
6371  * Get aged-out flows.
6372  *
6373  * @param[in] dev
6374  *   Pointer to the Ethernet device structure.
6375  * @param[in] context
6376  *   The address of an array of pointers to the aged-out flows contexts.
6377  * @param[in] nb_countexts
6378  *   The length of context array pointers.
6379  * @param[out] error
6380  *   Perform verbose error reporting if not NULL. Initialized in case of
6381  *   error only.
6382  *
6383  * @return
6384  *   how many contexts get in success, otherwise negative errno value.
6385  *   if nb_contexts is 0, return the amount of all aged contexts.
6386  *   if nb_contexts is not 0 , return the amount of aged flows reported
6387  *   in the context array.
6388  */
6389 int
6390 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
6391                         uint32_t nb_contexts, struct rte_flow_error *error)
6392 {
6393         const struct mlx5_flow_driver_ops *fops;
6394         struct rte_flow_attr attr = { .transfer = 0 };
6395
6396         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6397                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6398                 return fops->get_aged_flows(dev, contexts, nb_contexts,
6399                                                     error);
6400         }
6401         DRV_LOG(ERR,
6402                 "port %u get aged flows is not supported.",
6403                  dev->data->port_id);
6404         return -ENOTSUP;
6405 }