net/mlx5: fix default mark copy flow
drivers/net/mlx5/mlx5_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <netinet/in.h>
#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_flow.h"
#include "mlx5_glue.h"
#include "mlx5_prm.h"
#include "mlx5_rxtx.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/** Device flow drivers. */
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
#endif
extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;

const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;

const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
	[MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
#ifdef HAVE_IBV_FLOW_DV_SUPPORT
	[MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
#endif
	[MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
	[MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
};

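/*
 * Editorial sketch (not part of the driver): flows are dispatched to a
 * backend through the table above, indexed by the device flow type.
 * Assuming the callback members declared in mlx5_flow.h, a dispatch
 * looks roughly like:
 *
 *	const struct mlx5_flow_driver_ops *fops = flow_drv_ops[type];
 *
 *	ret = fops->validate(dev, attr, items, actions, error);
 *
 * MLX5_FLOW_TYPE_MIN/MAX map to a set of null operations used as
 * placeholders for out-of-range types.
 */
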
enum mlx5_expansion {
	MLX5_EXPANSION_ROOT,
	MLX5_EXPANSION_ROOT_OUTER,
	MLX5_EXPANSION_ROOT_ETH_VLAN,
	MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_ETH,
	MLX5_EXPANSION_OUTER_ETH_VLAN,
	MLX5_EXPANSION_OUTER_VLAN,
	MLX5_EXPANSION_OUTER_IPV4,
	MLX5_EXPANSION_OUTER_IPV4_UDP,
	MLX5_EXPANSION_OUTER_IPV4_TCP,
	MLX5_EXPANSION_OUTER_IPV6,
	MLX5_EXPANSION_OUTER_IPV6_UDP,
	MLX5_EXPANSION_OUTER_IPV6_TCP,
	MLX5_EXPANSION_VXLAN,
	MLX5_EXPANSION_VXLAN_GPE,
	MLX5_EXPANSION_GRE,
	MLX5_EXPANSION_MPLS,
	MLX5_EXPANSION_ETH,
	MLX5_EXPANSION_ETH_VLAN,
	MLX5_EXPANSION_VLAN,
	MLX5_EXPANSION_IPV4,
	MLX5_EXPANSION_IPV4_UDP,
	MLX5_EXPANSION_IPV4_TCP,
	MLX5_EXPANSION_IPV6,
	MLX5_EXPANSION_IPV6_UDP,
	MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
	[MLX5_EXPANSION_ROOT] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
						 MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_END,
	},
	[MLX5_EXPANSION_OUTER_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6,
						 MLX5_EXPANSION_MPLS),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.rss_types = 0,
	},
	[MLX5_EXPANSION_OUTER_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
						 MLX5_EXPANSION_OUTER_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_OUTER_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV4_UDP,
			 MLX5_EXPANSION_OUTER_IPV4_TCP,
			 MLX5_EXPANSION_GRE,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV4_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_OUTER_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT
			(MLX5_EXPANSION_OUTER_IPV6_UDP,
			 MLX5_EXPANSION_OUTER_IPV6_TCP,
			 MLX5_EXPANSION_IPV4,
			 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_OUTER_IPV6_UDP] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
						 MLX5_EXPANSION_VXLAN_GPE),
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_OUTER_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
	[MLX5_EXPANSION_VXLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN,
	},
	[MLX5_EXPANSION_VXLAN_GPE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
						 MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
	},
	[MLX5_EXPANSION_GRE] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
		.type = RTE_FLOW_ITEM_TYPE_GRE,
	},
	[MLX5_EXPANSION_MPLS] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_MPLS,
	},
	[MLX5_EXPANSION_ETH] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_ETH_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
		.type = RTE_FLOW_ITEM_TYPE_ETH,
	},
	[MLX5_EXPANSION_VLAN] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
						 MLX5_EXPANSION_IPV6),
		.type = RTE_FLOW_ITEM_TYPE_VLAN,
	},
	[MLX5_EXPANSION_IPV4] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
						 MLX5_EXPANSION_IPV4_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV4,
		.rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
			ETH_RSS_NONFRAG_IPV4_OTHER,
	},
	[MLX5_EXPANSION_IPV4_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
	},
	[MLX5_EXPANSION_IPV4_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
	},
	[MLX5_EXPANSION_IPV6] = {
		.next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
						 MLX5_EXPANSION_IPV6_TCP),
		.type = RTE_FLOW_ITEM_TYPE_IPV6,
		.rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
			ETH_RSS_NONFRAG_IPV6_OTHER,
	},
	[MLX5_EXPANSION_IPV6_UDP] = {
		.type = RTE_FLOW_ITEM_TYPE_UDP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
	},
	[MLX5_EXPANSION_IPV6_TCP] = {
		.type = RTE_FLOW_ITEM_TYPE_TCP,
		.rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
	},
};

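/*
 * Editorial note (hedged): rte_flow_expand_rss() walks the graph above
 * to complete a partial pattern for RSS. For example, a pattern of
 *
 *	eth / ipv4 / end
 *
 * requested with types ETH_RSS_NONFRAG_IPV4_UDP | ETH_RSS_NONFRAG_IPV4_TCP
 * would additionally yield
 *
 *	eth / ipv4 / udp / end
 *	eth / ipv4 / tcp / end
 *
 * by following MLX5_EXPANSION_IPV4 -> {IPV4_UDP, IPV4_TCP} and matching
 * each node's .rss_types against the requested types.
 */
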
static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.isolate = mlx5_flow_isolate,
	.query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
	struct rte_flow_attr attr;
	struct rte_flow_item items[4];
	struct rte_flow_item_eth l2;
	struct rte_flow_item_eth l2_mask;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3;
	union {
		struct rte_flow_item_ipv4 ipv4;
		struct rte_flow_item_ipv6 ipv6;
	} l3_mask;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4;
	union {
		struct rte_flow_item_udp udp;
		struct rte_flow_item_tcp tcp;
	} l4_mask;
	struct rte_flow_action actions[2];
	struct rte_flow_action_queue queue;
};

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
	{ 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
	{ 9, 10, 11 }, { 12, 13, 14 },
};

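/*
 * Editorial example: these tables map an rte_flow priority and a
 * layer-derived sub-priority to a Verbs priority. With 16 Verbs
 * priorities, priority_map_5[1][2] == 5, i.e. rte_flow priority 1 with
 * sub-priority 2 lands on Verbs priority 5 (see
 * mlx5_flow_adjust_priority() below).
 */
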
/* Tunnel information. */
struct mlx5_flow_tunnel_info {
	uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
	uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GENEVE,
		.ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
		.ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_GRE,
		.ptype = RTE_PTYPE_TUNNEL_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_MPLS,
		.ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_NVGRE,
		.ptype = RTE_PTYPE_TUNNEL_NVGRE,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPIP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
	{
		.tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
		.ptype = RTE_PTYPE_TUNNEL_IP,
	},
};

/**
 * Translate tag ID to register.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] feature
 *   The feature that requests the register.
 * @param[in] id
 *   The requested register ID.
 * @param[out] error
 *   Error description in case of failure.
 *
 * @return
 *   The requested register on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
enum modify_reg
mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
		     enum mlx5_feature_name feature,
		     uint32_t id,
		     struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;
	enum modify_reg start_reg;

	switch (feature) {
	case MLX5_HAIRPIN_RX:
		return REG_B;
	case MLX5_HAIRPIN_TX:
		return REG_A;
	case MLX5_METADATA_RX:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_B;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_METADATA_TX:
		return REG_A;
	case MLX5_METADATA_FDB:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_0;
		case MLX5_XMETA_MODE_META32:
			return REG_C_1;
		}
		break;
	case MLX5_FLOW_MARK:
		switch (config->dv_xmeta_en) {
		case MLX5_XMETA_MODE_LEGACY:
			return REG_NONE;
		case MLX5_XMETA_MODE_META16:
			return REG_C_1;
		case MLX5_XMETA_MODE_META32:
			return REG_C_0;
		}
		break;
	case MLX5_COPY_MARK:
	case MLX5_MTR_SFX:
		/*
		 * The metadata copy mark register is only used in the meter
		 * suffix sub flow when a meter is present, so it is safe to
		 * share the same register.
		 */
		return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
	case MLX5_MTR_COLOR:
		RTE_ASSERT(priv->mtr_color_reg != REG_NONE);
		return priv->mtr_color_reg;
	case MLX5_APP_TAG:
		/*
		 * If a meter is enabled, it engages two registers: one for
		 * color match and one for flow match. If the meter color
		 * match is not using REG_C_2, the REG_C_x used by the meter
		 * color match must be skipped.
		 * If no meter is enabled, all available registers can be
		 * used.
		 */
		if (priv->mtr_color_reg != REG_NONE)
			start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_3 :
				    REG_C_4;
		else
			start_reg = REG_C_2;
		if (id > (REG_C_7 - start_reg))
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "invalid tag id");
		if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		/*
		 * This case means the meter is using a REG_C_x greater than
		 * REG_C_2. Take care not to conflict with the meter color
		 * REG_C_x. If the available index REG_C_y >= REG_C_x, skip
		 * the color register.
		 */
		if (start_reg == REG_C_3 && config->flow_mreg_c
		    [id + REG_C_3 - REG_C_0] >= priv->mtr_color_reg) {
			if (config->flow_mreg_c[id + 1 + REG_C_3 - REG_C_0] !=
			    REG_NONE)
				return config->flow_mreg_c
						[id + 1 + REG_C_3 - REG_C_0];
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  NULL, "unsupported tag id");
		}
		return config->flow_mreg_c[id + start_reg - REG_C_0];
	}
	assert(false);
	return rte_flow_error_set(error, EINVAL,
				  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				  NULL, "invalid feature name");
}

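/*
 * Editorial summary (hedged, assuming dv_xmeta_en == MLX5_XMETA_MODE_META16,
 * a meter color register of REG_C_2, and flow_mreg_c[i] == REG_C_i):
 *
 *	mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, &err) -> REG_C_0
 *	mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, &err)   -> REG_C_1
 *	mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, &err)   -> REG_C_3
 *	mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, 0, &err)     -> REG_C_4
 *
 * Application tags start past the meter color register and grow upward
 * to REG_C_7, subject to flow_mreg_c[] availability.
 */
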
/**
 * Check extensive flow metadata register support.
 *
 * @param dev
 *   Pointer to rte_eth_dev structure.
 *
 * @return
 *   True if device supports extensive flow metadata register, otherwise false.
 */
bool
mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct mlx5_dev_config *config = &priv->config;

	/*
	 * Having an available reg_c can be regarded as supporting extensive
	 * flow metadata registers, which implies:
	 * - the metadata register copy action by modify header is available,
	 * - 16 modify header actions are supported,
	 * - reg_c's are preserved across different domains (FDB and NIC) on
	 *   packet loopback by flow lookup miss.
	 */
	return config->flow_mreg_c[2] != REG_NONE;
}

/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct {
		struct ibv_flow_attr attr;
		struct ibv_flow_spec_eth eth;
		struct ibv_flow_spec_action_drop drop;
	} flow_attr = {
		.attr = {
			.num_of_specs = 2,
			.port = (uint8_t)priv->ibv_port,
		},
		.eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
		},
		.drop = {
			.size = sizeof(struct ibv_flow_spec_action_drop),
			.type = IBV_FLOW_SPEC_ACTION_DROP,
		},
	};
	struct ibv_flow *flow;
	struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
	uint16_t vprio[] = { 8, 16 };
	int i;
	int priority = 0;

	if (!drop) {
		rte_errno = ENOTSUP;
		return -rte_errno;
	}
	for (i = 0; i != RTE_DIM(vprio); i++) {
		flow_attr.attr.priority = vprio[i] - 1;
		flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
		if (!flow)
			break;
		claim_zero(mlx5_glue->destroy_flow(flow));
		priority = vprio[i];
	}
	mlx5_hrxq_drop_release(dev);
	switch (priority) {
	case 8:
		priority = RTE_DIM(priority_map_3);
		break;
	case 16:
		priority = RTE_DIM(priority_map_5);
		break;
	default:
		rte_errno = ENOTSUP;
		DRV_LOG(ERR,
			"port %u verbs maximum priority: %d expected 8/16",
			dev->data->port_id, priority);
		return -rte_errno;
	}
	DRV_LOG(INFO, "port %u flow maximum priority: %d",
		dev->data->port_id, priority);
	return priority;
}

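/*
 * Editorial note: the probe above creates a throwaway drop flow at Verbs
 * priority vprio[i] - 1, i.e. first 7 and then 15. The highest attempt
 * that succeeds reveals how many priorities the kernel exposes; the
 * result is then collapsed to RTE_DIM(priority_map_3) == 3 or
 * RTE_DIM(priority_map_5) == 5 usable rte_flow priorities.
 */
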
/**
 * Adjust flow priority based on the highest layer and the requested priority.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] priority
 *   The rule base priority.
 * @param[in] subpriority
 *   The priority based on the items.
 *
 * @return
 *   The new priority.
 */
uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
				   uint32_t subpriority)
{
	uint32_t res = 0;
	struct mlx5_priv *priv = dev->data->dev_private;

	switch (priv->config.flow_prio) {
	case RTE_DIM(priority_map_3):
		res = priority_map_3[priority][subpriority];
		break;
	case RTE_DIM(priority_map_5):
		res = priority_map_5[priority][subpriority];
		break;
	}
	return res;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
			  const uint8_t *mask,
			  const uint8_t *nic_mask,
			  unsigned int size,
			  struct rte_flow_error *error)
{
	unsigned int i;

	assert(nic_mask);
	for (i = 0; i < size; ++i)
		if ((nic_mask[i] | mask[i]) != nic_mask[i])
			return rte_flow_error_set(error, ENOTSUP,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "mask enables non supported"
						  " bits");
	if (!item->spec && (item->mask || item->last))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "mask/last without a spec is not"
					  " supported");
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		unsigned int i;
		int ret;

		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
			last[i] = ((const uint8_t *)item->last)[i] & mask[i];
		}
		ret = memcmp(spec, last, size);
		if (ret != 0)
			return rte_flow_error_set(error, EINVAL,
						  RTE_FLOW_ERROR_TYPE_ITEM,
						  item,
						  "range is not valid");
	}
	return 0;
}

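/*
 * Editorial example (hedged): the spec/last check above accepts a
 * "range" only when the bytes covered by the mask are identical, e.g.
 * for an IPv4 address:
 *
 *	spec = 10.0.0.1, last = 10.0.0.255, mask = 255.255.255.0
 *
 * masked spec and masked last are both 10.0.0.0, so the item passes;
 * with mask = 255.255.255.255 they differ and the item is rejected as
 * "range is not valid".
 */
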
/**
 * Adjust the hash fields according to the @p flow information.
 *
 * @param[in] dev_flow
 *   Pointer to the mlx5_flow.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 *
 * @return
 *   The hash fields that should be used.
 */
uint64_t
mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
			    int tunnel __rte_unused, uint64_t layer_types,
			    uint64_t hash_fields)
{
	struct rte_flow *flow = dev_flow->flow;
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	int rss_request_inner = flow->rss.level >= 2;

	/* Check RSS hash level for tunnel. */
	if (tunnel && rss_request_inner)
		hash_fields |= IBV_RX_HASH_INNER;
	else if (tunnel || rss_request_inner)
		return 0;
#endif
	/* Check if requested layer matches RSS hash fields. */
	if (!(flow->rss.types & layer_types))
		return 0;
	return hash_fields;
}

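/*
 * Editorial example: for a VXLAN flow created with
 * struct rte_flow_action_rss { .level = 2, ... } (inner RSS), the
 * adjustment above turns
 *
 *	IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4
 *
 * into
 *
 *	IBV_RX_HASH_INNER | IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4
 *
 * and returns 0 instead when the requested layer is absent from
 * flow->rss.types.
 */
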
/**
 * Look up and set the ptype in the Rx data path. Only a single tunnel ptype
 * can be used; if several tunnel rules are attached to this queue, the
 * tunnel ptype is cleared.
 *
 * @param rxq_ctrl
 *   Rx queue to update.
 */
static void
flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
{
	unsigned int i;
	uint32_t tunnel_ptype = 0;

	/* Look up for the ptype to use. */
	for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
		if (!rxq_ctrl->flow_tunnels_n[i])
			continue;
		if (!tunnel_ptype) {
			tunnel_ptype = tunnels_info[i].ptype;
		} else {
			tunnel_ptype = 0;
			break;
		}
	}
	rxq_ctrl->rxq.tunnel = tunnel_ptype;
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the
 * device flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] dev_flow
 *   Pointer to device flow structure.
 */
static void
flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow = dev_flow->flow;
	const int mark = !!(dev_flow->actions &
			    (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
	const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	for (i = 0; i != flow->rss.queue_num; ++i) {
		int idx = (*flow->rss.queue)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		/*
		 * To support metadata register copy on Tx loopback,
		 * this must always be enabled (metadata may arrive
		 * from another port, not only from local flows).
		 */
		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n++;
		}
		if (tunnel) {
			unsigned int j;

			/* Increase the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_flow->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]++;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] flow
 *   Pointer to flow structure.
 */
static void
flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow *dev_flow;

	LIST_FOREACH(dev_flow, &flow->dev_flows, next)
		flow_drv_rxq_flags_set(dev, dev_flow);
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * device flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] dev_flow
 *   Pointer to the device flow.
 */
static void
flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	struct rte_flow *flow = dev_flow->flow;
	const int mark = !!(dev_flow->actions &
			    (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
	const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	assert(dev->data->dev_started);
	for (i = 0; i != flow->rss.queue_num; ++i) {
		int idx = (*flow->rss.queue)[i];
		struct mlx5_rxq_ctrl *rxq_ctrl =
			container_of((*priv->rxqs)[idx],
				     struct mlx5_rxq_ctrl, rxq);

		if (priv->config.dv_flow_en &&
		    priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
		    mlx5_flow_ext_mreg_supported(dev)) {
			rxq_ctrl->rxq.mark = 1;
			rxq_ctrl->flow_mark_n = 1;
		} else if (mark) {
			rxq_ctrl->flow_mark_n--;
			rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
		}
		if (tunnel) {
			unsigned int j;

			/* Decrease the counter matching the flow. */
			for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
				if ((tunnels_info[j].tunnel &
				     dev_flow->layers) ==
				    tunnels_info[j].tunnel) {
					rxq_ctrl->flow_tunnels_n[j]--;
					break;
				}
			}
			flow_rxq_tunnel_ptype_update(rxq_ctrl);
		}
	}
}

/**
 * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
 * @p flow if no other flow uses it with the same kind of request.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[in] flow
 *   Pointer to the flow.
 */
static void
flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
{
	struct mlx5_flow *dev_flow;

	LIST_FOREACH(dev_flow, &flow->dev_flows, next)
		flow_drv_rxq_flags_trim(dev, dev_flow);
}

/**
 * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
flow_rxq_flags_clear(struct rte_eth_dev *dev)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	unsigned int i;

	for (i = 0; i != priv->rxqs_n; ++i) {
		struct mlx5_rxq_ctrl *rxq_ctrl;
		unsigned int j;

		if (!(*priv->rxqs)[i])
			continue;
		rxq_ctrl = container_of((*priv->rxqs)[i],
					struct mlx5_rxq_ctrl, rxq);
		rxq_ctrl->flow_mark_n = 0;
		rxq_ctrl->rxq.mark = 0;
		for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
			rxq_ctrl->flow_tunnels_n[j] = 0;
		rxq_ctrl->rxq.tunnel = 0;
	}
}

/*
 * Return a pointer to the desired action in the list of actions.
 *
 * @param[in] actions
 *   The list of actions to search the action in.
 * @param[in] action
 *   The action to find.
 *
 * @return
 *   Pointer to the action in the list, if found. NULL otherwise.
 */
const struct rte_flow_action *
mlx5_flow_find_action(const struct rte_flow_action *actions,
		      enum rte_flow_action_type action)
{
	if (actions == NULL)
		return NULL;
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
		if (actions->type == action)
			return actions;
	return NULL;
}

/*
 * Validate the flag action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_flag(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_DROP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't drop and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't mark and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 flag"
					  " actions in same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "flag action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the mark action.
 *
 * @param[in] action
 *   Pointer to the mark action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
			       uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	const struct rte_flow_action_mark *mark = action->conf;

	if (!mark)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION,
					  action,
					  "configuration cannot be null");
	if (mark->id >= MLX5_FLOW_MARK_MAX)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &mark->id,
					  "mark id must be in 0 <= id < "
					  RTE_STR(MLX5_FLOW_MARK_MAX));
	if (action_flags & MLX5_FLOW_ACTION_DROP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't drop and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't flag and mark in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 mark actions in same"
					  " flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "mark action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the drop action.
 *
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_drop(uint64_t action_flags,
			       const struct rte_flow_attr *attr,
			       struct rte_flow_error *error)
{
	if (action_flags & MLX5_FLOW_ACTION_FLAG)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't drop and flag in same flow");
	if (action_flags & MLX5_FLOW_ACTION_MARK)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't drop and mark in same flow");
	if (action_flags & (MLX5_FLOW_FATE_ACTIONS |
			    MLX5_FLOW_FATE_ESWITCH_ACTIONS))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "drop action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the queue action.
 *
 * @param[in] action
 *   Pointer to the queue action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
				uint64_t action_flags,
				struct rte_eth_dev *dev,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_queue *queue = action->conf;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions in"
					  " same flow");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (queue->index >= priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue index out of range");
	if (!(*priv->rxqs)[queue->index])
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &queue->index,
					  "queue is not configured");
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "queue action not supported for "
					  "egress");
	return 0;
}

/*
 * Validate the rss action.
 *
 * @param[in] action
 *   Pointer to the rss action.
 * @param[in] action_flags
 *   Bit-fields that hold the actions detected until now.
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[in] item_flags
 *   Items that were detected.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
			      uint64_t action_flags,
			      struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attr,
			      uint64_t item_flags,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	const struct rte_flow_action_rss *rss = action->conf;
	int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	unsigned int i;

	if (action_flags & MLX5_FLOW_FATE_ACTIONS)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION, NULL,
					  "can't have 2 fate actions"
					  " in same flow");
	if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
	    rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->func,
					  "RSS hash function not supported");
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
	if (rss->level > 2)
#else
	if (rss->level > 1)
#endif
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->level,
					  "tunnel RSS is not supported");
	/* allow RSS key_len 0 in case of NULL (default) RSS key. */
	if (rss->key_len == 0 && rss->key != NULL)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key length 0");
	if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too small");
	if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->key_len,
					  "RSS hash key too large");
	if (rss->queue_num > priv->config.ind_table_max_size)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->queue_num,
					  "number of queues too large");
	if (rss->types & MLX5_RSS_HF_MASK)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  &rss->types,
					  "some RSS protocols are not"
					  " supported");
	if (!priv->rxqs_n)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No Rx queues configured");
	if (!rss->queue_num)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
					  NULL, "No queues configured");
	for (i = 0; i != rss->queue_num; ++i) {
		if (rss->queue[i] >= priv->rxqs_n)
			return rte_flow_error_set
				(error, EINVAL,
				 RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue index out of range");
		if (!(*priv->rxqs)[rss->queue[i]])
			return rte_flow_error_set
				(error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
				 &rss->queue[i], "queue is not configured");
	}
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "rss action not supported for "
					  "egress");
	if (rss->level > 1 && !tunnel)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
					  "inner RSS is not supported for "
					  "non-tunnel flows");
	return 0;
}

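/*
 * Editorial sketch (hedged): a minimal RSS action that passes the
 * validation above on a port with four configured Rx queues; all values
 * are illustrative:
 *
 *	static const uint16_t queues[] = { 0, 1, 2, 3 };
 *	static const struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_IP,
 *		.key_len = 0,
 *		.key = NULL,
 *		.queue_num = RTE_DIM(queues),
 *		.queue = queues,
 *	};
 *
 * A NULL key with key_len 0 selects the default RSS key, and level 1
 * keeps hashing on the outer headers.
 */
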
/*
 * Validate the count action.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attr
 *   Attributes of flow that includes this action.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
				const struct rte_flow_attr *attr,
				struct rte_flow_error *error)
{
	if (attr->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "count action not supported for "
					  "egress");
	return 0;
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
			      const struct rte_flow_attr *attributes,
			      struct rte_flow_error *error)
{
	struct mlx5_priv *priv = dev->data->dev_private;
	uint32_t priority_max = priv->config.flow_prio - 1;

	if (attributes->group)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
					  NULL, "groups are not supported");
	if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
	    attributes->priority >= priority_max)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
					  NULL, "priority out of range");
	if (attributes->egress)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
					  "egress is not supported");
	if (attributes->transfer && !priv->config.dv_esw_en)
		return rte_flow_error_set(error, ENOTSUP,
					  RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
					  NULL, "transfer is not supported");
	if (!attributes->ingress)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
					  NULL,
					  "ingress attribute is mandatory");
	return 0;
}

/**
 * Validate ICMP6 item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The protocol matched by the preceding L3 item (0xFF when not matched).
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
			      uint64_t item_flags,
			      uint8_t target_protocol,
			      struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp6 *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV6;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP6 layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv6 is mandatory to filter on"
					  " ICMP6");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp6_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp6_mask,
		 sizeof(struct rte_flow_item_icmp6), error);
	if (ret < 0)
		return ret;
	return 0;
}

/**
 * Validate ICMP item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] item_flags
 *   Bit-fields that hold the items detected until now.
 * @param[in] target_protocol
 *   The protocol matched by the preceding L3 item (0xFF when not matched).
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
			     uint64_t item_flags,
			     uint8_t target_protocol,
			     struct rte_flow_error *error)
{
	const struct rte_flow_item_icmp *mask = item->mask;
	const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
	const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
				      MLX5_FLOW_LAYER_OUTER_L3_IPV4;
	const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
				      MLX5_FLOW_LAYER_OUTER_L4;
	int ret;

	if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "protocol filtering not compatible"
					  " with ICMP layer");
	if (!(item_flags & l3m))
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "IPv4 is mandatory to filter"
					  " on ICMP");
	if (item_flags & l4m)
		return rte_flow_error_set(error, EINVAL,
					  RTE_FLOW_ERROR_TYPE_ITEM, item,
					  "multiple L4 layers not supported");
	if (!mask)
		mask = &rte_flow_item_icmp_mask;
	ret = mlx5_flow_item_acceptable
		(item, (const uint8_t *)mask,
		 (const uint8_t *)&rte_flow_item_icmp_mask,
		 sizeof(struct rte_flow_item_icmp), error);
	if (ret < 0)
		return ret;
	return 0;
}

1362 /**
1363  * Validate Ethernet item.
1364  *
1365  * @param[in] item
1366  *   Item specification.
1367  * @param[in] item_flags
1368  *   Bit-fields that hold the items detected until now.
1369  * @param[out] error
1370  *   Pointer to error structure.
1371  *
1372  * @return
1373  *   0 on success, a negative errno value otherwise and rte_errno is set.
1374  */
1375 int
1376 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1377                             uint64_t item_flags,
1378                             struct rte_flow_error *error)
1379 {
1380         const struct rte_flow_item_eth *mask = item->mask;
1381         const struct rte_flow_item_eth nic_mask = {
1382                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1383                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1384                 .type = RTE_BE16(0xffff),
1385         };
1386         int ret;
1387         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1388         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1389                                        MLX5_FLOW_LAYER_OUTER_L2;
1390
1391         if (item_flags & ethm)
1392                 return rte_flow_error_set(error, ENOTSUP,
1393                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1394                                           "multiple L2 layers not supported");
1395         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1396             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1397                 return rte_flow_error_set(error, EINVAL,
1398                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1399                                           "L2 layer should not follow "
1400                                           "L3 layers");
1401         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1402             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1403                 return rte_flow_error_set(error, EINVAL,
1404                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1405                                           "L2 layer should not follow VLAN");
1406         if (!mask)
1407                 mask = &rte_flow_item_eth_mask;
1408         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1409                                         (const uint8_t *)&nic_mask,
1410                                         sizeof(struct rte_flow_item_eth),
1411                                         error);
1412         return ret;
1413 }
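
/*
 * Editorial note (not part of the driver): the ordering rules enforced
 * above, in pattern form:
 *
 *   eth / ipv4 / udp      accepted (single outer L2)
 *   eth / vlan / ipv4     accepted (VLAN may follow L2)
 *   eth / ipv4 / eth      rejected ("L2 layer should not follow L3")
 *   eth / vlan / eth      rejected ("L2 layer should not follow VLAN")
 *
 * A second ETH item is only legal when a tunnel item in between opens
 * the inner stack (e.g. eth / ipv4 / udp / vxlan / eth).
 */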
1414
1415 /**
1416  * Validate VLAN item.
1417  *
1418  * @param[in] item
1419  *   Item specification.
1420  * @param[in] item_flags
1421  *   Bit-fields that hold the items detected until now.
1422  * @param[in] dev
1423  *   Ethernet device flow is being created on.
1424  * @param[out] error
1425  *   Pointer to error structure.
1426  *
1427  * @return
1428  *   0 on success, a negative errno value otherwise and rte_errno is set.
1429  */
1430 int
1431 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1432                              uint64_t item_flags,
1433                              struct rte_eth_dev *dev,
1434                              struct rte_flow_error *error)
1435 {
1436         const struct rte_flow_item_vlan *spec = item->spec;
1437         const struct rte_flow_item_vlan *mask = item->mask;
1438         const struct rte_flow_item_vlan nic_mask = {
1439                 .tci = RTE_BE16(UINT16_MAX),
1440                 .inner_type = RTE_BE16(UINT16_MAX),
1441         };
1442         uint16_t vlan_tag = 0;
1443         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1444         int ret;
1445         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1446                                         MLX5_FLOW_LAYER_INNER_L4) :
1447                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1448                                         MLX5_FLOW_LAYER_OUTER_L4);
1449         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1450                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1451
1452         if (item_flags & vlanm)
1453                 return rte_flow_error_set(error, EINVAL,
1454                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1455                                           "multiple VLAN layers not supported");
1456         else if ((item_flags & l34m) != 0)
1457                 return rte_flow_error_set(error, EINVAL,
1458                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1459                                           "VLAN cannot follow L3/L4 layer");
1460         if (!mask)
1461                 mask = &rte_flow_item_vlan_mask;
1462         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1463                                         (const uint8_t *)&nic_mask,
1464                                         sizeof(struct rte_flow_item_vlan),
1465                                         error);
1466         if (ret)
1467                 return ret;
1468         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1469                 struct mlx5_priv *priv = dev->data->dev_private;
1470
1471                 if (priv->vmwa_context) {
1472                         /*
1473                          * A non-NULL context means we run in a virtual
1474                          * machine with SR-IOV enabled and must create a
1475                          * VLAN interface for the hypervisor to set up the
1476                          * E-Switch vport context. As we avoid creating
1477                          * multiple VLAN interfaces, no tag mask is supported.
1478                          */
1479                         return rte_flow_error_set(error, EINVAL,
1480                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1481                                                   item,
1482                                                   "VLAN tag mask is not"
1483                                                   " supported in virtual"
1484                                                   " environment");
1485                 }
1486         }
1487         if (spec) {
1488                 vlan_tag = spec->tci;
1489                 vlan_tag &= mask->tci;
1490         }
1491         /*
1492          * From the Verbs perspective an empty VLAN is equivalent
1493          * to a packet without a VLAN layer.
1494          */
1495         if (!vlan_tag)
1496                 return rte_flow_error_set(error, EINVAL,
1497                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1498                                           item->spec,
1499                                           "VLAN cannot be empty");
1500         return 0;
1501 }
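
/*
 * Editorial sketch (not part of the driver): a VLAN spec/mask pair that
 * survives the "VLAN cannot be empty" check above; spec->tci & mask->tci
 * must be non-zero. Variable names are hypothetical.
 */
static const struct rte_flow_item_vlan mlx5_example_vlan_spec __rte_unused = {
	.tci = RTE_BE16(0x0123), /* VID 0x123; PCP/DEI left zero. */
};
static const struct rte_flow_item_vlan mlx5_example_vlan_mask __rte_unused = {
	.tci = RTE_BE16(0x0fff), /* Match the VID bits only. */
};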
1502
1503 /**
1504  * Validate IPV4 item.
1505  *
1506  * @param[in] item
1507  *   Item specification.
1508  * @param[in] item_flags
1509  *   Bit-fields that hold the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
1510  * @param[in] acc_mask
1511  *   Acceptable mask; if NULL, the default internal mask is used to
1512  *   check whether the item fields are supported.
1513  * @param[out] error
1514  *   Pointer to error structure.
1515  *
1516  * @return
1517  *   0 on success, a negative errno value otherwise and rte_errno is set.
1518  */
1519 int
1520 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1521                              uint64_t item_flags,
1522                              uint64_t last_item,
1523                              uint16_t ether_type,
1524                              const struct rte_flow_item_ipv4 *acc_mask,
1525                              struct rte_flow_error *error)
1526 {
1527         const struct rte_flow_item_ipv4 *mask = item->mask;
1528         const struct rte_flow_item_ipv4 *spec = item->spec;
1529         const struct rte_flow_item_ipv4 nic_mask = {
1530                 .hdr = {
1531                         .src_addr = RTE_BE32(0xffffffff),
1532                         .dst_addr = RTE_BE32(0xffffffff),
1533                         .type_of_service = 0xff,
1534                         .next_proto_id = 0xff,
1535                 },
1536         };
1537         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1538         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1539                                       MLX5_FLOW_LAYER_OUTER_L3;
1540         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1541                                       MLX5_FLOW_LAYER_OUTER_L4;
1542         int ret;
1543         uint8_t next_proto = 0xFF;
1544         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1545                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1546                                   MLX5_FLOW_LAYER_INNER_VLAN);
1547
1548         if ((last_item & l2_vlan) && ether_type &&
1549             ether_type != RTE_ETHER_TYPE_IPV4)
1550                 return rte_flow_error_set(error, EINVAL,
1551                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1552                                           "IPv4 cannot follow L2/VLAN layer "
1553                                           "whose ether type is not IPv4");
1554         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1555                 if (mask && spec)
1556                         next_proto = mask->hdr.next_proto_id &
1557                                      spec->hdr.next_proto_id;
1558                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1559                         return rte_flow_error_set(error, EINVAL,
1560                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1561                                                   item,
1562                                                   "multiple tunnel "
1563                                                   "not supported");
1564         }
1565         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1566                 return rte_flow_error_set(error, EINVAL,
1567                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1568                                           "wrong tunnel type - IPv6 specified "
1569                                           "but IPv4 item provided");
1570         if (item_flags & l3m)
1571                 return rte_flow_error_set(error, ENOTSUP,
1572                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1573                                           "multiple L3 layers not supported");
1574         else if (item_flags & l4m)
1575                 return rte_flow_error_set(error, EINVAL,
1576                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1577                                           "L3 cannot follow an L4 layer.");
1578         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1579                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1580                 return rte_flow_error_set(error, EINVAL,
1581                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1582                                           "L3 cannot follow an NVGRE layer.");
1583         if (!mask)
1584                 mask = &rte_flow_item_ipv4_mask;
1585         else if (mask->hdr.next_proto_id != 0 &&
1586                  mask->hdr.next_proto_id != 0xff)
1587                 return rte_flow_error_set(error, EINVAL,
1588                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1589                                           "partial mask is not supported"
1590                                           " for protocol");
1591         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1592                                         acc_mask ? (const uint8_t *)acc_mask
1593                                                  : (const uint8_t *)&nic_mask,
1594                                         sizeof(struct rte_flow_item_ipv4),
1595                                         error);
1596         if (ret < 0)
1597                 return ret;
1598         return 0;
1599 }
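
/*
 * Editorial sketch (not part of the driver): hdr.next_proto_id must be
 * matched exactly or not at all; the check above refuses partial masks.
 * Names are hypothetical.
 */
static const struct rte_flow_item_ipv4 mlx5_example_ipv4_mask_ok __rte_unused = {
	.hdr = { .next_proto_id = 0xff },	/* Exact match: accepted. */
};
static const struct rte_flow_item_ipv4 mlx5_example_ipv4_mask_bad __rte_unused = {
	.hdr = { .next_proto_id = 0xf0 },	/* Partial mask: EINVAL. */
};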
1600
1601 /**
1602  * Validate IPV6 item.
1603  *
1604  * @param[in] item
1605  *   Item specification.
1606  * @param[in] item_flags
1607  *   Bit-fields that hold the items detected until now.
 * @param[in] last_item
 *   Previous validated item in the pattern items.
 * @param[in] ether_type
 *   Type in the ethernet layer header (including dot1q).
1608  * @param[in] acc_mask
1609  *   Acceptable mask; if NULL, the default internal mask is used to
1610  *   check whether the item fields are supported.
1611  * @param[out] error
1612  *   Pointer to error structure.
1613  *
1614  * @return
1615  *   0 on success, a negative errno value otherwise and rte_errno is set.
1616  */
1617 int
1618 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1619                              uint64_t item_flags,
1620                              uint64_t last_item,
1621                              uint16_t ether_type,
1622                              const struct rte_flow_item_ipv6 *acc_mask,
1623                              struct rte_flow_error *error)
1624 {
1625         const struct rte_flow_item_ipv6 *mask = item->mask;
1626         const struct rte_flow_item_ipv6 *spec = item->spec;
1627         const struct rte_flow_item_ipv6 nic_mask = {
1628                 .hdr = {
1629                         .src_addr =
1630                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1631                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1632                         .dst_addr =
1633                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1634                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1635                         .vtc_flow = RTE_BE32(0xffffffff),
1636                         .proto = 0xff,
1637                         .hop_limits = 0xff,
1638                 },
1639         };
1640         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1641         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1642                                       MLX5_FLOW_LAYER_OUTER_L3;
1643         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1644                                       MLX5_FLOW_LAYER_OUTER_L4;
1645         int ret;
1646         uint8_t next_proto = 0xFF;
1647         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1648                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1649                                   MLX5_FLOW_LAYER_INNER_VLAN);
1650
1651         if ((last_item & l2_vlan) && ether_type &&
1652             ether_type != RTE_ETHER_TYPE_IPV6)
1653                 return rte_flow_error_set(error, EINVAL,
1654                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1655                                           "IPv6 cannot follow L2/VLAN layer "
1656                                           "whose ether type is not IPv6");
1657         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1658                 if (mask && spec)
1659                         next_proto = mask->hdr.proto & spec->hdr.proto;
1660                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1661                         return rte_flow_error_set(error, EINVAL,
1662                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1663                                                   item,
1664                                                   "multiple tunnel "
1665                                                   "not supported");
1666         }
1667         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1668                 return rte_flow_error_set(error, EINVAL,
1669                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1670                                           "wrong tunnel type - IPv4 specified "
1671                                           "but IPv6 item provided");
1672         if (item_flags & l3m)
1673                 return rte_flow_error_set(error, ENOTSUP,
1674                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1675                                           "multiple L3 layers not supported");
1676         else if (item_flags & l4m)
1677                 return rte_flow_error_set(error, EINVAL,
1678                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1679                                           "L3 cannot follow an L4 layer.");
1680         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1681                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1682                 return rte_flow_error_set(error, EINVAL,
1683                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1684                                           "L3 cannot follow an NVGRE layer.");
1685         if (!mask)
1686                 mask = &rte_flow_item_ipv6_mask;
1687         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1688                                         acc_mask ? (const uint8_t *)acc_mask
1689                                                  : (const uint8_t *)&nic_mask,
1690                                         sizeof(struct rte_flow_item_ipv6),
1691                                         error);
1692         if (ret < 0)
1693                 return ret;
1694         return 0;
1695 }
1696
1697 /**
1698  * Validate UDP item.
1699  *
1700  * @param[in] item
1701  *   Item specification.
1702  * @param[in] item_flags
1703  *   Bit-fields that hold the items detected until now.
1704  * @param[in] target_protocol
1705  *   The next protocol in the previous item.
1708  * @param[out] error
1709  *   Pointer to error structure.
1710  *
1711  * @return
1712  *   0 on success, a negative errno value otherwise and rte_errno is set.
1713  */
1714 int
1715 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1716                             uint64_t item_flags,
1717                             uint8_t target_protocol,
1718                             struct rte_flow_error *error)
1719 {
1720         const struct rte_flow_item_udp *mask = item->mask;
1721         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1722         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1723                                       MLX5_FLOW_LAYER_OUTER_L3;
1724         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1725                                       MLX5_FLOW_LAYER_OUTER_L4;
1726         int ret;
1727
1728         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1729                 return rte_flow_error_set(error, EINVAL,
1730                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1731                                           "protocol filtering not compatible"
1732                                           " with UDP layer");
1733         if (!(item_flags & l3m))
1734                 return rte_flow_error_set(error, EINVAL,
1735                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1736                                           "L3 is mandatory to filter on L4");
1737         if (item_flags & l4m)
1738                 return rte_flow_error_set(error, EINVAL,
1739                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1740                                           "multiple L4 layers not supported");
1741         if (!mask)
1742                 mask = &rte_flow_item_udp_mask;
1743         ret = mlx5_flow_item_acceptable
1744                 (item, (const uint8_t *)mask,
1745                  (const uint8_t *)&rte_flow_item_udp_mask,
1746                  sizeof(struct rte_flow_item_udp), error);
1747         if (ret < 0)
1748                 return ret;
1749         return 0;
1750 }
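
/*
 * Editorial sketch (not part of the driver): how a caller can derive
 * target_protocol from the preceding IPv4 item. Only a fully masked
 * protocol constrains the L4 item; anything else resolves to 0xff
 * ("unconstrained"). The helper name is hypothetical.
 */
static __rte_unused uint8_t
mlx5_example_ipv4_target_proto(const struct rte_flow_item *item)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;

	if (!spec || !mask || mask->hdr.next_proto_id != 0xff)
		return 0xff;
	return spec->hdr.next_proto_id;
}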
1751
1752 /**
1753  * Validate TCP item.
1754  *
1755  * @param[in] item
1756  *   Item specification.
1757  * @param[in] item_flags
1758  *   Bit-fields that hold the items detected until now.
1759  * @param[in] target_protocol
1760  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
1761  * @param[out] error
1762  *   Pointer to error structure.
1763  *
1764  * @return
1765  *   0 on success, a negative errno value otherwise and rte_errno is set.
1766  */
1767 int
1768 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1769                             uint64_t item_flags,
1770                             uint8_t target_protocol,
1771                             const struct rte_flow_item_tcp *flow_mask,
1772                             struct rte_flow_error *error)
1773 {
1774         const struct rte_flow_item_tcp *mask = item->mask;
1775         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1776         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1777                                       MLX5_FLOW_LAYER_OUTER_L3;
1778         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1779                                       MLX5_FLOW_LAYER_OUTER_L4;
1780         int ret;
1781
1782         assert(flow_mask);
1783         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1784                 return rte_flow_error_set(error, EINVAL,
1785                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1786                                           "protocol filtering not compatible"
1787                                           " with TCP layer");
1788         if (!(item_flags & l3m))
1789                 return rte_flow_error_set(error, EINVAL,
1790                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1791                                           "L3 is mandatory to filter on L4");
1792         if (item_flags & l4m)
1793                 return rte_flow_error_set(error, EINVAL,
1794                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1795                                           "multiple L4 layers not supported");
1796         if (!mask)
1797                 mask = &rte_flow_item_tcp_mask;
1798         ret = mlx5_flow_item_acceptable
1799                 (item, (const uint8_t *)mask,
1800                  (const uint8_t *)flow_mask,
1801                  sizeof(struct rte_flow_item_tcp), error);
1802         if (ret < 0)
1803                 return ret;
1804         return 0;
1805 }
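
/*
 * Editorial sketch (not part of the driver): flow_mask lets each
 * backend declare which TCP header fields it is able to match. A
 * hypothetical backend limited to the port fields would pass:
 */
static const struct rte_flow_item_tcp mlx5_example_tcp_flow_mask __rte_unused = {
	.hdr = {
		.src_port = RTE_BE16(UINT16_MAX),
		.dst_port = RTE_BE16(UINT16_MAX),
	},
};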
1806
1807 /**
1808  * Validate VXLAN item.
1809  *
1810  * @param[in] item
1811  *   Item specification.
1812  * @param[in] item_flags
1813  *   Bit-fields that hold the items detected until now.
1816  * @param[out] error
1817  *   Pointer to error structure.
1818  *
1819  * @return
1820  *   0 on success, a negative errno value otherwise and rte_errno is set.
1821  */
1822 int
1823 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1824                               uint64_t item_flags,
1825                               struct rte_flow_error *error)
1826 {
1827         const struct rte_flow_item_vxlan *spec = item->spec;
1828         const struct rte_flow_item_vxlan *mask = item->mask;
1829         int ret;
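        /*
         * Scratch union: folds the 3-byte VNI of the spec and the mask
         * into one 32-bit scalar so the two can be ANDed and tested
         * against zero ("vlan_id" is a historical name; it holds a VNI).
         */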
1830         union vni {
1831                 uint32_t vlan_id;
1832                 uint8_t vni[4];
1833         } id = { .vlan_id = 0, };
1834         uint32_t vlan_id = 0;
1835
1837         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1838                 return rte_flow_error_set(error, ENOTSUP,
1839                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1840                                           "multiple tunnel layers not"
1841                                           " supported");
1842         /*
1843          * Verify only UDPv4 is present as defined in
1844          * https://tools.ietf.org/html/rfc7348
1845          */
1846         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1847                 return rte_flow_error_set(error, EINVAL,
1848                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1849                                           "no outer UDP layer found");
1850         if (!mask)
1851                 mask = &rte_flow_item_vxlan_mask;
1852         ret = mlx5_flow_item_acceptable
1853                 (item, (const uint8_t *)mask,
1854                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1855                  sizeof(struct rte_flow_item_vxlan),
1856                  error);
1857         if (ret < 0)
1858                 return ret;
1859         if (spec) {
1860                 memcpy(&id.vni[1], spec->vni, 3);
1861                 vlan_id = id.vlan_id;
1862                 memcpy(&id.vni[1], mask->vni, 3);
1863                 vlan_id &= id.vlan_id;
1864         }
1865         /*
1866          * Tunnel id 0 is equivalent to not adding a VXLAN layer. If
1867          * only this layer is defined in the Verbs specification, it
1868          * is interpreted as a wildcard and all packets will match
1869          * this rule. If it follows a full stack layer (ex: eth /
1870          * ipv4 / udp), all packets matching the preceding layers
1871          * will also match this rule. To avoid such a situation,
1872          * VNI 0 is currently refused.
1873          */
1874         if (!vlan_id)
1875                 return rte_flow_error_set(error, ENOTSUP,
1876                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1877                                           "VXLAN vni cannot be 0");
1878         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1879                 return rte_flow_error_set(error, ENOTSUP,
1880                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1881                                           "VXLAN tunnel must be fully defined");
1882         return 0;
1883 }
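
/*
 * Editorial sketch (not part of the driver): an equivalent of the
 * masked-VNI test above written without the union, for clarity. The
 * function name is hypothetical.
 */
static __rte_unused uint32_t
mlx5_example_masked_vni(const uint8_t spec_vni[3], const uint8_t mask_vni[3])
{
	return ((uint32_t)(spec_vni[0] & mask_vni[0]) << 16) |
	       ((uint32_t)(spec_vni[1] & mask_vni[1]) << 8) |
	       (uint32_t)(spec_vni[2] & mask_vni[2]);
}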
1884
1885 /**
1886  * Validate VXLAN_GPE item.
1887  *
1888  * @param[in] item
1889  *   Item specification.
1890  * @param[in] item_flags
1891  *   Bit-fields that hold the items detected until now.
1892  * @param[in] dev
1893  *   Pointer to the rte_eth_dev structure.
1896  * @param[out] error
1897  *   Pointer to error structure.
1898  *
1899  * @return
1900  *   0 on success, a negative errno value otherwise and rte_errno is set.
1901  */
1902 int
1903 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1904                                   uint64_t item_flags,
1905                                   struct rte_eth_dev *dev,
1906                                   struct rte_flow_error *error)
1907 {
1908         struct mlx5_priv *priv = dev->data->dev_private;
1909         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1910         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1911         int ret;
1912         union vni {
1913                 uint32_t vlan_id;
1914                 uint8_t vni[4];
1915         } id = { .vlan_id = 0, };
1916         uint32_t vlan_id = 0;
1917
1918         if (!priv->config.l3_vxlan_en)
1919                 return rte_flow_error_set(error, ENOTSUP,
1920                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1921                                           "L3 VXLAN is not enabled by device"
1922                                           " parameter and/or not configured in"
1923                                           " firmware");
1924         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1925                 return rte_flow_error_set(error, ENOTSUP,
1926                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1927                                           "multiple tunnel layers not"
1928                                           " supported");
1929         /*
1930          * Verify only UDP is present, as required by the VXLAN-GPE
1931          * specification (draft-ietf-nvo3-vxlan-gpe).
1932          */
1933         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1934                 return rte_flow_error_set(error, EINVAL,
1935                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1936                                           "no outer UDP layer found");
1937         if (!mask)
1938                 mask = &rte_flow_item_vxlan_gpe_mask;
1939         ret = mlx5_flow_item_acceptable
1940                 (item, (const uint8_t *)mask,
1941                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1942                  sizeof(struct rte_flow_item_vxlan_gpe),
1943                  error);
1944         if (ret < 0)
1945                 return ret;
1946         if (spec) {
1947                 if (spec->protocol)
1948                         return rte_flow_error_set(error, ENOTSUP,
1949                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1950                                                   item,
1951                                                   "VxLAN-GPE protocol"
1952                                                   " not supported");
1953                 memcpy(&id.vni[1], spec->vni, 3);
1954                 vlan_id = id.vlan_id;
1955                 memcpy(&id.vni[1], mask->vni, 3);
1956                 vlan_id &= id.vlan_id;
1957         }
1958         /*
1959          * Tunnel id 0 is equivalent to not adding a VXLAN layer. If only this
1960          * layer is defined in the Verbs specification, it is interpreted as a
1961          * wildcard and all packets will match this rule. If it follows a full
1962          * stack layer (ex: eth / ipv4 / udp), all packets matching the
1963          * preceding layers will also match this rule. To avoid such a
1964          * situation, VNI 0 is currently refused.
1965          */
1966         if (!vlan_id)
1967                 return rte_flow_error_set(error, ENOTSUP,
1968                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1969                                           "VXLAN-GPE vni cannot be 0");
1970         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1971                 return rte_flow_error_set(error, ENOTSUP,
1972                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1973                                           "VXLAN-GPE tunnel must be fully"
1974                                           " defined");
1975         return 0;
1976 }
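
/*
 * Editorial note (not part of the driver): matching VXLAN-GPE (and L3
 * VXLAN) requires the "l3_vxlan_en=1" device argument, e.g.
 * "-w <PCI address>,l3_vxlan_en=1", together with matching firmware
 * configuration; otherwise the first check above fails with ENOTSUP.
 */
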
1977 /**
1978  * Validate GRE Key item.
1979  *
1980  * @param[in] item
1981  *   Item specification.
1982  * @param[in] item_flags
1983  *   Bit flags to mark detected items.
1984  * @param[in] gre_item
1985  *   Pointer to the GRE item preceding this key.
1986  * @param[out] error
1987  *   Pointer to error structure.
1988  *
1989  * @return
1990  *   0 on success, a negative errno value otherwise and rte_errno is set.
1991  */
1992 int
1993 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
1994                                 uint64_t item_flags,
1995                                 const struct rte_flow_item *gre_item,
1996                                 struct rte_flow_error *error)
1997 {
1998         const rte_be32_t *mask = item->mask;
1999         int ret = 0;
2000         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
2001         const struct rte_flow_item_gre *gre_spec;
2002         const struct rte_flow_item_gre *gre_mask;
2003
2004         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
2005                 return rte_flow_error_set(error, ENOTSUP,
2006                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2007                                           "Multiple GRE keys not supported");
2008         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
2009                 return rte_flow_error_set(error, ENOTSUP,
2010                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2011                                           "No preceding GRE header");
2012         if (item_flags & MLX5_FLOW_LAYER_INNER)
2013                 return rte_flow_error_set(error, ENOTSUP,
2014                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2015                                           "GRE key following a wrong item");
2016         gre_mask = gre_item->mask;
2017         if (!gre_mask)
2018                 gre_mask = &rte_flow_item_gre_mask;
2019         gre_spec = gre_item->spec;
2020         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
2021                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
2022                 return rte_flow_error_set(error, EINVAL,
2023                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2024                                           "Key bit must be on");
2025
2026         if (!mask)
2027                 mask = &gre_key_default_mask;
2028         ret = mlx5_flow_item_acceptable
2029                 (item, (const uint8_t *)mask,
2030                  (const uint8_t *)&gre_key_default_mask,
2031                  sizeof(rte_be32_t), error);
2032         return ret;
2033 }
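
/*
 * Editorial sketch (not part of the driver): a GRE spec that satisfies
 * the "Key bit must be on" check above; RTE_BE16(0x2000) is the K bit
 * of the big-endian c_rsvd0_ver word. A GRE_KEY item carrying the
 * 32-bit key would follow it in the pattern. Names are hypothetical.
 */
static const struct rte_flow_item_gre mlx5_example_gre_keyed __rte_unused = {
	.c_rsvd0_ver = RTE_BE16(0x2000),
};
static const rte_be32_t mlx5_example_gre_key __rte_unused = RTE_BE32(42);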
2034
2035 /**
2036  * Validate GRE item.
2037  *
2038  * @param[in] item
2039  *   Item specification.
2040  * @param[in] item_flags
2041  *   Bit flags to mark detected items.
2042  * @param[in] target_protocol
2043  *   The next protocol in the previous item.
2044  * @param[out] error
2045  *   Pointer to error structure.
2046  *
2047  * @return
2048  *   0 on success, a negative errno value otherwise and rte_errno is set.
2049  */
2050 int
2051 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2052                             uint64_t item_flags,
2053                             uint8_t target_protocol,
2054                             struct rte_flow_error *error)
2055 {
2056         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2057         const struct rte_flow_item_gre *mask = item->mask;
2058         int ret;
2059         const struct rte_flow_item_gre nic_mask = {
2060                 .c_rsvd0_ver = RTE_BE16(0xB000),
2061                 .protocol = RTE_BE16(UINT16_MAX),
2062         };
2063
2064         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2065                 return rte_flow_error_set(error, EINVAL,
2066                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2067                                           "protocol filtering not compatible"
2068                                           " with this GRE layer");
2069         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2070                 return rte_flow_error_set(error, ENOTSUP,
2071                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2072                                           "multiple tunnel layers not"
2073                                           " supported");
2074         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2075                 return rte_flow_error_set(error, ENOTSUP,
2076                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2077                                           "L3 Layer is missing");
2078         if (!mask)
2079                 mask = &rte_flow_item_gre_mask;
2080         ret = mlx5_flow_item_acceptable
2081                 (item, (const uint8_t *)mask,
2082                  (const uint8_t *)&nic_mask,
2083                  sizeof(struct rte_flow_item_gre), error);
2084         if (ret < 0)
2085                 return ret;
2086 #ifndef HAVE_MLX5DV_DR
2087 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2088         if (spec && (spec->protocol & mask->protocol))
2089                 return rte_flow_error_set(error, ENOTSUP,
2090                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2091                                           "without MPLS support the"
2092                                           " specification cannot be used for"
2093                                           " filtering");
2094 #endif
2095 #endif
2096         return 0;
2097 }
2098
2099 /**
2100  * Validate Geneve item.
2101  *
2102  * @param[in] item
2103  *   Item specification.
2104  * @param[in] item_flags
2105  *   Bit-fields that hold the items detected until now.
2106  * @param[in] dev
2107  *   Pointer to the rte_eth_dev structure.
2108  * @param[out] error
2109  *   Pointer to error structure.
2110  *
2111  * @return
2112  *   0 on success, a negative errno value otherwise and rte_errno is set.
2113  */
2115 int
2116 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2117                                uint64_t item_flags,
2118                                struct rte_eth_dev *dev,
2119                                struct rte_flow_error *error)
2120 {
2121         struct mlx5_priv *priv = dev->data->dev_private;
2122         const struct rte_flow_item_geneve *spec = item->spec;
2123         const struct rte_flow_item_geneve *mask = item->mask;
2124         int ret;
2125         uint16_t gbhdr;
2126         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2127                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2128         const struct rte_flow_item_geneve nic_mask = {
2129                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2130                 .vni = "\xff\xff\xff",
2131                 .protocol = RTE_BE16(UINT16_MAX),
2132         };
2133
2134         if (!(priv->config.hca_attr.flex_parser_protocols &
2135               MLX5_HCA_FLEX_GENEVE_ENABLED) ||
2136             !priv->config.hca_attr.tunnel_stateless_geneve_rx)
2137                 return rte_flow_error_set(error, ENOTSUP,
2138                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2139                                           "L3 Geneve is not enabled by device"
2140                                           " parameter and/or not configured in"
2141                                           " firmware");
2142         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2143                 return rte_flow_error_set(error, ENOTSUP,
2144                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2145                                           "multiple tunnel layers not"
2146                                           " supported");
2147         /*
2148          * Verify only UDP is present, as required by the Geneve
2149          * specification (draft-ietf-nvo3-geneve).
2150          */
2151         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2152                 return rte_flow_error_set(error, EINVAL,
2153                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2154                                           "no outer UDP layer found");
2155         if (!mask)
2156                 mask = &rte_flow_item_geneve_mask;
2157         ret = mlx5_flow_item_acceptable
2158                                   (item, (const uint8_t *)mask,
2159                                    (const uint8_t *)&nic_mask,
2160                                    sizeof(struct rte_flow_item_geneve), error);
2161         if (ret)
2162                 return ret;
2163         if (spec) {
2164                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2165                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2166                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2167                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2168                         return rte_flow_error_set(error, ENOTSUP,
2169                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2170                                                   item,
2171                                                   "Geneve protocol unsupported"
2172                                                   " fields are being used");
2173                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2174                         return rte_flow_error_set
2175                                         (error, ENOTSUP,
2176                                          RTE_FLOW_ERROR_TYPE_ITEM,
2177                                          item,
2178                                          "Unsupported Geneve options length");
2179         }
2180         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2181                 return rte_flow_error_set
2182                                     (error, ENOTSUP,
2183                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2184                                      "Geneve tunnel must be fully defined");
2185         return 0;
2186 }
2187
2188 /**
2189  * Validate MPLS item.
2190  *
2191  * @param[in] dev
2192  *   Pointer to the rte_eth_dev structure.
2193  * @param[in] item
2194  *   Item specification.
2195  * @param[in] item_flags
2196  *   Bit-fields that hold the items detected until now.
2197  * @param[in] prev_layer
2198  *   The protocol layer indicated in previous item.
2199  * @param[out] error
2200  *   Pointer to error structure.
2201  *
2202  * @return
2203  *   0 on success, a negative errno value otherwise and rte_errno is set.
2204  */
2205 int
2206 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2207                              const struct rte_flow_item *item __rte_unused,
2208                              uint64_t item_flags __rte_unused,
2209                              uint64_t prev_layer __rte_unused,
2210                              struct rte_flow_error *error)
2211 {
2212 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2213         const struct rte_flow_item_mpls *mask = item->mask;
2214         struct mlx5_priv *priv = dev->data->dev_private;
2215         int ret;
2216
2217         if (!priv->config.mpls_en)
2218                 return rte_flow_error_set(error, ENOTSUP,
2219                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2220                                           "MPLS not supported or"
2221                                           " disabled in firmware"
2222                                           " configuration.");
2223         /* MPLS over IP, UDP or GRE is allowed. */
2224         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2225                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2226                             MLX5_FLOW_LAYER_GRE)))
2227                 return rte_flow_error_set(error, EINVAL,
2228                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2229                                           "protocol filtering not compatible"
2230                                           " with MPLS layer");
2231         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2232         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2233             !(item_flags & MLX5_FLOW_LAYER_GRE))
2234                 return rte_flow_error_set(error, ENOTSUP,
2235                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2236                                           "multiple tunnel layers not"
2237                                           " supported");
2238         if (!mask)
2239                 mask = &rte_flow_item_mpls_mask;
2240         ret = mlx5_flow_item_acceptable
2241                 (item, (const uint8_t *)mask,
2242                  (const uint8_t *)&rte_flow_item_mpls_mask,
2243                  sizeof(struct rte_flow_item_mpls), error);
2244         if (ret < 0)
2245                 return ret;
2246         return 0;
2247 #endif
2248         return rte_flow_error_set(error, ENOTSUP,
2249                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2250                                   "MPLS is not supported by Verbs, please"
2251                                   " update the rdma-core library.");
2252 }
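
/*
 * Editorial sketch (not part of the driver): GRE is the one tunnel
 * MPLS may stack on, per the exception above. The array name is
 * hypothetical.
 */
static const struct rte_flow_item mlx5_example_mpls_over_gre[] __rte_unused = {
	{ .type = RTE_FLOW_ITEM_TYPE_ETH },
	{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
	{ .type = RTE_FLOW_ITEM_TYPE_GRE },
	{ .type = RTE_FLOW_ITEM_TYPE_MPLS },
	{ .type = RTE_FLOW_ITEM_TYPE_END },
};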
2253
2254 /**
2255  * Validate NVGRE item.
2256  *
2257  * @param[in] item
2258  *   Item specification.
2259  * @param[in] item_flags
2260  *   Bit flags to mark detected items.
2261  * @param[in] target_protocol
2262  *   The next protocol in the previous item.
2263  * @param[out] error
2264  *   Pointer to error structure.
2265  *
2266  * @return
2267  *   0 on success, a negative errno value otherwise and rte_errno is set.
2268  */
2269 int
2270 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2271                               uint64_t item_flags,
2272                               uint8_t target_protocol,
2273                               struct rte_flow_error *error)
2274 {
2275         const struct rte_flow_item_nvgre *mask = item->mask;
2276         int ret;
2277
2278         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2279                 return rte_flow_error_set(error, EINVAL,
2280                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2281                                           "protocol filtering not compatible"
2282                                           " with this GRE layer");
2283         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2284                 return rte_flow_error_set(error, ENOTSUP,
2285                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2286                                           "multiple tunnel layers not"
2287                                           " supported");
2288         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2289                 return rte_flow_error_set(error, ENOTSUP,
2290                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2291                                           "L3 Layer is missing");
2292         if (!mask)
2293                 mask = &rte_flow_item_nvgre_mask;
2294         ret = mlx5_flow_item_acceptable
2295                 (item, (const uint8_t *)mask,
2296                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2297                  sizeof(struct rte_flow_item_nvgre), error);
2298         if (ret < 0)
2299                 return ret;
2300         return 0;
2301 }
2302
2303 /* Allocate unique ID for the split Q/RSS subflows. */
2304 static uint32_t
2305 flow_qrss_get_id(struct rte_eth_dev *dev)
2306 {
2307         struct mlx5_priv *priv = dev->data->dev_private;
2308         uint32_t qrss_id, ret;
2309
2310         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2311         if (ret)
2312                 return 0;
2313         assert(qrss_id);
2314         return qrss_id;
2315 }
2316
2317 /* Free unique ID for the split Q/RSS subflows. */
2318 static void
2319 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2320 {
2321         struct mlx5_priv *priv = dev->data->dev_private;
2322
2323         if (qrss_id)
2324                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2325 }
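
/*
 * Editorial sketch (not part of the driver): ID 0 doubles as the
 * "no ID"/failure value, which is why flow_qrss_free_id() may be
 * called unconditionally. The function name is hypothetical.
 */
static __rte_unused void
mlx5_example_qrss_id_usage(struct rte_eth_dev *dev)
{
	uint32_t id = flow_qrss_get_id(dev); /* 0 on allocation failure. */

	/* ... tag the split Q/RSS subflows with the ID ... */
	flow_qrss_free_id(dev, id); /* No-op when id == 0. */
}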
2326
2327 /**
2328  * Release resources related to the QUEUE/RSS action split.
2329  *
2330  * @param dev
2331  *   Pointer to Ethernet device.
2332  * @param flow
2333  *   Flow to release the IDs from.
2334  */
2335 static void
2336 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2337                              struct rte_flow *flow)
2338 {
2339         struct mlx5_flow *dev_flow;
2340
2341         LIST_FOREACH(dev_flow, &flow->dev_flows, next)
2342                 if (dev_flow->qrss_id)
2343                         flow_qrss_free_id(dev, dev_flow->qrss_id);
2344 }
2345
2346 static int
2347 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2348                    const struct rte_flow_attr *attr __rte_unused,
2349                    const struct rte_flow_item items[] __rte_unused,
2350                    const struct rte_flow_action actions[] __rte_unused,
2351                    bool external __rte_unused,
2352                    struct rte_flow_error *error)
2353 {
2354         return rte_flow_error_set(error, ENOTSUP,
2355                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2356 }
2357
2358 static struct mlx5_flow *
2359 flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
2360                   const struct rte_flow_item items[] __rte_unused,
2361                   const struct rte_flow_action actions[] __rte_unused,
2362                   struct rte_flow_error *error)
2363 {
2364         rte_flow_error_set(error, ENOTSUP,
2365                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2366         return NULL;
2367 }
2368
2369 static int
2370 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2371                     struct mlx5_flow *dev_flow __rte_unused,
2372                     const struct rte_flow_attr *attr __rte_unused,
2373                     const struct rte_flow_item items[] __rte_unused,
2374                     const struct rte_flow_action actions[] __rte_unused,
2375                     struct rte_flow_error *error)
2376 {
2377         return rte_flow_error_set(error, ENOTSUP,
2378                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2379 }
2380
2381 static int
2382 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2383                 struct rte_flow *flow __rte_unused,
2384                 struct rte_flow_error *error)
2385 {
2386         return rte_flow_error_set(error, ENOTSUP,
2387                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2388 }
2389
2390 static void
2391 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2392                  struct rte_flow *flow __rte_unused)
2393 {
2394 }
2395
2396 static void
2397 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2398                   struct rte_flow *flow __rte_unused)
2399 {
2400 }
2401
2402 static int
2403 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2404                 struct rte_flow *flow __rte_unused,
2405                 const struct rte_flow_action *actions __rte_unused,
2406                 void *data __rte_unused,
2407                 struct rte_flow_error *error)
2408 {
2409         return rte_flow_error_set(error, ENOTSUP,
2410                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2411 }
2412
2413 /* Void driver to protect from a NULL pointer dereference. */
2414 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2415         .validate = flow_null_validate,
2416         .prepare = flow_null_prepare,
2417         .translate = flow_null_translate,
2418         .apply = flow_null_apply,
2419         .remove = flow_null_remove,
2420         .destroy = flow_null_destroy,
2421         .query = flow_null_query,
2422 };
2423
2424 /**
2425  * Select flow driver type according to flow attributes and device
2426  * configuration.
2427  *
2428  * @param[in] dev
2429  *   Pointer to the dev structure.
2430  * @param[in] attr
2431  *   Pointer to the flow attributes.
2432  *
2433  * @return
2434  *   Flow driver type on success, MLX5_FLOW_TYPE_MAX otherwise.
2435  */
2436 static enum mlx5_flow_drv_type
2437 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2438 {
2439         struct mlx5_priv *priv = dev->data->dev_private;
2440         enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
2441
2442         if (attr->transfer && priv->config.dv_esw_en)
2443                 type = MLX5_FLOW_TYPE_DV;
2444         if (!attr->transfer)
2445                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2446                                                  MLX5_FLOW_TYPE_VERBS;
2447         return type;
2448 }
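
/*
 * Editorial note (not part of the driver): selection summary of the
 * function above.
 *
 *   attr->transfer  dv_esw_en  dv_flow_en  ->  driver type
 *        1              1          -           DV
 *        1              0          -           MAX (unsupported)
 *        0              -          1           DV
 *        0              -          0           VERBS
 */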
2449
2450 #define flow_get_drv_ops(type) flow_drv_ops[type]
2451
2452 /**
2453  * Flow driver validation API. This abstracts calling driver specific functions.
2454  * The type of flow driver is determined according to flow attributes.
2455  *
2456  * @param[in] dev
2457  *   Pointer to the dev structure.
2458  * @param[in] attr
2459  *   Pointer to the flow attributes.
2460  * @param[in] items
2461  *   Pointer to the list of items.
2462  * @param[in] actions
2463  *   Pointer to the list of actions.
2464  * @param[in] external
2465  *   True if this flow rule is created by a request external to the PMD.
2466  * @param[out] error
2467  *   Pointer to the error structure.
2468  *
2469  * @return
2470  *   0 on success, a negative errno value otherwise and rte_errno is set.
2471  */
2472 static inline int
2473 flow_drv_validate(struct rte_eth_dev *dev,
2474                   const struct rte_flow_attr *attr,
2475                   const struct rte_flow_item items[],
2476                   const struct rte_flow_action actions[],
2477                   bool external, struct rte_flow_error *error)
2478 {
2479         const struct mlx5_flow_driver_ops *fops;
2480         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2481
2482         fops = flow_get_drv_ops(type);
2483         return fops->validate(dev, attr, items, actions, external, error);
2484 }
2485
2486 /**
2487  * Flow driver preparation API. This abstracts calling driver specific
2488  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2489  * calculates the size of memory required for device flow, allocates the memory,
2490  * initializes the device flow and returns the pointer.
2491  *
2492  * @note
2493  *   This function initializes the device flow structure, such as dv or verbs
2494  *   in struct mlx5_flow. However, it is the caller's responsibility to
2495  *   initialize the rest, e.g. adding the returned device flow to the
2496  *   flow->dev_flows list and setting the backward reference to the parent
2497  *   flow. The layers field is not filled either.
2498  *
 * @param[in] flow
 *   Pointer to the parent flow structure.
2499  * @param[in] attr
2500  *   Pointer to the flow attributes.
2501  * @param[in] items
2502  *   Pointer to the list of items.
2503  * @param[in] actions
2504  *   Pointer to the list of actions.
2505  * @param[out] error
2506  *   Pointer to the error structure.
2507  *
2508  * @return
2509  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2510  */
2511 static inline struct mlx5_flow *
2512 flow_drv_prepare(const struct rte_flow *flow,
2513                  const struct rte_flow_attr *attr,
2514                  const struct rte_flow_item items[],
2515                  const struct rte_flow_action actions[],
2516                  struct rte_flow_error *error)
2517 {
2518         const struct mlx5_flow_driver_ops *fops;
2519         enum mlx5_flow_drv_type type = flow->drv_type;
2520
2521         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2522         fops = flow_get_drv_ops(type);
2523         return fops->prepare(attr, items, actions, error);
2524 }
2525
2526 /**
2527  * Flow driver translation API. This abstracts calling driver specific
2528  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2529  * translates a generic flow into a driver flow. flow_drv_prepare() must
2530  * precede.
2531  *
2532  * @note
2533  *   dev_flow->layers could be filled as a result of parsing during translation
2534  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2535  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2536  *   flow->actions could be overwritten even though all the expanded dev_flows
2537  *   have the same actions.
2538  *
2539  * @param[in] dev
2540  *   Pointer to the rte dev structure.
2541  * @param[in, out] dev_flow
2542  *   Pointer to the mlx5 flow.
2543  * @param[in] attr
2544  *   Pointer to the flow attributes.
2545  * @param[in] items
2546  *   Pointer to the list of items.
2547  * @param[in] actions
2548  *   Pointer to the list of actions.
2549  * @param[out] error
2550  *   Pointer to the error structure.
2551  *
2552  * @return
2553  *   0 on success, a negative errno value otherwise and rte_errno is set.
2554  */
2555 static inline int
2556 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2557                    const struct rte_flow_attr *attr,
2558                    const struct rte_flow_item items[],
2559                    const struct rte_flow_action actions[],
2560                    struct rte_flow_error *error)
2561 {
2562         const struct mlx5_flow_driver_ops *fops;
2563         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2564
2565         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2566         fops = flow_get_drv_ops(type);
2567         return fops->translate(dev, dev_flow, attr, items, actions, error);
2568 }
2569
2570 /**
2571  * Flow driver apply API. This abstracts calling driver specific functions.
2572  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2573  * translated driver flows on to device. flow_drv_translate() must precede.
2574  *
2575  * @param[in] dev
2576  *   Pointer to Ethernet device structure.
2577  * @param[in, out] flow
2578  *   Pointer to flow structure.
2579  * @param[out] error
2580  *   Pointer to error structure.
2581  *
2582  * @return
2583  *   0 on success, a negative errno value otherwise and rte_errno is set.
2584  */
2585 static inline int
2586 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2587                struct rte_flow_error *error)
2588 {
2589         const struct mlx5_flow_driver_ops *fops;
2590         enum mlx5_flow_drv_type type = flow->drv_type;
2591
2592         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2593         fops = flow_get_drv_ops(type);
2594         return fops->apply(dev, flow, error);
2595 }
2596
2597 /**
2598  * Flow driver remove API. This abstracts calling driver specific functions.
2599  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2600  * on device. All the resources of the flow should be freed by calling
2601  * flow_drv_destroy().
2602  *
2603  * @param[in] dev
2604  *   Pointer to Ethernet device.
2605  * @param[in, out] flow
2606  *   Pointer to flow structure.
2607  */
2608 static inline void
2609 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2610 {
2611         const struct mlx5_flow_driver_ops *fops;
2612         enum mlx5_flow_drv_type type = flow->drv_type;
2613
2614         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2615         fops = flow_get_drv_ops(type);
2616         fops->remove(dev, flow);
2617 }
2618
2619 /**
2620  * Flow driver destroy API. This abstracts calling driver specific functions.
2621  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2622  * on device and releases resources of the flow.
2623  *
2624  * @param[in] dev
2625  *   Pointer to Ethernet device.
2626  * @param[in, out] flow
2627  *   Pointer to flow structure.
2628  */
2629 static inline void
2630 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2631 {
2632         const struct mlx5_flow_driver_ops *fops;
2633         enum mlx5_flow_drv_type type = flow->drv_type;
2634
2635         flow_mreg_split_qrss_release(dev, flow);
2636         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2637         fops = flow_get_drv_ops(type);
2638         fops->destroy(dev, flow);
2639 }
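
/*
 * Lifecycle sketch (illustrative only, declarations omitted): the
 * driver-ops wrappers above are invoked in a fixed order when a flow is
 * built and torn down,
 *
 *   flow_drv_validate(dev, attr, items, actions, true, &err);
 *   dev_flow = flow_drv_prepare(flow, attr, items, actions, &err);
 *   flow_drv_translate(dev, dev_flow, attr, items, actions, &err);
 *   flow_drv_apply(dev, flow, &err);  // offload to the device
 *   ...
 *   flow_drv_remove(dev, flow);       // detach from HW, keep resources
 *   flow_drv_destroy(dev, flow);      // release all resources
 */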
2640
2641 /**
2642  * Validate a flow supported by the NIC.
2643  *
2644  * @see rte_flow_validate()
2645  * @see rte_flow_ops
2646  */
2647 int
2648 mlx5_flow_validate(struct rte_eth_dev *dev,
2649                    const struct rte_flow_attr *attr,
2650                    const struct rte_flow_item items[],
2651                    const struct rte_flow_action actions[],
2652                    struct rte_flow_error *error)
2653 {
2654         int ret;
2655
2656         ret = flow_drv_validate(dev, attr, items, actions, true, error);
2657         if (ret < 0)
2658                 return ret;
2659         return 0;
2660 }
2661
2662 /**
2663  * Get port id item from the item list.
2664  *
2665  * @param[in] item
2666  *   Pointer to the list of items.
2667  *
2668  * @return
2669  *   Pointer to the port id item if it exists, NULL otherwise.
2670  */
2671 static const struct rte_flow_item *
2672 find_port_id_item(const struct rte_flow_item *item)
2673 {
2674         assert(item);
2675         for (; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2676                 if (item->type == RTE_FLOW_ITEM_TYPE_PORT_ID)
2677                         return item;
2678         }
2679         return NULL;
2680 }
2681
2682 /**
2683  * Get RSS action from the action list.
2684  *
2685  * @param[in] actions
2686  *   Pointer to the list of actions.
2687  *
2688  * @return
2689  *   Pointer to the RSS action if it exists, NULL otherwise.
2690  */
2691 static const struct rte_flow_action_rss *
2692 flow_get_rss_action(const struct rte_flow_action actions[])
2693 {
2694         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2695                 switch (actions->type) {
2696                 case RTE_FLOW_ACTION_TYPE_RSS:
2697                         return (const struct rte_flow_action_rss *)
2698                                actions->conf;
2699                 default:
2700                         break;
2701                 }
2702         }
2703         return NULL;
2704 }
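
/*
 * Usage sketch (illustrative only; `mark' and `rss' are assumed to be
 * filled rte_flow_action_mark/rte_flow_action_rss structures):
 *
 *   const struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = &rss },
 *           { .type = RTE_FLOW_ACTION_TYPE_END, },
 *   };
 *
 * flow_get_rss_action(actions) returns &rss here and NULL when the list
 * carries no RSS action.
 */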
2705
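/**
 * Select the expansion root for the given pattern and RSS level.
 *
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] rss_level
 *   RSS encapsulation level, values >= 2 select the outer expansion
 *   roots so that expansion happens on the inner headers.
 *
 * @return
 *   The mlx5_expansion enum value to start the RSS expansion from.
 */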
2706 static unsigned int
2707 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2708 {
2709         const struct rte_flow_item *item;
2710         unsigned int has_vlan = 0;
2711
2712         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2713                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2714                         has_vlan = 1;
2715                         break;
2716                 }
2717         }
2718         if (has_vlan)
2719                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2720                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2721         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2722                                MLX5_EXPANSION_ROOT_OUTER;
2723 }
2724
2725 /**
2726  * Get QUEUE/RSS action from the action list.
2727  *
2728  * @param[in] actions
2729  *   Pointer to the list of actions.
2730  * @param[out] qrss
2731  *   Pointer to the return pointer, set to the QUEUE/RSS action if one is
2732  *   found in the list and left untouched otherwise.
2735  *
2736  * @return
2737  *   Total number of actions.
2738  */
2739 static int
2740 flow_parse_qrss_action(const struct rte_flow_action actions[],
2741                        const struct rte_flow_action **qrss)
2742 {
2743         int actions_n = 0;
2744
2745         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2746                 switch (actions->type) {
2747                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2748                 case RTE_FLOW_ACTION_TYPE_RSS:
2749                         *qrss = actions;
2750                         break;
2751                 default:
2752                         break;
2753                 }
2754                 actions_n++;
2755         }
2756         /* Count RTE_FLOW_ACTION_TYPE_END. */
2757         return actions_n + 1;
2758 }
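
/*
 * Worked example (illustrative only; `queue' is an assumed
 * rte_flow_action_queue):
 *
 *   const struct rte_flow_action *qrss = NULL;
 *   const struct rte_flow_action acts[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_FLAG, },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END, },
 *   };
 *   int n = flow_parse_qrss_action(acts, &qrss);
 *
 * Here n == 3 (FLAG and QUEUE plus the END terminator) and qrss points
 * to the QUEUE action; qrss is left untouched when no QUEUE/RSS action
 * exists.
 */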
2759
2760 /**
2761  * Check meter action from the action list.
2762  *
2763  * @param[in] actions
2764  *   Pointer to the list of actions.
2765  * @param[out] mtr
2766  *   Pointer to the meter existence flag.
2767  *
2768  * @return
2769  *   Total number of actions.
2770  */
2771 static int
2772 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
2773 {
2774         int actions_n = 0;
2775
2776         assert(mtr);
2777         *mtr = 0;
2778         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2779                 switch (actions->type) {
2780                 case RTE_FLOW_ACTION_TYPE_METER:
2781                         *mtr = 1;
2782                         break;
2783                 default:
2784                         break;
2785                 }
2786                 actions_n++;
2787         }
2788         /* Count RTE_FLOW_ACTION_TYPE_END. */
2789         return actions_n + 1;
2790 }
2791
2792 /**
2793  * Check if the flow should be split due to hairpin.
2794  * The reason for the split is that current HW can't
2795  * support encap on Rx, so if a flow has encap we move it
2796  * to Tx.
2797  *
2798  * @param dev
2799  *   Pointer to Ethernet device.
2800  * @param[in] attr
2801  *   Flow rule attributes.
2802  * @param[in] actions
2803  *   Associated actions (list terminated by the END action).
2804  *
2805  * @return
2806  *   > 0 the number of actions and the flow should be split,
2807  *   0 when no split required.
2808  */
2809 static int
2810 flow_check_hairpin_split(struct rte_eth_dev *dev,
2811                          const struct rte_flow_attr *attr,
2812                          const struct rte_flow_action actions[])
2813 {
2814         int queue_action = 0;
2815         int action_n = 0;
2816         int encap = 0;
2817         const struct rte_flow_action_queue *queue;
2818         const struct rte_flow_action_rss *rss;
2819         const struct rte_flow_action_raw_encap *raw_encap;
2820
2821         if (!attr->ingress)
2822                 return 0;
2823         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2824                 switch (actions->type) {
2825                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2826                         queue = actions->conf;
2827                         if (queue == NULL)
2828                                 return 0;
2829                         if (mlx5_rxq_get_type(dev, queue->index) !=
2830                             MLX5_RXQ_TYPE_HAIRPIN)
2831                                 return 0;
2832                         queue_action = 1;
2833                         action_n++;
2834                         break;
2835                 case RTE_FLOW_ACTION_TYPE_RSS:
2836                         rss = actions->conf;
2837                         if (rss == NULL || rss->queue_num == 0)
2838                                 return 0;
2839                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
2840                             MLX5_RXQ_TYPE_HAIRPIN)
2841                                 return 0;
2842                         queue_action = 1;
2843                         action_n++;
2844                         break;
2845                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2846                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2847                         encap = 1;
2848                         action_n++;
2849                         break;
2850                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2851                         raw_encap = actions->conf;
2852                         if (raw_encap->size >
2853                             (sizeof(struct rte_flow_item_eth) +
2854                              sizeof(struct rte_flow_item_ipv4)))
2855                                 encap = 1;
2856                         action_n++;
2857                         break;
2858                 default:
2859                         action_n++;
2860                         break;
2861                 }
2862         }
2863         if (encap == 1 && queue_action)
2864                 return action_n;
2865         return 0;
2866 }
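
/*
 * Worked example (illustrative only, assuming queue 0 was configured as
 * a hairpin queue): an ingress flow with actions
 *   VXLAN_ENCAP / QUEUE(index 0) / END
 * makes flow_check_hairpin_split() return 2 (encap plus hairpin queue,
 * so a split is required), while the same actions towards a regular Rx
 * queue make it return 0.
 */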
2867
2868 /* Declare flow create/destroy prototype in advance. */
2869 static struct rte_flow *
2870 flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
2871                  const struct rte_flow_attr *attr,
2872                  const struct rte_flow_item items[],
2873                  const struct rte_flow_action actions[],
2874                  bool external, struct rte_flow_error *error);
2875
2876 static void
2877 flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2878                   struct rte_flow *flow);
2879
2880 /**
2881  * Add a flow of copying flow metadata registers in RX_CP_TBL.
2882  *
2883  * As mark_id is unique, if there's already a registered flow for the mark_id,
2884  * return by increasing the reference counter of the resource. Otherwise, create
2885  * the resource (mcp_res) and flow.
2886  *
2887  * Flow looks like,
2888  *   - If ingress port is ANY and reg_c[1] is mark_id,
2889  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
2890  *
2891  * For default flow (zero mark_id), flow is like,
2892  *   - If ingress port is ANY,
2893  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
2894  *
2895  * @param dev
2896  *   Pointer to Ethernet device.
2897  * @param mark_id
2898  *   ID of MARK action, zero means default flow for META.
2899  * @param[out] error
2900  *   Perform verbose error reporting if not NULL.
2901  *
2902  * @return
2903  *   Associated resource on success, NULL otherwise and rte_errno is set.
2904  */
2905 static struct mlx5_flow_mreg_copy_resource *
2906 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
2907                           struct rte_flow_error *error)
2908 {
2909         struct mlx5_priv *priv = dev->data->dev_private;
2910         struct rte_flow_attr attr = {
2911                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
2912                 .ingress = 1,
2913         };
2914         struct mlx5_rte_flow_item_tag tag_spec = {
2915                 .data = mark_id,
2916         };
2917         struct rte_flow_item items[] = {
2918                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
2919         };
2920         struct rte_flow_action_mark ftag = {
2921                 .id = mark_id,
2922         };
2923         struct mlx5_flow_action_copy_mreg cp_mreg = {
2924                 .dst = REG_B,
2925                 .src = 0,
2926         };
2927         struct rte_flow_action_jump jump = {
2928                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
2929         };
2930         struct rte_flow_action actions[] = {
2931                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
2932         };
2933         struct mlx5_flow_mreg_copy_resource *mcp_res;
2934         int ret;
2935
2936         /* Fill the register fields in the flow. */
2937         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
2938         if (ret < 0)
2939                 return NULL;
2940         tag_spec.id = ret;
2941         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
2942         if (ret < 0)
2943                 return NULL;
2944         cp_mreg.src = ret;
2945         /* Check if already registered. */
2946         assert(priv->mreg_cp_tbl);
2947         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
2948         if (mcp_res) {
2949                 /* For non-default rule. */
2950                 if (mark_id != MLX5_DEFAULT_COPY_ID)
2951                         mcp_res->refcnt++;
2952                 assert(mark_id != MLX5_DEFAULT_COPY_ID || mcp_res->refcnt == 1);
2953                 return mcp_res;
2954         }
2955         /* Provide the full width of FLAG specific value. */
2956         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
2957                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
2958         /* Build a new flow. */
2959         if (mark_id != MLX5_DEFAULT_COPY_ID) {
2960                 items[0] = (struct rte_flow_item){
2961                         .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
2962                         .spec = &tag_spec,
2963                 };
2964                 items[1] = (struct rte_flow_item){
2965                         .type = RTE_FLOW_ITEM_TYPE_END,
2966                 };
2967                 actions[0] = (struct rte_flow_action){
2968                         .type = MLX5_RTE_FLOW_ACTION_TYPE_MARK,
2969                         .conf = &ftag,
2970                 };
2971                 actions[1] = (struct rte_flow_action){
2972                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
2973                         .conf = &cp_mreg,
2974                 };
2975                 actions[2] = (struct rte_flow_action){
2976                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
2977                         .conf = &jump,
2978                 };
2979                 actions[3] = (struct rte_flow_action){
2980                         .type = RTE_FLOW_ACTION_TYPE_END,
2981                 };
2982         } else {
2983                 /* Default rule, wildcard match. */
2984                 attr.priority = MLX5_FLOW_PRIO_RSVD;
2985                 items[0] = (struct rte_flow_item){
2986                         .type = RTE_FLOW_ITEM_TYPE_END,
2987                 };
2988                 actions[0] = (struct rte_flow_action){
2989                         .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
2990                         .conf = &cp_mreg,
2991                 };
2992                 actions[1] = (struct rte_flow_action){
2993                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
2994                         .conf = &jump,
2995                 };
2996                 actions[2] = (struct rte_flow_action){
2997                         .type = RTE_FLOW_ACTION_TYPE_END,
2998                 };
2999         }
3000         /* Build a new entry. */
3001         mcp_res = rte_zmalloc(__func__, sizeof(*mcp_res), 0);
3002         if (!mcp_res) {
3003                 rte_errno = ENOMEM;
3004                 return NULL;
3005         }
3006         /*
3007          * The copy Flows are not included in any list. These
3008          * ones are referenced from other Flows and can not
3009          * be applied, removed or deleted in arbitrary order
3010          * by list traversing.
3011          */
3012         mcp_res->flow = flow_list_create(dev, NULL, &attr, items,
3013                                          actions, false, error);
3014         if (!mcp_res->flow)
3015                 goto error;
3016         mcp_res->refcnt++;
3017         mcp_res->hlist_ent.key = mark_id;
3018         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3019                                 &mcp_res->hlist_ent);
3020         assert(!ret);
3021         if (ret)
3022                 goto error;
3023         return mcp_res;
3024 error:
3025         if (mcp_res->flow)
3026                 flow_list_destroy(dev, NULL, mcp_res->flow);
3027         rte_free(mcp_res);
3028         return NULL;
3029 }
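
/*
 * Sharing sketch (illustrative only): two flows carrying the same MARK
 * ID end up with a single copy resource,
 *
 *   struct mlx5_flow_mreg_copy_resource *a, *b;
 *
 *   a = flow_mreg_add_copy_action(dev, 0x30, &err); // creates the flow
 *   b = flow_mreg_add_copy_action(dev, 0x30, &err); // hash table hit
 *   // a == b and a->refcnt == 2; RX_CP_TBL holds one flow for 0x30.
 */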
3030
3031 /**
3032  * Release flow in RX_CP_TBL.
3033  *
3034  * @param dev
3035  *   Pointer to Ethernet device.
3036  * @param flow
3037  *   Parent flow for which copying is provided.
3038  */
3039 static void
3040 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3041                           struct rte_flow *flow)
3042 {
3043         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3044         struct mlx5_priv *priv = dev->data->dev_private;
3045
3046         if (!mcp_res || !priv->mreg_cp_tbl)
3047                 return;
3048         if (flow->copy_applied) {
3049                 assert(mcp_res->appcnt);
3050                 flow->copy_applied = 0;
3051                 --mcp_res->appcnt;
3052                 if (!mcp_res->appcnt)
3053                         flow_drv_remove(dev, mcp_res->flow);
3054         }
3055         /*
3056          * We do not check availability of metadata registers here,
3057          * because copy resources are not allocated in this case.
3058          */
3059         if (--mcp_res->refcnt)
3060                 return;
3061         assert(mcp_res->flow);
3062         flow_list_destroy(dev, NULL, mcp_res->flow);
3063         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3064         rte_free(mcp_res);
3065         flow->mreg_copy = NULL;
3066 }
3067
3068 /**
3069  * Start flow in RX_CP_TBL.
3070  *
3071  * @param dev
3072  *   Pointer to Ethernet device.
3073  * @param flow
3074  *   Parent flow for which copying is provided.
3075  *
3076  * @return
3077  *   0 on success, a negative errno value otherwise and rte_errno is set.
3078  */
3079 static int
3080 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3081                             struct rte_flow *flow)
3082 {
3083         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3084         int ret;
3085
3086         if (!mcp_res || flow->copy_applied)
3087                 return 0;
3088         if (!mcp_res->appcnt) {
3089                 ret = flow_drv_apply(dev, mcp_res->flow, NULL);
3090                 if (ret)
3091                         return ret;
3092         }
3093         ++mcp_res->appcnt;
3094         flow->copy_applied = 1;
3095         return 0;
3096 }
3097
3098 /**
3099  * Stop flow in RX_CP_TBL.
3100  *
3101  * @param dev
3102  *   Pointer to Ethernet device.
3103  * @param flow
3104  *   Parent flow for which copying is provided.
3105  */
3106 static void
3107 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3108                            struct rte_flow *flow)
3109 {
3110         struct mlx5_flow_mreg_copy_resource *mcp_res = flow->mreg_copy;
3111
3112         if (!mcp_res || !flow->copy_applied)
3113                 return;
3114         assert(mcp_res->appcnt);
3115         --mcp_res->appcnt;
3116         flow->copy_applied = 0;
3117         if (!mcp_res->appcnt)
3118                 flow_drv_remove(dev, mcp_res->flow);
3119 }
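
/*
 * Apply counting sketch (illustrative only, assuming flows f1 and f2
 * share one copy resource): the copy flow is offloaded on the first
 * start and removed on the last stop,
 *
 *   flow_mreg_start_copy_action(dev, f1); // appcnt 0->1, flow_drv_apply()
 *   flow_mreg_start_copy_action(dev, f2); // appcnt 1->2, no HW call
 *   flow_mreg_stop_copy_action(dev, f1);  // appcnt 2->1, no HW call
 *   flow_mreg_stop_copy_action(dev, f2);  // appcnt 1->0, flow_drv_remove()
 */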
3120
3121 /**
3122  * Remove the default copy action from RX_CP_TBL.
3123  *
3124  * @param dev
3125  *   Pointer to Ethernet device.
3126  */
3127 static void
3128 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3129 {
3130         struct mlx5_flow_mreg_copy_resource *mcp_res;
3131         struct mlx5_priv *priv = dev->data->dev_private;
3132
3133         /* Check if default flow is registered. */
3134         if (!priv->mreg_cp_tbl)
3135                 return;
3136         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3137                                             MLX5_DEFAULT_COPY_ID);
3138         if (!mcp_res)
3139                 return;
3140         assert(mcp_res->flow);
3141         flow_list_destroy(dev, NULL, mcp_res->flow);
3142         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3143         rte_free(mcp_res);
3144 }
3145
3146 /**
3147  * Add the default copy action in RX_CP_TBL.
3148  *
3149  * @param dev
3150  *   Pointer to Ethernet device.
3151  * @param[out] error
3152  *   Perform verbose error reporting if not NULL.
3153  *
3154  * @return
3155  *   0 for success, negative value otherwise and rte_errno is set.
3156  */
3157 static int
3158 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3159                                   struct rte_flow_error *error)
3160 {
3161         struct mlx5_priv *priv = dev->data->dev_private;
3162         struct mlx5_flow_mreg_copy_resource *mcp_res;
3163
3164         /* Check whether extensive metadata feature is engaged. */
3165         if (!priv->config.dv_flow_en ||
3166             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3167             !mlx5_flow_ext_mreg_supported(dev) ||
3168             !priv->sh->dv_regc0_mask)
3169                 return 0;
3170         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3171         if (!mcp_res)
3172                 return -rte_errno;
3173         return 0;
3174 }
3175
3176 /**
3177  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3178  *
3179  * All the flow having Q/RSS action should be split by
3180  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3181  * performs the following,
3182  *   - CQE->flow_tag := reg_c[1] (MARK)
3183  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3184  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3185  * but there should be a flow for each MARK ID set by the MARK action.
3186  *
3187  * For the aforementioned reason, if there's a MARK action in flow's action
3188  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3189  * the MARK ID to CQE's flow_tag like,
3190  *   - If reg_c[1] is mark_id,
3191  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3192  *
3193  * For SET_META action which stores value in reg_c[0], as the destination is
3194  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3195  * MARK ID means the default flow. The default flow looks like,
3196  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3197  *
3198  * @param dev
3199  *   Pointer to Ethernet device.
3200  * @param flow
3201  *   Pointer to flow structure.
3202  * @param[in] actions
3203  *   Pointer to the list of actions.
3204  * @param[out] error
3205  *   Perform verbose error reporting if not NULL.
3206  *
3207  * @return
3208  *   0 on success, negative value otherwise and rte_errno is set.
3209  */
3210 static int
3211 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3212                             struct rte_flow *flow,
3213                             const struct rte_flow_action *actions,
3214                             struct rte_flow_error *error)
3215 {
3216         struct mlx5_priv *priv = dev->data->dev_private;
3217         struct mlx5_dev_config *config = &priv->config;
3218         struct mlx5_flow_mreg_copy_resource *mcp_res;
3219         const struct rte_flow_action_mark *mark;
3220
3221         /* Check whether extensive metadata feature is engaged. */
3222         if (!config->dv_flow_en ||
3223             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3224             !mlx5_flow_ext_mreg_supported(dev) ||
3225             !priv->sh->dv_regc0_mask)
3226                 return 0;
3227         /* Find MARK action. */
3228         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3229                 switch (actions->type) {
3230                 case RTE_FLOW_ACTION_TYPE_FLAG:
3231                         mcp_res = flow_mreg_add_copy_action
3232                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3233                         if (!mcp_res)
3234                                 return -rte_errno;
3235                         flow->mreg_copy = mcp_res;
3236                         if (dev->data->dev_started) {
3237                                 mcp_res->appcnt++;
3238                                 flow->copy_applied = 1;
3239                         }
3240                         return 0;
3241                 case RTE_FLOW_ACTION_TYPE_MARK:
3242                         mark = (const struct rte_flow_action_mark *)
3243                                 actions->conf;
3244                         mcp_res =
3245                                 flow_mreg_add_copy_action(dev, mark->id, error);
3246                         if (!mcp_res)
3247                                 return -rte_errno;
3248                         flow->mreg_copy = mcp_res;
3249                         if (dev->data->dev_started) {
3250                                 mcp_res->appcnt++;
3251                                 flow->copy_applied = 1;
3252                         }
3253                         return 0;
3254                 default:
3255                         break;
3256                 }
3257         }
3258         return 0;
3259 }
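
/*
 * Worked example (illustrative only): with extensive metadata enabled,
 * creating a flow with actions MARK(id = 5) / QUEUE / END makes this
 * routine register a copy flow for mark_id 5 through
 * flow_mreg_add_copy_action(); a FLAG action registers mark_id
 * MLX5_FLOW_MARK_DEFAULT instead. Action lists without MARK or FLAG
 * leave RX_CP_TBL untouched.
 */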
3260
3261 #define MLX5_MAX_SPLIT_ACTIONS 24
3262 #define MLX5_MAX_SPLIT_ITEMS 24
3263
3264 /**
3265  * Split the hairpin flow.
3266  * Since HW can't support encap on Rx, we move the encap to Tx.
3267  * If the count action is after the encap then we also
3268  * move the count action. In this case the count will also measure
3269  * the outer bytes.
3270  *
3271  * @param dev
3272  *   Pointer to Ethernet device.
3273  * @param[in] actions
3274  *   Associated actions (list terminated by the END action).
3275  * @param[out] actions_rx
3276  *   Rx flow actions.
3277  * @param[out] actions_tx
3278  *   Tx flow actions.
3279  * @param[out] pattern_tx
3280  *   The pattern items for the Tx flow.
3281  * @param[out] flow_id
3282  *   The flow ID connected to this flow.
3283  *
3284  * @return
3285  *   0 on success.
3286  */
3287 static int
3288 flow_hairpin_split(struct rte_eth_dev *dev,
3289                    const struct rte_flow_action actions[],
3290                    struct rte_flow_action actions_rx[],
3291                    struct rte_flow_action actions_tx[],
3292                    struct rte_flow_item pattern_tx[],
3293                    uint32_t *flow_id)
3294 {
3295         struct mlx5_priv *priv = dev->data->dev_private;
3296         const struct rte_flow_action_raw_encap *raw_encap;
3297         const struct rte_flow_action_raw_decap *raw_decap;
3298         struct mlx5_rte_flow_action_set_tag *set_tag;
3299         struct rte_flow_action *tag_action;
3300         struct mlx5_rte_flow_item_tag *tag_item;
3301         struct rte_flow_item *item;
3302         char *addr;
3303         int encap = 0;
3304
3305         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3306         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3307                 switch (actions->type) {
3308                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3309                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3310                         rte_memcpy(actions_tx, actions,
3311                                sizeof(struct rte_flow_action));
3312                         actions_tx++;
3313                         break;
3314                 case RTE_FLOW_ACTION_TYPE_COUNT:
3315                         if (encap) {
3316                                 rte_memcpy(actions_tx, actions,
3317                                            sizeof(struct rte_flow_action));
3318                                 actions_tx++;
3319                         } else {
3320                                 rte_memcpy(actions_rx, actions,
3321                                            sizeof(struct rte_flow_action));
3322                                 actions_rx++;
3323                         }
3324                         break;
3325                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3326                         raw_encap = actions->conf;
3327                         if (raw_encap->size >
3328                             (sizeof(struct rte_flow_item_eth) +
3329                              sizeof(struct rte_flow_item_ipv4))) {
3330                                 memcpy(actions_tx, actions,
3331                                        sizeof(struct rte_flow_action));
3332                                 actions_tx++;
3333                                 encap = 1;
3334                         } else {
3335                                 rte_memcpy(actions_rx, actions,
3336                                            sizeof(struct rte_flow_action));
3337                                 actions_rx++;
3338                         }
3339                         break;
3340                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3341                         raw_decap = actions->conf;
3342                         if (raw_decap->size <
3343                             (sizeof(struct rte_flow_item_eth) +
3344                              sizeof(struct rte_flow_item_ipv4))) {
3345                                 memcpy(actions_tx, actions,
3346                                        sizeof(struct rte_flow_action));
3347                                 actions_tx++;
3348                         } else {
3349                                 rte_memcpy(actions_rx, actions,
3350                                            sizeof(struct rte_flow_action));
3351                                 actions_rx++;
3352                         }
3353                         break;
3354                 default:
3355                         rte_memcpy(actions_rx, actions,
3356                                    sizeof(struct rte_flow_action));
3357                         actions_rx++;
3358                         break;
3359                 }
3360         }
3361         /* Add set tag action and end action for the Rx flow. */
3362         tag_action = actions_rx;
3363         tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3364         actions_rx++;
3365         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3366         actions_rx++;
3367         set_tag = (void *)actions_rx;
3368         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3369         assert(set_tag->id > REG_NONE);
3370         set_tag->data = *flow_id;
3371         tag_action->conf = set_tag;
3372         /* Create Tx item list. */
3373         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
3374         addr = (void *)&pattern_tx[2];
3375         item = pattern_tx;
3376         item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3377         tag_item = (void *)addr;
3378         tag_item->data = *flow_id;
3379         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3380         assert(tag_item->id > REG_NONE);
3381         item->spec = tag_item;
3382         addr += sizeof(struct mlx5_rte_flow_item_tag);
3383         tag_item = (void *)addr;
3384         tag_item->data = UINT32_MAX;
3385         tag_item->id = UINT16_MAX;
3386         item->mask = tag_item;
3387         addr += sizeof(struct mlx5_rte_flow_item_tag);
3388         item->last = NULL;
3389         item++;
3390         item->type = RTE_FLOW_ITEM_TYPE_END;
3391         return 0;
3392 }
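
/*
 * Buffer layout sketch for callers (illustrative only): the routine
 * stores the SET_TAG conf right behind the END action of actions_rx and
 * the tag spec/mask behind the two Tx pattern items, so the caller must
 * size the buffers accordingly,
 *
 *   actions_rx: [ copied actions | TAG | END | set_tag conf ]
 *   actions_tx: [ moved actions | END ]
 *   pattern_tx: [ TAG item | END item | tag spec | tag mask ]
 */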
3393
3394 /**
3395  * The last stage of splitting chain, just creates the subflow
3396  * without any modification.
3397  *
3398  * @param dev
3399  *   Pointer to Ethernet device.
3400  * @param[in] flow
3401  *   Parent flow structure pointer.
3402  * @param[in, out] sub_flow
3403  *   Pointer to return the created subflow, may be NULL.
3404  * @param[in] attr
3405  *   Flow rule attributes.
3406  * @param[in] items
3407  *   Pattern specification (list terminated by the END pattern item).
3408  * @param[in] actions
3409  *   Associated actions (list terminated by the END action).
3410  * @param[in] external
3411  *   This flow rule is created by a request external to the PMD.
3412  * @param[out] error
3413  *   Perform verbose error reporting if not NULL.
3414  * @return
3415  *   0 on success, negative value otherwise
3416  */
3417 static int
3418 flow_create_split_inner(struct rte_eth_dev *dev,
3419                         struct rte_flow *flow,
3420                         struct mlx5_flow **sub_flow,
3421                         const struct rte_flow_attr *attr,
3422                         const struct rte_flow_item items[],
3423                         const struct rte_flow_action actions[],
3424                         bool external, struct rte_flow_error *error)
3425 {
3426         struct mlx5_flow *dev_flow;
3427
3428         dev_flow = flow_drv_prepare(flow, attr, items, actions, error);
3429         if (!dev_flow)
3430                 return -rte_errno;
3431         dev_flow->flow = flow;
3432         dev_flow->external = external;
3433         /* Subflow object was created, we must include one in the list. */
3434         LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
3435         if (sub_flow)
3436                 *sub_flow = dev_flow;
3437         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3438 }
3439
3440 /**
3441  * Split the meter flow.
3442  *
3443  * As the meter flow will be split into three sub flows, the actions
3444  * other than the meter action only make sense if the meter accepts
3445  * the packet. If the packet needs to be dropped, no additional
3446  * actions should be taken.
3447  *
3448  * One kind of special action which decapsulates the L3 tunnel
3449  * header will be in the prefix sub flow, so as not to take the
3450  * L3 tunnel header into account.
3451  *
3452  * @param dev
3453  *   Pointer to Ethernet device.
3454  * @param[in] actions
3455  *   Associated actions (list terminated by the END action).
3456  * @param[out] actions_sfx
3457  *   Suffix flow actions.
3458  * @param[out] actions_pre
3459  *   Prefix flow actions.
3460  *
3461  * @return
3462  *   The tag id allocated to match the suffix flow, 0 otherwise.
3467  */
3468 static int
3469 flow_meter_split_prep(struct rte_eth_dev *dev,
3470                  const struct rte_flow_action actions[],
3471                  struct rte_flow_action actions_sfx[],
3472                  struct rte_flow_action actions_pre[])
3473 {
3474         struct rte_flow_action *tag_action;
3475         struct mlx5_rte_flow_action_set_tag *set_tag;
3476         struct rte_flow_error error;
3477         const struct rte_flow_action_raw_encap *raw_encap;
3478         const struct rte_flow_action_raw_decap *raw_decap;
3479         uint32_t tag_id;
3480
3481         /* Add the extra tag action first. */
3482         tag_action = actions_pre;
3483         tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3484         actions_pre++;
3485         /* Prepare the actions for prefix and suffix flow. */
3486         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3487                 switch (actions->type) {
3488                 case RTE_FLOW_ACTION_TYPE_METER:
3489                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3490                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3491                         memcpy(actions_pre, actions,
3492                                sizeof(struct rte_flow_action));
3493                         actions_pre++;
3494                         break;
3495                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3496                         raw_encap = actions->conf;
3497                         if (raw_encap->size >
3498                             (sizeof(struct rte_flow_item_eth) +
3499                              sizeof(struct rte_flow_item_ipv4))) {
3500                                 memcpy(actions_sfx, actions,
3501                                        sizeof(struct rte_flow_action));
3502                                 actions_sfx++;
3503                         } else {
3504                                 rte_memcpy(actions_pre, actions,
3505                                            sizeof(struct rte_flow_action));
3506                                 actions_pre++;
3507                         }
3508                         break;
3509                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3510                         raw_decap = actions->conf;
3511                         /* Size 0 decap means 50 bytes, as in VXLAN decap. */
3512                         if (raw_decap->size && (raw_decap->size <
3513                             (sizeof(struct rte_flow_item_eth) +
3514                              sizeof(struct rte_flow_item_ipv4)))) {
3515                                 memcpy(actions_sfx, actions,
3516                                        sizeof(struct rte_flow_action));
3517                                 actions_sfx++;
3518                         } else {
3519                                 rte_memcpy(actions_pre, actions,
3520                                            sizeof(struct rte_flow_action));
3521                                 actions_pre++;
3522                         }
3523                         break;
3524                 default:
3525                         memcpy(actions_sfx, actions,
3526                                 sizeof(struct rte_flow_action));
3527                         actions_sfx++;
3528                         break;
3529                 }
3530         }
3531         /* Add end action to the actions. */
3532         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
3533         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
3534         actions_pre++;
3535         /* Set the tag. */
3536         set_tag = (void *)actions_pre;
3537         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3538         /*
3539          * Get the id from the qrss_pool to make qrss share the id with meter.
3540          */
3541         tag_id = flow_qrss_get_id(dev);
3542         set_tag->data = rte_cpu_to_be_32(tag_id);
3543         tag_action->conf = set_tag;
3544         return tag_id;
3545 }
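
/*
 * Split sketch (illustrative only): for the action list
 *   METER / MARK / QUEUE / END
 * the prefix flow receives the internal SET_TAG (carrying the allocated
 * tag id) plus METER, while MARK and QUEUE move to the suffix flow and
 * therefore only apply to packets the meter accepts.
 */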
3546
3547 /**
3548  * Split action list having QUEUE/RSS for metadata register copy.
3549  *
3550  * Once Q/RSS action is detected in user's action list, the flow action
3551  * should be split in order to copy metadata registers, which will happen in
3552  * RX_CP_TBL like,
3553  *   - CQE->flow_tag := reg_c[1] (MARK)
3554  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3555  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
3556  * This is because the last action of each flow must be a terminal action
3557  * (QUEUE, RSS or DROP).
3558  *
3559  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
3560  * stored and kept in the mlx5_flow structure for each sub_flow.
3561  *
3562  * The Q/RSS action is replaced with,
3563  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
3564  * And the following JUMP action is added at the end,
3565  *   - JUMP, to RX_CP_TBL.
3566  *
3567  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL
3568  * by the flow_create_split_metadata() routine. The flow will look like,
3569  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
3570  *
3571  * @param dev
3572  *   Pointer to Ethernet device.
3573  * @param[out] split_actions
3574  *   Pointer to store split actions to jump to CP_TBL.
3575  * @param[in] actions
3576  *   Pointer to the list of original flow actions.
3577  * @param[in] qrss
3578  *   Pointer to the Q/RSS action.
3579  * @param[in] actions_n
3580  *   Number of original actions.
3581  * @param[out] error
3582  *   Perform verbose error reporting if not NULL.
3583  *
3584  * @return
3585  *   non-zero unique flow_id on success, otherwise 0 and
3586  *   error/rte_errno are set.
3587  */
3588 static uint32_t
3589 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
3590                           struct rte_flow_action *split_actions,
3591                           const struct rte_flow_action *actions,
3592                           const struct rte_flow_action *qrss,
3593                           int actions_n, struct rte_flow_error *error)
3594 {
3595         struct mlx5_rte_flow_action_set_tag *set_tag;
3596         struct rte_flow_action_jump *jump;
3597         const int qrss_idx = qrss - actions;
3598         uint32_t flow_id = 0;
3599         int ret = 0;
3600
3601         /*
3602          * The given actions will be split:
3603          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
3604          * - Add jump to mreg CP_TBL.
3605          * As a result, there will be one more action.
3606          */
3607         ++actions_n;
3608         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
3609         set_tag = (void *)(split_actions + actions_n);
3610         /*
3611          * If the tag action is not set to void (i.e. we are not the meter
3612          * suffix flow), add the tag action, since the meter suffix flow
3613          * already has the tag added.
3614          */
3615         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
3616                 /*
3617                  * Allocate the new subflow ID. This one is unique within
3618                  * device and not shared with representors. Otherwise,
3619                  * we would have to resolve multi-thread access synch
3620                  * issue. Each flow on the shared device is appended
3621                  * with source vport identifier, so the resulting
3622                  * flows will be unique in the shared (by master and
3623                  * representors) domain even if they have coinciding
3624                  * IDs.
3625                  */
3626                 flow_id = flow_qrss_get_id(dev);
3627                 if (!flow_id)
3628                         return rte_flow_error_set(error, ENOMEM,
3629                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3630                                                   NULL, "can't allocate id "
3631                                                   "for split Q/RSS subflow");
3632                 /* Internal SET_TAG action to set flow ID. */
3633                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3634                         .data = flow_id,
3635                 };
3636                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3637                 if (ret < 0)
3638                         return ret;
3639                 set_tag->id = ret;
3640                 /* Construct new actions array. */
3641                 /* Replace QUEUE/RSS action. */
3642                 split_actions[qrss_idx] = (struct rte_flow_action){
3643                         .type = MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3644                         .conf = set_tag,
3645                 };
3646         }
3647         /* JUMP action to jump to mreg copy table (CP_TBL). */
3648         jump = (void *)(set_tag + 1);
3649         *jump = (struct rte_flow_action_jump){
3650                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3651         };
3652         split_actions[actions_n - 2] = (struct rte_flow_action){
3653                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3654                 .conf = jump,
3655         };
3656         split_actions[actions_n - 1] = (struct rte_flow_action){
3657                 .type = RTE_FLOW_ACTION_TYPE_END,
3658         };
3659         return flow_id;
3660 }
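
/*
 * Before/after sketch (illustrative only): with the original actions
 *   MARK / RSS / END                (actions_n == 3, END included)
 * the split actions become
 *   MARK / SET_TAG(flow_id) / JUMP(CP_TBL) / END
 * SET_TAG replaces RSS at its original index while JUMP and END fill
 * the two trailing slots; the SET_TAG and JUMP confs are stored behind
 * the END slot of split_actions.
 */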
3661
3662 /**
3663  * Extend the given action list for Tx metadata copy.
3664  *
3665  * Copy the given action list to the ext_actions and add flow metadata register
3666  * copy action in order to copy reg_a set by WQE to reg_c[0].
3667  *
3668  * @param[out] ext_actions
3669  *   Pointer to the extended action list.
3670  * @param[in] actions
3671  *   Pointer to the list of actions.
3672  * @param[in] actions_n
3673  *   Number of actions in the list.
3674  * @param[out] error
3675  *   Perform verbose error reporting if not NULL.
3676  *
3677  * @return
3678  *   0 on success, negative value otherwise
3679  */
3680 static int
3681 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
3682                        struct rte_flow_action *ext_actions,
3683                        const struct rte_flow_action *actions,
3684                        int actions_n, struct rte_flow_error *error)
3685 {
3686         struct mlx5_flow_action_copy_mreg *cp_mreg =
3687                 (struct mlx5_flow_action_copy_mreg *)
3688                         (ext_actions + actions_n + 1);
3689         int ret;
3690
3691         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3692         if (ret < 0)
3693                 return ret;
3694         cp_mreg->dst = ret;
3695         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
3696         if (ret < 0)
3697                 return ret;
3698         cp_mreg->src = ret;
3699         memcpy(ext_actions, actions,
3700                sizeof(*ext_actions) * actions_n);
3701         ext_actions[actions_n - 1] = (struct rte_flow_action){
3702                 .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3703                 .conf = cp_mreg,
3704         };
3705         ext_actions[actions_n] = (struct rte_flow_action){
3706                 .type = RTE_FLOW_ACTION_TYPE_END,
3707         };
3708         return 0;
3709 }
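
/*
 * Layout sketch (illustrative only): with actions SET_META / END
 * (actions_n == 2) the extended list becomes
 *   SET_META / COPY_MREG(reg_a -> reg_c[0]) / END
 * with the copy_mreg conf stored behind the END slot of ext_actions.
 */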
3710
3711 /**
3712  * The splitting for metadata feature.
3713  *
3714  * - Q/RSS action on NIC Rx should be split in order to pass by
3715  *   the mreg copy table (RX_CP_TBL) and then it jumps to the
3716  *   action table (RX_ACT_TBL) which has the split Q/RSS action.
3717  *
3718  * - All the actions on NIC Tx should have a mreg copy action to
3719  *   copy reg_a from WQE to reg_c[0].
3720  *
3721  * @param dev
3722  *   Pointer to Ethernet device.
3723  * @param[in] flow
3724  *   Parent flow structure pointer.
3725  * @param[in] attr
3726  *   Flow rule attributes.
3727  * @param[in] items
3728  *   Pattern specification (list terminated by the END pattern item).
3729  * @param[in] actions
3730  *   Associated actions (list terminated by the END action).
3731  * @param[in] external
3732  *   This flow rule is created by a request external to the PMD.
3733  * @param[out] error
3734  *   Perform verbose error reporting if not NULL.
3735  * @return
3736  *   0 on success, negative value otherwise
3737  */
3738 static int
3739 flow_create_split_metadata(struct rte_eth_dev *dev,
3740                            struct rte_flow *flow,
3741                            const struct rte_flow_attr *attr,
3742                            const struct rte_flow_item items[],
3743                            const struct rte_flow_action actions[],
3744                            bool external, struct rte_flow_error *error)
3745 {
3746         struct mlx5_priv *priv = dev->data->dev_private;
3747         struct mlx5_dev_config *config = &priv->config;
3748         const struct rte_flow_action *qrss = NULL;
3749         struct rte_flow_action *ext_actions = NULL;
3750         struct mlx5_flow *dev_flow = NULL;
3751         uint32_t qrss_id = 0;
3752         int mtr_sfx = 0;
3753         size_t act_size;
3754         int actions_n;
3755         int ret;
3756
3757         /* Check whether extensive metadata feature is engaged. */
3758         if (!config->dv_flow_en ||
3759             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3760             !mlx5_flow_ext_mreg_supported(dev))
3761                 return flow_create_split_inner(dev, flow, NULL, attr, items,
3762                                                actions, external, error);
3763         actions_n = flow_parse_qrss_action(actions, &qrss);
3764         if (qrss) {
3765                 /* Exclude hairpin flows from splitting. */
3766                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
3767                         const struct rte_flow_action_queue *queue;
3768
3769                         queue = qrss->conf;
3770                         if (mlx5_rxq_get_type(dev, queue->index) ==
3771                             MLX5_RXQ_TYPE_HAIRPIN)
3772                                 qrss = NULL;
3773                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
3774                         const struct rte_flow_action_rss *rss;
3775
3776                         rss = qrss->conf;
3777                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
3778                             MLX5_RXQ_TYPE_HAIRPIN)
3779                                 qrss = NULL;
3780                 }
3781         }
3782         if (qrss) {
3783                 /* Check if it is in meter suffix table. */
3784                 mtr_sfx = attr->group == (attr->transfer ?
3785                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
3786                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
3787                 /*
3788                  * Q/RSS action on NIC Rx should be split in order to pass by
3789                  * the mreg copy table (RX_CP_TBL) and then it jumps to the
3790                  * action table (RX_ACT_TBL) which has the split Q/RSS action.
3791                  */
3792                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
3793                            sizeof(struct rte_flow_action_set_tag) +
3794                            sizeof(struct rte_flow_action_jump);
3795                 ext_actions = rte_zmalloc(__func__, act_size, 0);
3796                 if (!ext_actions)
3797                         return rte_flow_error_set(error, ENOMEM,
3798                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3799                                                   NULL, "no memory to split "
3800                                                   "metadata flow");
3801                 /*
3802                  * If we are the suffix flow of meter, tag already exist.
3803                  * Set the tag action to void.
3804                  */
3805                 if (mtr_sfx)
3806                         ext_actions[qrss - actions].type =
3807                                                 RTE_FLOW_ACTION_TYPE_VOID;
3808                 else
3809                         ext_actions[qrss - actions].type =
3810                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3811                 /*
3812                  * Create the new actions list with the Q/RSS action removed
3813                  * and a set tag plus a jump to the register copy table
3814                  * (RX_CP_TBL) appended. The unique tag ID is preallocated
3815                  * here in advance because the set tag action needs it.
3816                  */
3817                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
3818                                                     qrss, actions_n, error);
3819                 if (!mtr_sfx && !qrss_id) {
3820                         ret = -rte_errno;
3821                         goto exit;
3822                 }
3823         } else if (attr->egress && !attr->transfer) {
3824                 /*
3825                  * All the actions on NIC Tx should have a metadata register
3826                  * copy action to copy reg_a from WQE to reg_c[meta]
3827                  */
3828                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
3829                            sizeof(struct mlx5_flow_action_copy_mreg);
3830                 ext_actions = rte_zmalloc(__func__, act_size, 0);
3831                 if (!ext_actions)
3832                         return rte_flow_error_set(error, ENOMEM,
3833                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3834                                                   NULL, "no memory to split "
3835                                                   "metadata flow");
3836                 /* Create the action list appended with copy register. */
3837                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
3838                                              actions_n, error);
3839                 if (ret < 0)
3840                         goto exit;
3841         }
3842         /* Add the unmodified original or prefix subflow. */
3843         ret = flow_create_split_inner(dev, flow, &dev_flow, attr, items,
3844                                       ext_actions ? ext_actions : actions,
3845                                       external, error);
3846         if (ret < 0)
3847                 goto exit;
3848         assert(dev_flow);
3849         if (qrss) {
3850                 const struct rte_flow_attr q_attr = {
3851                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3852                         .ingress = 1,
3853                 };
3854                 /* Internal PMD action to set register. */
3855                 struct mlx5_rte_flow_item_tag q_tag_spec = {
3856                         .data = qrss_id,
3857                         .id = 0,
3858                 };
3859                 struct rte_flow_item q_items[] = {
3860                         {
3861                                 .type = MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3862                                 .spec = &q_tag_spec,
3863                                 .last = NULL,
3864                                 .mask = NULL,
3865                         },
3866                         {
3867                                 .type = RTE_FLOW_ITEM_TYPE_END,
3868                         },
3869                 };
3870                 struct rte_flow_action q_actions[] = {
3871                         {
3872                                 .type = qrss->type,
3873                                 .conf = qrss->conf,
3874                         },
3875                         {
3876                                 .type = RTE_FLOW_ACTION_TYPE_END,
3877                         },
3878                 };
3879                 uint64_t hash_fields = dev_flow->hash_fields;
3880
3881                 /*
3882                  * Configure the tag item only if there is no meter subflow.
3883                  * Since the tag is already set in the meter suffix subflow,
3884                  * we can just use the meter suffix items as is.
3885                  */
3886                 if (qrss_id) {
3887                         /* Not meter subflow. */
3888                         assert(!mtr_sfx);
3889                         /*
3890                          * Put the unique id into the prefix flow because it
3891                          * is destroyed after the suffix flow. The id is freed
3892                          * only once no actual flow references it, at which
3893                          * point the identifier can be reallocated (for
3894                          * example, for other flows in other threads).
3895                          */
3896                         dev_flow->qrss_id = qrss_id;
3897                         qrss_id = 0;
3898                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
3899                                                    error);
3900                         if (ret < 0)
3901                                 goto exit;
3902                         q_tag_spec.id = ret;
3903                 }
3904                 dev_flow = NULL;
3905                 /* Add suffix subflow to execute Q/RSS. */
3906                 ret = flow_create_split_inner(dev, flow, &dev_flow,
3907                                               &q_attr, mtr_sfx ? items :
3908                                               q_items, q_actions,
3909                                               external, error);
3910                 if (ret < 0)
3911                         goto exit;
3912                 assert(dev_flow);
3913                 dev_flow->hash_fields = hash_fields;
3914         }
3915
3916 exit:
3917         /*
3918          * We do not destroy the partially created sub_flows in case of error.
3919          * These ones are included into parent flow list and will be destroyed
3920          * These are included in the parent flow list and will be destroyed
3921          */
3922         flow_qrss_free_id(dev, qrss_id);
3923         rte_free(ext_actions);
3924         return ret;
3925 }
3926
3927 /**
3928  * The splitting for meter feature.
3929  *
3930  * - The meter flow will be split into two flows, a prefix and a
3931  *   suffix flow. Packets are meaningful only if they pass the
3932  *   prefix meter action.
3933  *
3934  * - Reg_C_5 is used to match the packet between the prefix and
3935  *   suffix flows.
3936  *
3937  * @param dev
3938  *   Pointer to Ethernet device.
3939  * @param[in] flow
3940  *   Parent flow structure pointer.
3941  * @param[in] attr
3942  *   Flow rule attributes.
3943  * @param[in] items
3944  *   Pattern specification (list terminated by the END pattern item).
3945  * @param[in] actions
3946  *   Associated actions (list terminated by the END action).
3947  * @param[in] external
3948  *   This flow rule is created by a request external to the PMD.
3949  * @param[out] error
3950  *   Perform verbose error reporting if not NULL.
3951  * @return
3952  *   0 on success, negative value otherwise
3953  */
3954 static int
3955 flow_create_split_meter(struct rte_eth_dev *dev,
3956                            struct rte_flow *flow,
3957                            const struct rte_flow_attr *attr,
3958                            const struct rte_flow_item items[],
3959                            const struct rte_flow_action actions[],
3960                            bool external, struct rte_flow_error *error)
3961 {
3962         struct mlx5_priv *priv = dev->data->dev_private;
3963         struct rte_flow_action *sfx_actions = NULL;
3964         struct rte_flow_action *pre_actions = NULL;
3965         struct rte_flow_item *sfx_items = NULL;
3966         const  struct rte_flow_item *sfx_port_id_item;
3967         struct mlx5_flow *dev_flow = NULL;
3968         struct rte_flow_attr sfx_attr = *attr;
3969         uint32_t mtr = 0;
3970         uint32_t mtr_tag_id = 0;
3971         size_t act_size;
3972         size_t item_size;
3973         int actions_n = 0;
3974         int ret;
3975
3976         if (priv->mtr_en)
3977                 actions_n = flow_check_meter_action(actions, &mtr);
3978         if (mtr) {
3979                 struct mlx5_rte_flow_item_tag *tag_spec;
3980                 /* The five prefix actions: meter, decap, encap, tag, end. */
3981                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
3982                            sizeof(struct rte_flow_action_set_tag);
3983                 /* The three suffix items: tag, port id, end. */
3984 #define METER_SUFFIX_ITEM 3
3985                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
3986                             sizeof(struct mlx5_rte_flow_item_tag);
3987                 sfx_actions = rte_zmalloc(__func__, (act_size + item_size), 0);
3988                 if (!sfx_actions)
3989                         return rte_flow_error_set(error, ENOMEM,
3990                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3991                                                   NULL, "no memory to split "
3992                                                   "meter flow");
3993                 pre_actions = sfx_actions + actions_n;
3994                 mtr_tag_id = flow_meter_split_prep(dev, actions, sfx_actions,
3995                                                      pre_actions);
3996                 if (!mtr_tag_id) {
3997                         ret = -rte_errno;
3998                         goto exit;
3999                 }
4000                 /* Add the prefix subflow. */
4001                 ret = flow_create_split_inner(dev, flow, &dev_flow, attr, items,
4002                                                   pre_actions, external, error);
4003                 if (ret) {
4004                         ret = -rte_errno;
4005                         goto exit;
4006                 }
4007                 dev_flow->mtr_flow_id = mtr_tag_id;
4008                 /* Prepare the suffix flow match pattern. */
4009                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4010                              act_size);
4011                 tag_spec = (struct mlx5_rte_flow_item_tag *)(sfx_items +
4012                             METER_SUFFIX_ITEM);
4013                 tag_spec->data = rte_cpu_to_be_32(dev_flow->mtr_flow_id);
4014                 tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0,
4015                                                     error);
4016                 sfx_items->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
4017                 sfx_items->spec = tag_spec;
4018                 sfx_items->last = NULL;
4019                 sfx_items->mask = NULL;
4020                 sfx_items++;
4021                 sfx_port_id_item = find_port_id_item(items);
4022                 if (sfx_port_id_item) {
4023                         memcpy(sfx_items, sfx_port_id_item,
4024                                sizeof(*sfx_items));
4025                         sfx_items++;
4026                 }
4027                 sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
4028                 sfx_items -= METER_SUFFIX_ITEM;
4029                 /* Set the suffix group attribute. */
4030                 sfx_attr.group = sfx_attr.transfer ?
4031                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4032                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4033         }
4034         /* Add the suffix (or the unmodified original) subflow. */
4035         ret = flow_create_split_metadata(dev, flow, &sfx_attr,
4036                                          sfx_items ? sfx_items : items,
4037                                          sfx_actions ? sfx_actions : actions,
4038                                          external, error);
4039 exit:
4040         if (sfx_actions)
4041                 rte_free(sfx_actions);
4042         return ret;
4043 }
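/*
 * Illustration (not part of the driver): the meter splitter above uses a
 * single rte_zmalloc() of act_size + item_size, so the one
 * rte_free(sfx_actions) in the exit path releases everything. The
 * resulting layout, under the definitions used above, is:
 *
 *	sfx_actions = (struct rte_flow_action *)buf;
 *	pre_actions = sfx_actions + actions_n;
 *	sfx_items   = (struct rte_flow_item *)((char *)buf + act_size);
 *	tag_spec    = (struct mlx5_rte_flow_item_tag *)
 *		      (sfx_items + METER_SUFFIX_ITEM);
 */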
4044
4045 /**
4046  * Split the flow to subflow set. The splitters might be linked
4047  * in the chain, like this:
4048  * flow_create_split_outer() calls:
4049  *   flow_create_split_meter() calls:
4050  *     flow_create_split_metadata(meter_subflow_0) calls:
4051  *       flow_create_split_inner(metadata_subflow_0)
4052  *       flow_create_split_inner(metadata_subflow_1)
4053  *       flow_create_split_inner(metadata_subflow_2)
4054  *     flow_create_split_metadata(meter_subflow_1) calls:
4055  *       flow_create_split_inner(metadata_subflow_0)
4056  *       flow_create_split_inner(metadata_subflow_1)
4057  *       flow_create_split_inner(metadata_subflow_2)
4058  *
4059  * This provides a flexible way to add new levels of flow splitting.
4060  * All successfully created subflows are included in the parent
4061  * flow dev_flow list.
4062  *
4063  * @param dev
4064  *   Pointer to Ethernet device.
4065  * @param[in] flow
4066  *   Parent flow structure pointer.
4067  * @param[in] attr
4068  *   Flow rule attributes.
4069  * @param[in] items
4070  *   Pattern specification (list terminated by the END pattern item).
4071  * @param[in] actions
4072  *   Associated actions (list terminated by the END action).
4073  * @param[in] external
4074  *   This flow rule is created by a request external to the PMD.
4075  * @param[out] error
4076  *   Perform verbose error reporting if not NULL.
4077  * @return
4078  *   0 on success, negative value otherwise
4079  */
4080 static int
4081 flow_create_split_outer(struct rte_eth_dev *dev,
4082                         struct rte_flow *flow,
4083                         const struct rte_flow_attr *attr,
4084                         const struct rte_flow_item items[],
4085                         const struct rte_flow_action actions[],
4086                         bool external, struct rte_flow_error *error)
4087 {
4088         int ret;
4089
4090         ret = flow_create_split_meter(dev, flow, attr, items,
4091                                          actions, external, error);
4092         assert(ret <= 0);
4093         return ret;
4094 }
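/*
 * Illustration only: a new splitting level is added by inserting another
 * pass-through function into the chain. A hypothetical splitter (not in
 * the driver) would look like the sketch below, and
 * flow_create_split_outer() would call it instead of
 * flow_create_split_meter():
 *
 *	static int
 *	flow_create_split_sample(struct rte_eth_dev *dev,
 *				 struct rte_flow *flow,
 *				 const struct rte_flow_attr *attr,
 *				 const struct rte_flow_item items[],
 *				 const struct rte_flow_action actions[],
 *				 bool external, struct rte_flow_error *error)
 *	{
 *		// Prepare prefix/suffix resources here, then chain down.
 *		return flow_create_split_meter(dev, flow, attr, items,
 *					       actions, external, error);
 *	}
 */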
4095
4096 /**
4097  * Create a flow and add it to @p list.
4098  *
4099  * @param dev
4100  *   Pointer to Ethernet device.
4101  * @param list
4102  *   Pointer to a TAILQ flow list. If this parameter is NULL,
4103  *   no list insertion occurs; the flow is just created and
4104  *   it is the caller's responsibility to track the
4105  *   created flow.
4106  * @param[in] attr
4107  *   Flow rule attributes.
4108  * @param[in] items
4109  *   Pattern specification (list terminated by the END pattern item).
4110  * @param[in] actions
4111  *   Associated actions (list terminated by the END action).
4112  * @param[in] external
4113  *   This flow rule is created by a request external to the PMD.
4114  * @param[out] error
4115  *   Perform verbose error reporting if not NULL.
4116  *
4117  * @return
4118  *   A flow on success, NULL otherwise and rte_errno is set.
4119  */
4120 static struct rte_flow *
4121 flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
4122                  const struct rte_flow_attr *attr,
4123                  const struct rte_flow_item items[],
4124                  const struct rte_flow_action actions[],
4125                  bool external, struct rte_flow_error *error)
4126 {
4127         struct mlx5_priv *priv = dev->data->dev_private;
4128         struct rte_flow *flow = NULL;
4129         struct mlx5_flow *dev_flow;
4130         const struct rte_flow_action_rss *rss;
4131         union {
4132                 struct rte_flow_expand_rss buf;
4133                 uint8_t buffer[2048];
4134         } expand_buffer;
4135         union {
4136                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4137                 uint8_t buffer[2048];
4138         } actions_rx;
4139         union {
4140                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4141                 uint8_t buffer[2048];
4142         } actions_hairpin_tx;
4143         union {
4144                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
4145                 uint8_t buffer[2048];
4146         } items_tx;
4147         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
4148         const struct rte_flow_action *p_actions_rx = actions;
4149         int ret;
4150         uint32_t i;
4151         uint32_t flow_size;
4152         int hairpin_flow = 0;
4153         uint32_t hairpin_id = 0;
4154         struct rte_flow_attr attr_tx = { .priority = 0 };
4155
4156         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4157         if (hairpin_flow > 0) {
4158                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
4159                         rte_errno = EINVAL;
4160                         return NULL;
4161                 }
4162                 flow_hairpin_split(dev, actions, actions_rx.actions,
4163                                    actions_hairpin_tx.actions, items_tx.items,
4164                                    &hairpin_id);
4165                 p_actions_rx = actions_rx.actions;
4166         }
4167         ret = flow_drv_validate(dev, attr, items, p_actions_rx, external,
4168                                 error);
4169         if (ret < 0)
4170                 goto error_before_flow;
4171         flow_size = sizeof(struct rte_flow);
4172         rss = flow_get_rss_action(p_actions_rx);
4173         if (rss)
4174                 flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
4175                                             sizeof(void *));
4176         else
4177                 flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
4178         flow = rte_calloc(__func__, 1, flow_size, 0);
4179         if (!flow) {
4180                 rte_errno = ENOMEM;
4181                 goto error_before_flow;
4182         }
4183         flow->drv_type = flow_get_drv_type(dev, attr);
4184         if (hairpin_id != 0)
4185                 flow->hairpin_flow_id = hairpin_id;
4186         assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
4187                flow->drv_type < MLX5_FLOW_TYPE_MAX);
4188         flow->rss.queue = (void *)(flow + 1);
4189         if (rss) {
4190                 /*
4191                  * The following information is required by
4192                  * mlx5_flow_hashfields_adjust() in advance.
4193                  */
4194                 flow->rss.level = rss->level;
4195                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
4196                 flow->rss.types = !rss->types ? ETH_RSS_IP : rss->types;
4197         }
4198         LIST_INIT(&flow->dev_flows);
4199         if (rss && rss->types) {
4200                 unsigned int graph_root;
4201
4202                 graph_root = find_graph_root(items, rss->level);
4203                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
4204                                           items, rss->types,
4205                                           mlx5_support_expansion,
4206                                           graph_root);
4207                 assert(ret > 0 &&
4208                        (unsigned int)ret < sizeof(expand_buffer.buffer));
4209         } else {
4210                 buf->entries = 1;
4211                 buf->entry[0].pattern = (void *)(uintptr_t)items;
4212         }
4213         for (i = 0; i < buf->entries; ++i) {
4214                 /*
4215                  * The splitter may create multiple dev_flows,
4216                  * depending on configuration. In the simplest
4217                  * case it just creates unmodified original flow.
4218                  */
4219                 ret = flow_create_split_outer(dev, flow, attr,
4220                                               buf->entry[i].pattern,
4221                                               p_actions_rx, external,
4222                                               error);
4223                 if (ret < 0)
4224                         goto error;
4225         }
4226         /* Create the tx flow. */
4227         if (hairpin_flow) {
4228                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
4229                 attr_tx.ingress = 0;
4230                 attr_tx.egress = 1;
4231                 dev_flow = flow_drv_prepare(flow, &attr_tx, items_tx.items,
4232                                             actions_hairpin_tx.actions, error);
4233                 if (!dev_flow)
4234                         goto error;
4235                 dev_flow->flow = flow;
4236                 dev_flow->external = 0;
4237                 LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
4238                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
4239                                          items_tx.items,
4240                                          actions_hairpin_tx.actions, error);
4241                 if (ret < 0)
4242                         goto error;
4243         }
4244         /*
4245          * Update the metadata register copy table. If extensive
4246          * metadata feature is enabled and registers are supported
4247          * we might create the extra rte_flow for each unique
4248          * MARK/FLAG action ID.
4249          *
4250          * The table is updated for ingress Flows only, because
4251          * the egress Flows belong to a different device and the
4252          * copy table should be updated in the peer NIC Rx domain.
4253          */
4254         if (attr->ingress &&
4255             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
4256                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
4257                 if (ret)
4258                         goto error;
4259         }
4260         if (dev->data->dev_started) {
4261                 ret = flow_drv_apply(dev, flow, error);
4262                 if (ret < 0)
4263                         goto error;
4264         }
4265         if (list)
4266                 TAILQ_INSERT_TAIL(list, flow, next);
4267         flow_rxq_flags_set(dev, flow);
4268         return flow;
4269 error_before_flow:
4270         if (hairpin_id)
4271                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4272                                      hairpin_id);
4273         return NULL;
4274 error:
4275         assert(flow);
4276         flow_mreg_del_copy_action(dev, flow);
4277         ret = rte_errno; /* Save rte_errno before cleanup. */
4278         if (flow->hairpin_flow_id)
4279                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4280                                      flow->hairpin_flow_id);
4281         assert(flow);
4282         flow_drv_destroy(dev, flow);
4283         rte_free(flow);
4284         rte_errno = ret; /* Restore rte_errno. */
4285         return NULL;
4286 }
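/*
 * Example of the RSS expansion above (illustrative): with the pattern
 * ETH / IPV4 / END and rss->types including ETH_RSS_NONFRAG_IPV4_UDP |
 * ETH_RSS_NONFRAG_IPV4_TCP, rte_flow_expand_rss() yields roughly three
 * entries:
 *
 *	ETH / IPV4 / END
 *	ETH / IPV4 / UDP / END
 *	ETH / IPV4 / TCP / END
 *
 * and flow_create_split_outer() is then invoked once per entry, each
 * call contributing dev_flows to the same parent flow.
 */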
4287
4288 /**
4289  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
4290  * incoming packets to table 1.
4291  *
4292  * Other flow rules, requested for group n, will be created in
4293  * e-switch table n+1.
4294  * A jump action to e-switch group n will be translated to table n+1.
4295  *
4296  * Used when working in switchdev mode, to utilise advantages of table 1
4297  * and above.
4298  *
4299  * @param dev
4300  *   Pointer to Ethernet device.
4301  *
4302  * @return
4303  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
4304  */
4305 struct rte_flow *
4306 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4307 {
4308         const struct rte_flow_attr attr = {
4309                 .group = 0,
4310                 .priority = 0,
4311                 .ingress = 1,
4312                 .egress = 0,
4313                 .transfer = 1,
4314         };
4315         const struct rte_flow_item pattern = {
4316                 .type = RTE_FLOW_ITEM_TYPE_END,
4317         };
4318         struct rte_flow_action_jump jump = {
4319                 .group = 1,
4320         };
4321         const struct rte_flow_action actions[] = {
4322                 {
4323                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4324                         .conf = &jump,
4325                 },
4326                 {
4327                         .type = RTE_FLOW_ACTION_TYPE_END,
4328                 },
4329         };
4330         struct mlx5_priv *priv = dev->data->dev_private;
4331         struct rte_flow_error error;
4332
4333         return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
4334                                 actions, false, &error);
4335 }
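/*
 * The rule above is roughly equivalent to the following testpmd command
 * (assuming port 0):
 *
 *	flow create 0 ingress transfer pattern end
 *		actions jump group 1 / end
 */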
4336
4337 /**
4338  * Create a flow.
4339  *
4340  * @see rte_flow_create()
4341  * @see rte_flow_ops
4342  */
4343 struct rte_flow *
4344 mlx5_flow_create(struct rte_eth_dev *dev,
4345                  const struct rte_flow_attr *attr,
4346                  const struct rte_flow_item items[],
4347                  const struct rte_flow_action actions[],
4348                  struct rte_flow_error *error)
4349 {
4350         struct mlx5_priv *priv = dev->data->dev_private;
4351
4352         return flow_list_create(dev, &priv->flows,
4353                                 attr, items, actions, true, error);
4354 }
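/*
 * Application-side usage sketch (assumes an initialized port 0 with at
 * least one Rx queue; illustrative values):
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f = rte_flow_create(0, &attr, pattern,
 *					     actions, &err);
 *
 * rte_flow_create() reaches mlx5_flow_create() through the ops exposed
 * by mlx5_dev_filter_ctrl() below.
 */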
4355
4356 /**
4357  * Destroy a flow in a list.
4358  *
4359  * @param dev
4360  *   Pointer to Ethernet device.
4361  * @param list
4362  *   Pointer to a TAILQ flow list. If this parameter is NULL,
4363  *   the flow is not removed from any list.
4364  * @param[in] flow
4365  *   Flow to destroy.
4366  */
4367 static void
4368 flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
4369                   struct rte_flow *flow)
4370 {
4371         struct mlx5_priv *priv = dev->data->dev_private;
4372
4373         /*
4374          * Update RX queue flags only if port is started, otherwise it is
4375          * already clean.
4376          */
4377         if (dev->data->dev_started)
4378                 flow_rxq_flags_trim(dev, flow);
4379         if (flow->hairpin_flow_id)
4380                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4381                                      flow->hairpin_flow_id);
4382         flow_drv_destroy(dev, flow);
4383         if (list)
4384                 TAILQ_REMOVE(list, flow, next);
4385         flow_mreg_del_copy_action(dev, flow);
4386         rte_free(flow->fdir);
4387         rte_free(flow);
4388 }
4389
4390 /**
4391  * Destroy all flows.
4392  *
4393  * @param dev
4394  *   Pointer to Ethernet device.
4395  * @param list
4396  *   Pointer to a TAILQ flow list.
4397  */
4398 void
4399 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
4400 {
4401         while (!TAILQ_EMPTY(list)) {
4402                 struct rte_flow *flow;
4403
4404                 flow = TAILQ_FIRST(list);
4405                 flow_list_destroy(dev, list, flow);
4406         }
4407 }
4408
4409 /**
4410  * Remove all flows.
4411  *
4412  * @param dev
4413  *   Pointer to Ethernet device.
4414  * @param list
4415  *   Pointer to a TAILQ flow list.
4416  */
4417 void
4418 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
4419 {
4420         struct rte_flow *flow;
4421
4422         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
4423                 flow_drv_remove(dev, flow);
4424                 flow_mreg_stop_copy_action(dev, flow);
4425         }
4426         flow_mreg_del_default_copy_action(dev);
4427         flow_rxq_flags_clear(dev);
4428 }
4429
4430 /**
4431  * Add all flows.
4432  *
4433  * @param dev
4434  *   Pointer to Ethernet device.
4435  * @param list
4436  *   Pointer to a TAILQ flow list.
4437  *
4438  * @return
4439  *   0 on success, a negative errno value otherwise and rte_errno is set.
4440  */
4441 int
4442 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
4443 {
4444         struct rte_flow *flow;
4445         struct rte_flow_error error;
4446         int ret = 0;
4447
4448         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4449         ret = flow_mreg_add_default_copy_action(dev, &error);
4450         if (ret < 0)
4451                 return -rte_errno;
4452         /* Apply Flows created by application. */
4453         TAILQ_FOREACH(flow, list, next) {
4454                 ret = flow_mreg_start_copy_action(dev, flow);
4455                 if (ret < 0)
4456                         goto error;
4457                 ret = flow_drv_apply(dev, flow, &error);
4458                 if (ret < 0)
4459                         goto error;
4460                 flow_rxq_flags_set(dev, flow);
4461         }
4462         return 0;
4463 error:
4464         ret = rte_errno; /* Save rte_errno before cleanup. */
4465         mlx5_flow_stop(dev, list);
4466         rte_errno = ret; /* Restore rte_errno. */
4467         return -rte_errno;
4468 }
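/*
 * Typical pairing (sketch): the dev_start path calls
 * mlx5_flow_start(dev, &priv->flows) to re-apply application flows, and
 * the dev_stop path calls mlx5_flow_stop(dev, &priv->flows) to remove
 * them from hardware while keeping the software flow list intact.
 */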
4469
4470 /**
4471  * Verify the flow list is empty.
4472  *
4473  * @param dev
4474  *   Pointer to Ethernet device.
4475  *
4476  * @return the number of flows not released.
4477  */
4478 int
4479 mlx5_flow_verify(struct rte_eth_dev *dev)
4480 {
4481         struct mlx5_priv *priv = dev->data->dev_private;
4482         struct rte_flow *flow;
4483         int ret = 0;
4484
4485         TAILQ_FOREACH(flow, &priv->flows, next) {
4486                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
4487                         dev->data->port_id, (void *)flow);
4488                 ++ret;
4489         }
4490         return ret;
4491 }
4492
4493 /**
4494  * Enable default hairpin egress flow.
4495  *
4496  * @param dev
4497  *   Pointer to Ethernet device.
4498  * @param queue
4499  *   The queue index.
4500  *
4501  * @return
4502  *   0 on success, a negative errno value otherwise and rte_errno is set.
4503  */
4504 int
4505 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
4506                             uint32_t queue)
4507 {
4508         struct mlx5_priv *priv = dev->data->dev_private;
4509         const struct rte_flow_attr attr = {
4510                 .egress = 1,
4511                 .priority = 0,
4512         };
4513         struct mlx5_rte_flow_item_tx_queue queue_spec = {
4514                 .queue = queue,
4515         };
4516         struct mlx5_rte_flow_item_tx_queue queue_mask = {
4517                 .queue = UINT32_MAX,
4518         };
4519         struct rte_flow_item items[] = {
4520                 {
4521                         .type = MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
4522                         .spec = &queue_spec,
4523                         .last = NULL,
4524                         .mask = &queue_mask,
4525                 },
4526                 {
4527                         .type = RTE_FLOW_ITEM_TYPE_END,
4528                 },
4529         };
4530         struct rte_flow_action_jump jump = {
4531                 .group = MLX5_HAIRPIN_TX_TABLE,
4532         };
4533         struct rte_flow_action actions[2];
4534         struct rte_flow *flow;
4535         struct rte_flow_error error;
4536
4537         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
4538         actions[0].conf = &jump;
4539         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
4540         flow = flow_list_create(dev, &priv->ctrl_flows,
4541                                 &attr, items, actions, false, &error);
4542         if (!flow) {
4543                 DRV_LOG(DEBUG,
4544                         "Failed to create ctrl flow: rte_errno(%d),"
4545                         " type(%d), message(%s)",
4546                         rte_errno, error.type,
4547                         error.message ? error.message : " (no stated reason)");
4548                 return -rte_errno;
4549         }
4550         return 0;
4551 }
4552
4553 /**
4554  * Enable a control flow configured from the control plane.
4555  *
4556  * @param dev
4557  *   Pointer to Ethernet device.
4558  * @param eth_spec
4559  *   An Ethernet flow spec to apply.
4560  * @param eth_mask
4561  *   An Ethernet flow mask to apply.
4562  * @param vlan_spec
4563  *   A VLAN flow spec to apply.
4564  * @param vlan_mask
4565  *   A VLAN flow mask to apply.
4566  *
4567  * @return
4568  *   0 on success, a negative errno value otherwise and rte_errno is set.
4569  */
4570 int
4571 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
4572                     struct rte_flow_item_eth *eth_spec,
4573                     struct rte_flow_item_eth *eth_mask,
4574                     struct rte_flow_item_vlan *vlan_spec,
4575                     struct rte_flow_item_vlan *vlan_mask)
4576 {
4577         struct mlx5_priv *priv = dev->data->dev_private;
4578         const struct rte_flow_attr attr = {
4579                 .ingress = 1,
4580                 .priority = MLX5_FLOW_PRIO_RSVD,
4581         };
4582         struct rte_flow_item items[] = {
4583                 {
4584                         .type = RTE_FLOW_ITEM_TYPE_ETH,
4585                         .spec = eth_spec,
4586                         .last = NULL,
4587                         .mask = eth_mask,
4588                 },
4589                 {
4590                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
4591                                               RTE_FLOW_ITEM_TYPE_END,
4592                         .spec = vlan_spec,
4593                         .last = NULL,
4594                         .mask = vlan_mask,
4595                 },
4596                 {
4597                         .type = RTE_FLOW_ITEM_TYPE_END,
4598                 },
4599         };
4600         uint16_t queue[priv->reta_idx_n];
4601         struct rte_flow_action_rss action_rss = {
4602                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4603                 .level = 0,
4604                 .types = priv->rss_conf.rss_hf,
4605                 .key_len = priv->rss_conf.rss_key_len,
4606                 .queue_num = priv->reta_idx_n,
4607                 .key = priv->rss_conf.rss_key,
4608                 .queue = queue,
4609         };
4610         struct rte_flow_action actions[] = {
4611                 {
4612                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4613                         .conf = &action_rss,
4614                 },
4615                 {
4616                         .type = RTE_FLOW_ACTION_TYPE_END,
4617                 },
4618         };
4619         struct rte_flow *flow;
4620         struct rte_flow_error error;
4621         unsigned int i;
4622
4623         if (!priv->reta_idx_n || !priv->rxqs_n)
4624                 return 0;
4626         for (i = 0; i != priv->reta_idx_n; ++i)
4627                 queue[i] = (*priv->reta_idx)[i];
4628         flow = flow_list_create(dev, &priv->ctrl_flows,
4629                                 &attr, items, actions, false, &error);
4630         if (!flow)
4631                 return -rte_errno;
4632         return 0;
4633 }
4634
4635 /**
4636  * Enable a control flow configured from the control plane.
4637  *
4638  * @param dev
4639  *   Pointer to Ethernet device.
4640  * @param eth_spec
4641  *   An Ethernet flow spec to apply.
4642  * @param eth_mask
4643  *   An Ethernet flow mask to apply.
4644  *
4645  * @return
4646  *   0 on success, a negative errno value otherwise and rte_errno is set.
4647  */
4648 int
4649 mlx5_ctrl_flow(struct rte_eth_dev *dev,
4650                struct rte_flow_item_eth *eth_spec,
4651                struct rte_flow_item_eth *eth_mask)
4652 {
4653         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
4654 }
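/*
 * Usage sketch: the driver start-up path enables control flows this way,
 * e.g. to receive broadcast frames (illustrative values):
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	mlx5_ctrl_flow(dev, &bcast, &bcast);
 */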
4655
4656 /**
4657  * Destroy a flow.
4658  *
4659  * @see rte_flow_destroy()
4660  * @see rte_flow_ops
4661  */
4662 int
4663 mlx5_flow_destroy(struct rte_eth_dev *dev,
4664                   struct rte_flow *flow,
4665                   struct rte_flow_error *error __rte_unused)
4666 {
4667         struct mlx5_priv *priv = dev->data->dev_private;
4668
4669         flow_list_destroy(dev, &priv->flows, flow);
4670         return 0;
4671 }
4672
4673 /**
4674  * Destroy all flows.
4675  *
4676  * @see rte_flow_flush()
4677  * @see rte_flow_ops
4678  */
4679 int
4680 mlx5_flow_flush(struct rte_eth_dev *dev,
4681                 struct rte_flow_error *error __rte_unused)
4682 {
4683         struct mlx5_priv *priv = dev->data->dev_private;
4684
4685         mlx5_flow_list_flush(dev, &priv->flows);
4686         return 0;
4687 }
4688
4689 /**
4690  * Isolated mode.
4691  *
4692  * @see rte_flow_isolate()
4693  * @see rte_flow_ops
4694  */
4695 int
4696 mlx5_flow_isolate(struct rte_eth_dev *dev,
4697                   int enable,
4698                   struct rte_flow_error *error)
4699 {
4700         struct mlx5_priv *priv = dev->data->dev_private;
4701
4702         if (dev->data->dev_started) {
4703                 rte_flow_error_set(error, EBUSY,
4704                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4705                                    NULL,
4706                                    "port must be stopped first");
4707                 return -rte_errno;
4708         }
4709         priv->isolated = !!enable;
4710         if (enable)
4711                 dev->dev_ops = &mlx5_dev_ops_isolate;
4712         else
4713                 dev->dev_ops = &mlx5_dev_ops;
4714         return 0;
4715 }
4716
4717 /**
4718  * Query a flow.
4719  *
4720  * @see rte_flow_query()
4721  * @see rte_flow_ops
4722  */
4723 static int
4724 flow_drv_query(struct rte_eth_dev *dev,
4725                struct rte_flow *flow,
4726                const struct rte_flow_action *actions,
4727                void *data,
4728                struct rte_flow_error *error)
4729 {
4730         const struct mlx5_flow_driver_ops *fops;
4731         enum mlx5_flow_drv_type ftype = flow->drv_type;
4732
4733         assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
4734         fops = flow_get_drv_ops(ftype);
4735
4736         return fops->query(dev, flow, actions, data, error);
4737 }
4738
4739 /**
4740  * Query a flow.
4741  *
4742  * @see rte_flow_query()
4743  * @see rte_flow_ops
4744  */
4745 int
4746 mlx5_flow_query(struct rte_eth_dev *dev,
4747                 struct rte_flow *flow,
4748                 const struct rte_flow_action *actions,
4749                 void *data,
4750                 struct rte_flow_error *error)
4751 {
4752         int ret;
4753
4754         ret = flow_drv_query(dev, flow, actions, data, error);
4755         if (ret < 0)
4756                 return ret;
4757         return 0;
4758 }
4759
4760 /**
4761  * Convert a flow director filter to a generic flow.
4762  *
4763  * @param dev
4764  *   Pointer to Ethernet device.
4765  * @param fdir_filter
4766  *   Flow director filter to add.
4767  * @param attributes
4768  *   Generic flow parameters structure.
4769  *
4770  * @return
4771  *   0 on success, a negative errno value otherwise and rte_errno is set.
4772  */
4773 static int
4774 flow_fdir_filter_convert(struct rte_eth_dev *dev,
4775                          const struct rte_eth_fdir_filter *fdir_filter,
4776                          struct mlx5_fdir *attributes)
4777 {
4778         struct mlx5_priv *priv = dev->data->dev_private;
4779         const struct rte_eth_fdir_input *input = &fdir_filter->input;
4780         const struct rte_eth_fdir_masks *mask =
4781                 &dev->data->dev_conf.fdir_conf.mask;
4782
4783         /* Validate queue number. */
4784         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
4785                 DRV_LOG(ERR, "port %u invalid queue number %d",
4786                         dev->data->port_id, fdir_filter->action.rx_queue);
4787                 rte_errno = EINVAL;
4788                 return -rte_errno;
4789         }
4790         attributes->attr.ingress = 1;
4791         attributes->items[0] = (struct rte_flow_item) {
4792                 .type = RTE_FLOW_ITEM_TYPE_ETH,
4793                 .spec = &attributes->l2,
4794                 .mask = &attributes->l2_mask,
4795         };
4796         switch (fdir_filter->action.behavior) {
4797         case RTE_ETH_FDIR_ACCEPT:
4798                 attributes->actions[0] = (struct rte_flow_action){
4799                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
4800                         .conf = &attributes->queue,
4801                 };
4802                 break;
4803         case RTE_ETH_FDIR_REJECT:
4804                 attributes->actions[0] = (struct rte_flow_action){
4805                         .type = RTE_FLOW_ACTION_TYPE_DROP,
4806                 };
4807                 break;
4808         default:
4809                 DRV_LOG(ERR, "port %u invalid behavior %d",
4810                         dev->data->port_id,
4811                         fdir_filter->action.behavior);
4812                 rte_errno = ENOTSUP;
4813                 return -rte_errno;
4814         }
4815         attributes->queue.index = fdir_filter->action.rx_queue;
4816         /* Handle L3. */
4817         switch (fdir_filter->input.flow_type) {
4818         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4819         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4820         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4821                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
4822                         .src_addr = input->flow.ip4_flow.src_ip,
4823                         .dst_addr = input->flow.ip4_flow.dst_ip,
4824                         .time_to_live = input->flow.ip4_flow.ttl,
4825                         .type_of_service = input->flow.ip4_flow.tos,
4826                 };
4827                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
4828                         .src_addr = mask->ipv4_mask.src_ip,
4829                         .dst_addr = mask->ipv4_mask.dst_ip,
4830                         .time_to_live = mask->ipv4_mask.ttl,
4831                         .type_of_service = mask->ipv4_mask.tos,
4832                         .next_proto_id = mask->ipv4_mask.proto,
4833                 };
4834                 attributes->items[1] = (struct rte_flow_item){
4835                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
4836                         .spec = &attributes->l3,
4837                         .mask = &attributes->l3_mask,
4838                 };
4839                 break;
4840         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4841         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4842         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4843                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
4844                         .hop_limits = input->flow.ipv6_flow.hop_limits,
4845                         .proto = input->flow.ipv6_flow.proto,
4846                 };
4847
4848                 memcpy(attributes->l3.ipv6.hdr.src_addr,
4849                        input->flow.ipv6_flow.src_ip,
4850                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
4851                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
4852                        input->flow.ipv6_flow.dst_ip,
4853                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
4854                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
4855                        mask->ipv6_mask.src_ip,
4856                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
4857                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
4858                        mask->ipv6_mask.dst_ip,
4859                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
4860                 attributes->items[1] = (struct rte_flow_item){
4861                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
4862                         .spec = &attributes->l3,
4863                         .mask = &attributes->l3_mask,
4864                 };
4865                 break;
4866         default:
4867                 DRV_LOG(ERR, "port %u invalid flow type %d",
4868                         dev->data->port_id, fdir_filter->input.flow_type);
4869                 rte_errno = ENOTSUP;
4870                 return -rte_errno;
4871         }
4872         /* Handle L4. */
4873         switch (fdir_filter->input.flow_type) {
4874         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4875                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
4876                         .src_port = input->flow.udp4_flow.src_port,
4877                         .dst_port = input->flow.udp4_flow.dst_port,
4878                 };
4879                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
4880                         .src_port = mask->src_port_mask,
4881                         .dst_port = mask->dst_port_mask,
4882                 };
4883                 attributes->items[2] = (struct rte_flow_item){
4884                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4885                         .spec = &attributes->l4,
4886                         .mask = &attributes->l4_mask,
4887                 };
4888                 break;
4889         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4890                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
4891                         .src_port = input->flow.tcp4_flow.src_port,
4892                         .dst_port = input->flow.tcp4_flow.dst_port,
4893                 };
4894                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
4895                         .src_port = mask->src_port_mask,
4896                         .dst_port = mask->dst_port_mask,
4897                 };
4898                 attributes->items[2] = (struct rte_flow_item){
4899                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4900                         .spec = &attributes->l4,
4901                         .mask = &attributes->l4_mask,
4902                 };
4903                 break;
4904         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4905                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
4906                         .src_port = input->flow.udp6_flow.src_port,
4907                         .dst_port = input->flow.udp6_flow.dst_port,
4908                 };
4909                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
4910                         .src_port = mask->src_port_mask,
4911                         .dst_port = mask->dst_port_mask,
4912                 };
4913                 attributes->items[2] = (struct rte_flow_item){
4914                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4915                         .spec = &attributes->l4,
4916                         .mask = &attributes->l4_mask,
4917                 };
4918                 break;
4919         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4920                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
4921                         .src_port = input->flow.tcp6_flow.src_port,
4922                         .dst_port = input->flow.tcp6_flow.dst_port,
4923                 };
4924                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
4925                         .src_port = mask->src_port_mask,
4926                         .dst_port = mask->dst_port_mask,
4927                 };
4928                 attributes->items[2] = (struct rte_flow_item){
4929                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4930                         .spec = &attributes->l4,
4931                         .mask = &attributes->l4_mask,
4932                 };
4933                 break;
4934         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4935         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4936                 break;
4937         default:
4938                 DRV_LOG(ERR, "port %u invalid flow type %d",
4939                         dev->data->port_id, fdir_filter->input.flow_type);
4940                 rte_errno = ENOTSUP;
4941                 return -rte_errno;
4942         }
4943         return 0;
4944 }
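/*
 * Illustration: a perfect IPv4/UDP filter steering to queue 3, e.g.
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *		.input.flow.udp4_flow.src_port = rte_cpu_to_be_16(53),
 *		.action.behavior = RTE_ETH_FDIR_ACCEPT,
 *		.action.rx_queue = 3,
 *	};
 *
 * converts to the generic pattern ETH / IPV4 / UDP / END with a QUEUE(3)
 * action, the masks coming from dev_conf.fdir_conf.mask.
 */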
4945
4946 #define FLOW_FDIR_CMP(f1, f2, fld) \
4947         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
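/*
 * Note: the memcmp()-based comparison relies on both mlx5_fdir
 * structures being zero-initialized before conversion (rte_zmalloc() or
 * a zeroing initializer), so that fields not set by
 * flow_fdir_filter_convert() always compare equal.
 */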
4948
4949 /**
4950  * Compare two FDIR flows. If items and actions are identical, the two flows are
4951  * regarded as the same.
4952  *
4953  * @param dev
4954  *   Pointer to Ethernet device.
4955  * @param f1
4956  *   FDIR flow to compare.
4957  * @param f2
4958  *   FDIR flow to compare.
4959  *
4960  * @return
4961  *   Zero on match, 1 otherwise.
4962  */
4963 static int
4964 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
4965 {
4966         if (FLOW_FDIR_CMP(f1, f2, attr) ||
4967             FLOW_FDIR_CMP(f1, f2, l2) ||
4968             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
4969             FLOW_FDIR_CMP(f1, f2, l3) ||
4970             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
4971             FLOW_FDIR_CMP(f1, f2, l4) ||
4972             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
4973             FLOW_FDIR_CMP(f1, f2, actions[0].type))
4974                 return 1;
4975         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
4976             FLOW_FDIR_CMP(f1, f2, queue))
4977                 return 1;
4978         return 0;
4979 }
4980
4981 /**
4982  * Search device flow list to find out a matched FDIR flow.
4983  *
4984  * @param dev
4985  *   Pointer to Ethernet device.
4986  * @param fdir_flow
4987  *   FDIR flow to lookup.
4988  *
4989  * @return
4990  *   Pointer of flow if found, NULL otherwise.
4991  */
4992 static struct rte_flow *
4993 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
4994 {
4995         struct mlx5_priv *priv = dev->data->dev_private;
4996         struct rte_flow *flow = NULL;
4997
4998         assert(fdir_flow);
4999         TAILQ_FOREACH(flow, &priv->flows, next) {
5000                 if (flow->fdir && !flow_fdir_cmp(flow->fdir, fdir_flow)) {
5001                         DRV_LOG(DEBUG, "port %u found FDIR flow %p",
5002                                 dev->data->port_id, (void *)flow);
5003                         break;
5004                 }
5005         }
5006         return flow;
5007 }
5008
5009 /**
5010  * Add new flow director filter and store it in list.
5011  *
5012  * @param dev
5013  *   Pointer to Ethernet device.
5014  * @param fdir_filter
5015  *   Flow director filter to add.
5016  *
5017  * @return
5018  *   0 on success, a negative errno value otherwise and rte_errno is set.
5019  */
5020 static int
5021 flow_fdir_filter_add(struct rte_eth_dev *dev,
5022                      const struct rte_eth_fdir_filter *fdir_filter)
5023 {
5024         struct mlx5_priv *priv = dev->data->dev_private;
5025         struct mlx5_fdir *fdir_flow;
5026         struct rte_flow *flow;
5027         int ret;
5028
5029         fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
5030         if (!fdir_flow) {
5031                 rte_errno = ENOMEM;
5032                 return -rte_errno;
5033         }
5034         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
5035         if (ret)
5036                 goto error;
5037         flow = flow_fdir_filter_lookup(dev, fdir_flow);
5038         if (flow) {
5039                 rte_errno = EEXIST;
5040                 goto error;
5041         }
5042         flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
5043                                 fdir_flow->items, fdir_flow->actions, true,
5044                                 NULL);
5045         if (!flow)
5046                 goto error;
5047         assert(!flow->fdir);
5048         flow->fdir = fdir_flow;
5049         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
5050                 dev->data->port_id, (void *)flow);
5051         return 0;
5052 error:
5053         rte_free(fdir_flow);
5054         return -rte_errno;
5055 }
5056
5057 /**
5058  * Delete specific filter.
5059  *
5060  * @param dev
5061  *   Pointer to Ethernet device.
5062  * @param fdir_filter
5063  *   Filter to be deleted.
5064  *
5065  * @return
5066  *   0 on success, a negative errno value otherwise and rte_errno is set.
5067  */
5068 static int
5069 flow_fdir_filter_delete(struct rte_eth_dev *dev,
5070                         const struct rte_eth_fdir_filter *fdir_filter)
5071 {
5072         struct mlx5_priv *priv = dev->data->dev_private;
5073         struct rte_flow *flow;
5074         struct mlx5_fdir fdir_flow = {
5075                 .attr.group = 0,
5076         };
5077         int ret;
5078
5079         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
5080         if (ret)
5081                 return -rte_errno;
5082         flow = flow_fdir_filter_lookup(dev, &fdir_flow);
5083         if (!flow) {
5084                 rte_errno = ENOENT;
5085                 return -rte_errno;
5086         }
5087         flow_list_destroy(dev, &priv->flows, flow);
5088         DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
5089                 dev->data->port_id, (void *)flow);
5090         return 0;
5091 }
5092
5093 /**
5094  * Update queue for specific filter.
5095  *
5096  * @param dev
5097  *   Pointer to Ethernet device.
5098  * @param fdir_filter
5099  *   Filter to be updated.
5100  *
5101  * @return
5102  *   0 on success, a negative errno value otherwise and rte_errno is set.
5103  */
5104 static int
5105 flow_fdir_filter_update(struct rte_eth_dev *dev,
5106                         const struct rte_eth_fdir_filter *fdir_filter)
5107 {
5108         int ret;
5109
5110         ret = flow_fdir_filter_delete(dev, fdir_filter);
5111         if (ret)
5112                 return ret;
5113         return flow_fdir_filter_add(dev, fdir_filter);
5114 }
5115
5116 /**
5117  * Flush all filters.
5118  *
5119  * @param dev
5120  *   Pointer to Ethernet device.
5121  */
5122 static void
5123 flow_fdir_filter_flush(struct rte_eth_dev *dev)
5124 {
5125         struct mlx5_priv *priv = dev->data->dev_private;
5126
5127         mlx5_flow_list_flush(dev, &priv->flows);
5128 }
5129
5130 /**
5131  * Get flow director information.
5132  *
5133  * @param dev
5134  *   Pointer to Ethernet device.
5135  * @param[out] fdir_info
5136  *   Resulting flow director information.
5137  */
5138 static void
5139 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
5140 {
5141         struct rte_eth_fdir_masks *mask =
5142                 &dev->data->dev_conf.fdir_conf.mask;
5143
5144         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
5145         fdir_info->guarant_spc = 0;
5146         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
5147         fdir_info->max_flexpayload = 0;
5148         fdir_info->flow_types_mask[0] = 0;
5149         fdir_info->flex_payload_unit = 0;
5150         fdir_info->max_flex_payload_segment_num = 0;
5151         fdir_info->flex_payload_limit = 0;
5152         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
5153 }
5154
5155 /**
5156  * Deal with flow director operations.
5157  *
5158  * @param dev
5159  *   Pointer to Ethernet device.
5160  * @param filter_op
5161  *   Operation to perform.
5162  * @param arg
5163  *   Pointer to operation-specific structure.
5164  *
5165  * @return
5166  *   0 on success, a negative errno value otherwise and rte_errno is set.
5167  */
5168 static int
5169 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
5170                     void *arg)
5171 {
5172         enum rte_fdir_mode fdir_mode =
5173                 dev->data->dev_conf.fdir_conf.mode;
5174
5175         if (filter_op == RTE_ETH_FILTER_NOP)
5176                 return 0;
5177         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
5178             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
5179                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
5180                         dev->data->port_id, fdir_mode);
5181                 rte_errno = EINVAL;
5182                 return -rte_errno;
5183         }
5184         switch (filter_op) {
5185         case RTE_ETH_FILTER_ADD:
5186                 return flow_fdir_filter_add(dev, arg);
5187         case RTE_ETH_FILTER_UPDATE:
5188                 return flow_fdir_filter_update(dev, arg);
5189         case RTE_ETH_FILTER_DELETE:
5190                 return flow_fdir_filter_delete(dev, arg);
5191         case RTE_ETH_FILTER_FLUSH:
5192                 flow_fdir_filter_flush(dev);
5193                 break;
5194         case RTE_ETH_FILTER_INFO:
5195                 flow_fdir_info_get(dev, arg);
5196                 break;
5197         default:
5198                 DRV_LOG(DEBUG, "port %u unknown operation %u",
5199                         dev->data->port_id, filter_op);
5200                 rte_errno = EINVAL;
5201                 return -rte_errno;
5202         }
5203         return 0;
5204 }
5205
5206 /**
5207  * Manage filter operations.
5208  *
5209  * @param dev
5210  *   Pointer to Ethernet device structure.
5211  * @param filter_type
5212  *   Filter type.
5213  * @param filter_op
5214  *   Operation to perform.
5215  * @param arg
5216  *   Pointer to operation-specific structure.
5217  *
5218  * @return
5219  *   0 on success, a negative errno value otherwise and rte_errno is set.
5220  */
5221 int
5222 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
5223                      enum rte_filter_type filter_type,
5224                      enum rte_filter_op filter_op,
5225                      void *arg)
5226 {
5227         switch (filter_type) {
5228         case RTE_ETH_FILTER_GENERIC:
5229                 if (filter_op != RTE_ETH_FILTER_GET) {
5230                         rte_errno = EINVAL;
5231                         return -rte_errno;
5232                 }
5233                 *(const void **)arg = &mlx5_flow_ops;
5234                 return 0;
5235         case RTE_ETH_FILTER_FDIR:
5236                 return flow_fdir_ctrl_func(dev, filter_op, arg);
5237         default:
5238                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
5239                         dev->data->port_id, filter_type);
5240                 rte_errno = ENOTSUP;
5241                 return -rte_errno;
5242         }
5243         return 0;
5244 }
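/*
 * This is the entry point through which the generic rte_flow layer
 * discovers the driver callbacks, roughly:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 *	ops->create(...);	-> mlx5_flow_create()
 */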
5245
5246 /**
5247  * Create the needed meter and suffix tables.
5248  *
5249  * @param[in] dev
5250  *   Pointer to Ethernet device.
5251  * @param[in] fm
5252  *   Pointer to the flow meter.
5253  *
5254  * @return
5255  *   Pointer to table set on success, NULL otherwise.
5256  */
5257 struct mlx5_meter_domains_infos *
5258 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
5259                           const struct mlx5_flow_meter *fm)
5260 {
5261         const struct mlx5_flow_driver_ops *fops;
5262
5263         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5264         return fops->create_mtr_tbls(dev, fm);
5265 }
5266
5267 /**
5268  * Destroy the meter table set.
5269  *
5270  * @param[in] dev
5271  *   Pointer to Ethernet device.
5272  * @param[in] tbl
5273  *   Pointer to the meter table set.
5274  *
5275  * @return
5276  *   0 on success.
5277  */
5278 int
5279 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
5280                            struct mlx5_meter_domains_infos *tbls)
5281 {
5282         const struct mlx5_flow_driver_ops *fops;
5283
5284         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5285         return fops->destroy_mtr_tbls(dev, tbls);
5286 }
5287
5288 /**
5289  * Create policer rules.
5290  *
5291  * @param[in] dev
5292  *   Pointer to Ethernet device.
5293  * @param[in] fm
5294  *   Pointer to flow meter structure.
5295  * @param[in] attr
5296  *   Pointer to flow attributes.
5297  *
5298  * @return
5299  *   0 on success, -1 otherwise.
5300  */
5301 int
5302 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
5303                                struct mlx5_flow_meter *fm,
5304                                const struct rte_flow_attr *attr)
5305 {
5306         const struct mlx5_flow_driver_ops *fops;
5307
5308         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5309         return fops->create_policer_rules(dev, fm, attr);
5310 }
5311
5312 /**
5313  * Destroy policer rules.
5314  *
5315  * @param[in] fm
5316  *   Pointer to flow meter structure.
5317  * @param[in] attr
5318  *   Pointer to flow attributes.
5319  *
5320  * @return
5321  *   0 on success, -1 otherwise.
5322  */
5323 int
5324 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
5325                                 struct mlx5_flow_meter *fm,
5326                                 const struct rte_flow_attr *attr)
5327 {
5328         const struct mlx5_flow_driver_ops *fops;
5329
5330         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5331         return fops->destroy_policer_rules(dev, fm, attr);
5332 }
5333
5334 /**
5335  * Allocate a counter.
5336  *
5337  * @param[in] dev
5338  *   Pointer to Ethernet device structure.
5339  *
5340  * @return
5341  *   Pointer to the allocated counter on success, NULL otherwise.
5342  */
5343 struct mlx5_flow_counter *
5344 mlx5_counter_alloc(struct rte_eth_dev *dev)
5345 {
5346         const struct mlx5_flow_driver_ops *fops;
5347         struct rte_flow_attr attr = { .transfer = 0 };
5348
5349         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5350                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5351                 return fops->counter_alloc(dev);
5352         }
5353         DRV_LOG(ERR,
5354                 "port %u counter allocation is not supported.",
5355                  dev->data->port_id);
5356         return NULL;
5357 }
5358
5359 /**
5360  * Free a counter.
5361  *
5362  * @param[in] dev
5363  *   Pointer to Ethernet device structure.
5364  * @param[in] cnt
5365  *   Pointer to the counter to be freed.
5366  */
5367 void
5368 mlx5_counter_free(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt)
5369 {
5370         const struct mlx5_flow_driver_ops *fops;
5371         struct rte_flow_attr attr = { .transfer = 0 };
5372
5373         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5374                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5375                 fops->counter_free(dev, cnt);
5376                 return;
5377         }
5378         DRV_LOG(ERR,
5379                 "port %u counter free is not supported.",
5380                  dev->data->port_id);
5381 }

/**
 * Query counter statistics.
 *
 * @param[in] dev
 *   Pointer to Ethernet device structure.
 * @param[in] cnt
 *   Pointer to the counter to query.
 * @param[in] clear
 *   Whether to clear the counter statistics after reading them.
 * @param[out] pkts
 *   Where to store the number of packets hit by the counter.
 * @param[out] bytes
 *   Where to store the number of bytes hit by the counter.
 *
 * @return
 *   0 on success, a negative errno value otherwise.
 */
int
mlx5_counter_query(struct rte_eth_dev *dev, struct mlx5_flow_counter *cnt,
                   bool clear, uint64_t *pkts, uint64_t *bytes)
{
        const struct mlx5_flow_driver_ops *fops;
        struct rte_flow_attr attr = { .transfer = 0 };

        if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
                fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
                return fops->counter_query(dev, cnt, clear, pkts, bytes);
        }
        DRV_LOG(ERR,
                "port %u counter query is not supported.",
                dev->data->port_id);
        return -ENOTSUP;
}
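
/*
 * Usage sketch (hypothetical caller): allocate a counter, read and clear
 * its statistics, then release it. Error handling is reduced to the
 * minimum for illustration.
 *
 *     struct mlx5_flow_counter *cnt = mlx5_counter_alloc(dev);
 *     uint64_t pkts = 0, bytes = 0;
 *
 *     if (cnt != NULL &&
 *         mlx5_counter_query(dev, cnt, true, &pkts, &bytes) == 0)
 *             DRV_LOG(DEBUG, "pkts %" PRIu64 " bytes %" PRIu64,
 *                     pkts, bytes);
 *     if (cnt != NULL)
 *             mlx5_counter_free(dev, cnt);
 */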

#define MLX5_POOL_QUERY_FREQ_US 1000000

/**
 * Set the periodic procedure for triggering asynchronous batch queries for all
 * the counter pools.
 *
 * @param[in] sh
 *   Pointer to mlx5_ibv_shared object.
 */
void
mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
{
        struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
        uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
        uint32_t us;

        cont = MLX5_CNT_CONTAINER(sh, 1, 0);
        pools_n += rte_atomic16_read(&cont->n_valid);
        /* The alarm is armed only once a pool exists, so pools_n > 0 here. */
        us = MLX5_POOL_QUERY_FREQ_US / pools_n;
        DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
        if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
                sh->cmng.query_thread_on = 0;
                DRV_LOG(ERR, "Cannot reinitialize query alarm");
        } else {
                sh->cmng.query_thread_on = 1;
        }
}
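
/*
 * Worked example of the alarm pacing above: with MLX5_POOL_QUERY_FREQ_US at
 * 1000000 and, say, 4 valid pools across both containers, the alarm fires
 * every 1000000 / 4 = 250000 us. Since each firing queries a single pool,
 * every pool is still refreshed roughly once per second regardless of how
 * many pools exist.
 */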

/**
 * The periodic procedure for triggering asynchronous batch queries for all the
 * counter pools. This function is expected to run in the host thread.
 *
 * @param[in] arg
 *   The parameter for the alarm process.
 */
void
mlx5_flow_query_alarm(void *arg)
{
        struct mlx5_ibv_shared *sh = arg;
        struct mlx5_devx_obj *dcs;
        uint16_t offset;
        int ret;
        uint8_t batch = sh->cmng.batch;
        uint16_t pool_index = sh->cmng.pool_index;
        struct mlx5_pools_container *cont;
        struct mlx5_pools_container *mcont;
        struct mlx5_flow_counter_pool *pool;

        if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
                goto set_alarm;
next_container:
        cont = MLX5_CNT_CONTAINER(sh, batch, 1);
        mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
        /* Check if a resize was done and the host container must flip. */
        if (cont != mcont) {
                if (cont->pools) {
                        /* Clean the old container. */
                        rte_free(cont->pools);
                        memset(cont, 0, sizeof(*cont));
                }
                rte_cio_wmb();
                /* Flip the host container. */
                sh->cmng.mhi[batch] ^= (uint8_t)2;
                cont = mcont;
        }
        if (!cont->pools) {
                /* Two empty containers at once is unexpected. */
                if (unlikely(batch != sh->cmng.batch))
                        goto set_alarm;
                batch ^= 0x1;
                pool_index = 0;
                goto next_container;
        }
        pool = cont->pools[pool_index];
        if (pool->raw_hw)
                /* There is a pool query in progress. */
                goto set_alarm;
        pool->raw_hw = LIST_FIRST(&sh->cmng.free_stat_raws);
        if (!pool->raw_hw)
                /* No free counter statistics raw memory. */
                goto set_alarm;
        dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
                                                              (&pool->a64_dcs);
        offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
        ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
                                               offset, NULL, NULL,
                                               pool->raw_hw->mem_mng->dm->id,
                                               (void *)(uintptr_t)
                                               (pool->raw_hw->data + offset),
                                               sh->devx_comp,
                                               (uint64_t)(uintptr_t)pool);
        if (ret) {
                DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
                        " %d", pool->min_dcs->id);
                pool->raw_hw = NULL;
                goto set_alarm;
        }
        pool->raw_hw->min_dcs_id = dcs->id;
        LIST_REMOVE(pool->raw_hw, next);
        sh->cmng.pending_queries++;
        pool_index++;
        if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
                batch ^= 0x1;
                pool_index = 0;
        }
set_alarm:
        sh->cmng.batch = batch;
        sh->cmng.pool_index = pool_index;
        mlx5_set_query_alarm(sh);
}
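
/*
 * Illustrative walk of the cursor maintained above: one pool is queried per
 * alarm firing, advancing through all pools of one container before
 * flipping to the other, e.g. with two pools per container:
 *
 *     (batch 0, pool 0) -> (batch 0, pool 1) ->
 *     (batch 1, pool 0) -> (batch 1, pool 1) -> (batch 0, pool 0) -> ...
 *
 * The (batch, pool_index) cursor persists in sh->cmng between firings, so a
 * pool skipped because its raw buffer is still busy is retried on the next
 * alarm rather than lost.
 */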

/**
 * Handler for the HW response with the ready values from an asynchronous
 * batch query. This function is expected to be called from the host thread.
 *
 * @param[in] sh
 *   The pointer to the shared IB device context.
 * @param[in] async_id
 *   The DevX async ID.
 * @param[in] status
 *   The status of the completion.
 */
void
mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
                                  uint64_t async_id, int status)
{
        struct mlx5_flow_counter_pool *pool =
                (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
        struct mlx5_counter_stats_raw *raw_to_free;

        if (unlikely(status)) {
                raw_to_free = pool->raw_hw;
        } else {
                raw_to_free = pool->raw;
                rte_spinlock_lock(&pool->sl);
                pool->raw = pool->raw_hw;
                rte_spinlock_unlock(&pool->sl);
                rte_atomic64_add(&pool->query_gen, 1);
                /* Be sure the new raw counters data is updated in memory. */
                rte_cio_wmb();
        }
        LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
        pool->raw_hw = NULL;
        sh->cmng.pending_queries--;
}
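
/*
 * Design note with a reader-side sketch (hypothetical, assuming readers
 * access statistics through pool->raw): on success the freshly filled
 * pool->raw_hw buffer is swapped in under pool->sl, so a concurrent reader
 * only needs the same spinlock to get a consistent snapshot:
 *
 *     rte_spinlock_lock(&pool->sl);
 *     raw = pool->raw; (latest completed batch query)
 *     ... copy the needed counter values out of raw ...
 *     rte_spinlock_unlock(&pool->sl);
 *
 * The retired buffer goes back to free_stat_raws for the next alarm cycle.
 */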

/**
 * Translate the rte_flow group index to the HW table value.
 *
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in] external
 *   True when the flow rule comes from a request external to the PMD.
 * @param[in] group
 *   rte_flow group index value.
 * @param[out] table
 *   HW table value.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
                         uint32_t group, uint32_t *table,
                         struct rte_flow_error *error)
{
        if (attributes->transfer && external) {
                if (group == UINT32_MAX)
                        return rte_flow_error_set
                                                (error, EINVAL,
                                                 RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                                 NULL,
                                                 "group index not supported");
                *table = group + 1;
        } else {
                *table = group;
        }
        return 0;
}
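
/*
 * Worked examples of the mapping above: an external transfer rule in group
 * 0 lands in HW table 1, group 1 in table 2, and so on, presumably keeping
 * table 0 free for driver-internal flows; UINT32_MAX is rejected because
 * the +1 shift would overflow. Non-transfer or internal rules keep
 * group == table unchanged.
 */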

/**
 * Discover the availability of metadata reg_c's.
 *
 * Iteratively use test flows to check their availability.
 *
 * @param[in] dev
 *   Pointer to the Ethernet device structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
{
        struct mlx5_priv *priv = dev->data->dev_private;
        struct mlx5_dev_config *config = &priv->config;
        enum modify_reg idx;
        int n = 0;

        /* reg_c[0] and reg_c[1] are reserved. */
        config->flow_mreg_c[n++] = REG_C_0;
        config->flow_mreg_c[n++] = REG_C_1;
        /* Discover availability of the other reg_c's. */
        for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
                struct rte_flow_attr attr = {
                        .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
                        .priority = MLX5_FLOW_PRIO_RSVD,
                        .ingress = 1,
                };
                struct rte_flow_item items[] = {
                        [0] = {
                                .type = RTE_FLOW_ITEM_TYPE_END,
                        },
                };
                struct rte_flow_action actions[] = {
                        [0] = {
                                .type = MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
                                .conf = &(struct mlx5_flow_action_copy_mreg){
                                        .src = REG_C_1,
                                        .dst = idx,
                                },
                        },
                        [1] = {
                                .type = RTE_FLOW_ACTION_TYPE_JUMP,
                                .conf = &(struct rte_flow_action_jump){
                                        .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
                                },
                        },
                        [2] = {
                                .type = RTE_FLOW_ACTION_TYPE_END,
                        },
                };
                struct rte_flow *flow;
                struct rte_flow_error error;

                if (!config->dv_flow_en)
                        break;
                /* Create an internal flow; validation skips the copy action. */
                flow = flow_list_create(dev, NULL, &attr, items,
                                        actions, false, &error);
                if (!flow)
                        continue;
                if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
                        config->flow_mreg_c[n++] = idx;
                flow_list_destroy(dev, NULL, flow);
        }
        for (; n < MLX5_MREG_C_NUM; ++n)
                config->flow_mreg_c[n] = REG_NONE;
        return 0;
}
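
/*
 * Usage note with a hypothetical check: after this probe-time discovery the
 * result lands in config->flow_mreg_c[], with unavailable slots set to
 * REG_NONE, so later code can test for extra metadata registers like this:
 *
 *     if (priv->config.flow_mreg_c[2] != REG_NONE)
 *             ... reg_c[2] is available for metadata copy ...
 */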