[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11 #include <stdbool.h>
12
13 /* Verbs header. */
14 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
15 #ifdef PEDANTIC
16 #pragma GCC diagnostic ignored "-Wpedantic"
17 #endif
18 #include <infiniband/verbs.h>
19 #ifdef PEDANTIC
20 #pragma GCC diagnostic error "-Wpedantic"
21 #endif
22
23 #include <rte_common.h>
24 #include <rte_ether.h>
25 #include <rte_ethdev_driver.h>
26 #include <rte_flow.h>
27 #include <rte_cycles.h>
28 #include <rte_flow_driver.h>
29 #include <rte_malloc.h>
30 #include <rte_ip.h>
31
32 #include <mlx5_devx_cmds.h>
33 #include <mlx5_prm.h>
34 #include <mlx5_malloc.h>
35
36 #include "mlx5_defs.h"
37 #include "mlx5.h"
38 #include "mlx5_flow.h"
39 #include "mlx5_flow_os.h"
40 #include "mlx5_rxtx.h"
41
42 /** Device flow drivers. */
43 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
44
45 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
46
47 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
48         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
49 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
50         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
51 #endif
52         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
53         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
54 };
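/*
 * Illustrative sketch (not part of the driver): the table above maps each
 * flow driver type to its ops, with the MIN/MAX sentinels pointing at the
 * null ops. Dispatch is a simple index lookup; the driver-type enum and the
 * ops' exact signatures are defined in mlx5_flow.h.
 *
 *     const struct mlx5_flow_driver_ops *fops =
 *             flow_drv_ops[MLX5_FLOW_TYPE_VERBS];
 */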
55
56 enum mlx5_expansion {
57         MLX5_EXPANSION_ROOT,
58         MLX5_EXPANSION_ROOT_OUTER,
59         MLX5_EXPANSION_ROOT_ETH_VLAN,
60         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
61         MLX5_EXPANSION_OUTER_ETH,
62         MLX5_EXPANSION_OUTER_ETH_VLAN,
63         MLX5_EXPANSION_OUTER_VLAN,
64         MLX5_EXPANSION_OUTER_IPV4,
65         MLX5_EXPANSION_OUTER_IPV4_UDP,
66         MLX5_EXPANSION_OUTER_IPV4_TCP,
67         MLX5_EXPANSION_OUTER_IPV6,
68         MLX5_EXPANSION_OUTER_IPV6_UDP,
69         MLX5_EXPANSION_OUTER_IPV6_TCP,
70         MLX5_EXPANSION_VXLAN,
71         MLX5_EXPANSION_VXLAN_GPE,
72         MLX5_EXPANSION_GRE,
73         MLX5_EXPANSION_MPLS,
74         MLX5_EXPANSION_ETH,
75         MLX5_EXPANSION_ETH_VLAN,
76         MLX5_EXPANSION_VLAN,
77         MLX5_EXPANSION_IPV4,
78         MLX5_EXPANSION_IPV4_UDP,
79         MLX5_EXPANSION_IPV4_TCP,
80         MLX5_EXPANSION_IPV6,
81         MLX5_EXPANSION_IPV6_UDP,
82         MLX5_EXPANSION_IPV6_TCP,
83 };
84
85 /** Supported expansion of items. */
86 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
87         [MLX5_EXPANSION_ROOT] = {
88                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
89                                                  MLX5_EXPANSION_IPV4,
90                                                  MLX5_EXPANSION_IPV6),
91                 .type = RTE_FLOW_ITEM_TYPE_END,
92         },
93         [MLX5_EXPANSION_ROOT_OUTER] = {
94                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
95                                                  MLX5_EXPANSION_OUTER_IPV4,
96                                                  MLX5_EXPANSION_OUTER_IPV6),
97                 .type = RTE_FLOW_ITEM_TYPE_END,
98         },
99         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
100                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
101                 .type = RTE_FLOW_ITEM_TYPE_END,
102         },
103         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
104                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
105                 .type = RTE_FLOW_ITEM_TYPE_END,
106         },
107         [MLX5_EXPANSION_OUTER_ETH] = {
108                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
109                                                  MLX5_EXPANSION_OUTER_IPV6,
110                                                  MLX5_EXPANSION_MPLS),
111                 .type = RTE_FLOW_ITEM_TYPE_ETH,
112                 .rss_types = 0,
113         },
114         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
115                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
116                 .type = RTE_FLOW_ITEM_TYPE_ETH,
117                 .rss_types = 0,
118         },
119         [MLX5_EXPANSION_OUTER_VLAN] = {
120                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
121                                                  MLX5_EXPANSION_OUTER_IPV6),
122                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
123         },
124         [MLX5_EXPANSION_OUTER_IPV4] = {
125                 .next = RTE_FLOW_EXPAND_RSS_NEXT
126                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
127                          MLX5_EXPANSION_OUTER_IPV4_TCP,
128                          MLX5_EXPANSION_GRE,
129                          MLX5_EXPANSION_IPV4,
130                          MLX5_EXPANSION_IPV6),
131                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
132                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
133                         ETH_RSS_NONFRAG_IPV4_OTHER,
134         },
135         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
136                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
137                                                  MLX5_EXPANSION_VXLAN_GPE),
138                 .type = RTE_FLOW_ITEM_TYPE_UDP,
139                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
140         },
141         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
142                 .type = RTE_FLOW_ITEM_TYPE_TCP,
143                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
144         },
145         [MLX5_EXPANSION_OUTER_IPV6] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT
147                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
148                          MLX5_EXPANSION_OUTER_IPV6_TCP,
149                          MLX5_EXPANSION_IPV4,
150                          MLX5_EXPANSION_IPV6),
151                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
152                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
153                         ETH_RSS_NONFRAG_IPV6_OTHER,
154         },
155         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
156                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
157                                                  MLX5_EXPANSION_VXLAN_GPE),
158                 .type = RTE_FLOW_ITEM_TYPE_UDP,
159                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
160         },
161         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
162                 .type = RTE_FLOW_ITEM_TYPE_TCP,
163                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
164         },
165         [MLX5_EXPANSION_VXLAN] = {
166                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
167                                                  MLX5_EXPANSION_IPV4,
168                                                  MLX5_EXPANSION_IPV6),
169                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
170         },
171         [MLX5_EXPANSION_VXLAN_GPE] = {
172                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
173                                                  MLX5_EXPANSION_IPV4,
174                                                  MLX5_EXPANSION_IPV6),
175                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
176         },
177         [MLX5_EXPANSION_GRE] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
179                 .type = RTE_FLOW_ITEM_TYPE_GRE,
180         },
181         [MLX5_EXPANSION_MPLS] = {
182                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
183                                                  MLX5_EXPANSION_IPV6),
184                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
185         },
186         [MLX5_EXPANSION_ETH] = {
187                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
188                                                  MLX5_EXPANSION_IPV6),
189                 .type = RTE_FLOW_ITEM_TYPE_ETH,
190         },
191         [MLX5_EXPANSION_ETH_VLAN] = {
192                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
193                 .type = RTE_FLOW_ITEM_TYPE_ETH,
194         },
195         [MLX5_EXPANSION_VLAN] = {
196                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
197                                                  MLX5_EXPANSION_IPV6),
198                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
199         },
200         [MLX5_EXPANSION_IPV4] = {
201                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
202                                                  MLX5_EXPANSION_IPV4_TCP),
203                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
204                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
205                         ETH_RSS_NONFRAG_IPV4_OTHER,
206         },
207         [MLX5_EXPANSION_IPV4_UDP] = {
208                 .type = RTE_FLOW_ITEM_TYPE_UDP,
209                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
210         },
211         [MLX5_EXPANSION_IPV4_TCP] = {
212                 .type = RTE_FLOW_ITEM_TYPE_TCP,
213                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
214         },
215         [MLX5_EXPANSION_IPV6] = {
216                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
217                                                  MLX5_EXPANSION_IPV6_TCP),
218                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
219                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
220                         ETH_RSS_NONFRAG_IPV6_OTHER,
221         },
222         [MLX5_EXPANSION_IPV6_UDP] = {
223                 .type = RTE_FLOW_ITEM_TYPE_UDP,
224                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
225         },
226         [MLX5_EXPANSION_IPV6_TCP] = {
227                 .type = RTE_FLOW_ITEM_TYPE_TCP,
228                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
229         },
230 };
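/*
 * Illustrative example (not part of the driver): with the expansion graph
 * above, an RSS flow whose pattern ends at ETH / IPV4 and whose RSS types
 * include UDP and TCP is roughly expanded into the more specific patterns
 *     ETH / IPV4
 *     ETH / IPV4 / UDP
 *     ETH / IPV4 / TCP
 * so each expanded rule can hash on the layers its rss_types describe.
 */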
231
232 static const struct rte_flow_ops mlx5_flow_ops = {
233         .validate = mlx5_flow_validate,
234         .create = mlx5_flow_create,
235         .destroy = mlx5_flow_destroy,
236         .flush = mlx5_flow_flush,
237         .isolate = mlx5_flow_isolate,
238         .query = mlx5_flow_query,
239         .dev_dump = mlx5_flow_dev_dump,
240         .get_aged_flows = mlx5_flow_get_aged_flows,
241 };
242
243 /* Convert FDIR request to Generic flow. */
244 struct mlx5_fdir {
245         struct rte_flow_attr attr;
246         struct rte_flow_item items[4];
247         struct rte_flow_item_eth l2;
248         struct rte_flow_item_eth l2_mask;
249         union {
250                 struct rte_flow_item_ipv4 ipv4;
251                 struct rte_flow_item_ipv6 ipv6;
252         } l3;
253         union {
254                 struct rte_flow_item_ipv4 ipv4;
255                 struct rte_flow_item_ipv6 ipv6;
256         } l3_mask;
257         union {
258                 struct rte_flow_item_udp udp;
259                 struct rte_flow_item_tcp tcp;
260         } l4;
261         union {
262                 struct rte_flow_item_udp udp;
263                 struct rte_flow_item_tcp tcp;
264         } l4_mask;
265         struct rte_flow_action actions[2];
266         struct rte_flow_action_queue queue;
267 };
268
269 /* Tunnel information. */
270 struct mlx5_flow_tunnel_info {
271         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
272         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
273 };
274
275 static struct mlx5_flow_tunnel_info tunnels_info[] = {
276         {
277                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
278                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
279         },
280         {
281                 .tunnel = MLX5_FLOW_LAYER_GENEVE,
282                 .ptype = RTE_PTYPE_TUNNEL_GENEVE | RTE_PTYPE_L4_UDP,
283         },
284         {
285                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
286                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
287         },
288         {
289                 .tunnel = MLX5_FLOW_LAYER_GRE,
290                 .ptype = RTE_PTYPE_TUNNEL_GRE,
291         },
292         {
293                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
294                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
295         },
296         {
297                 .tunnel = MLX5_FLOW_LAYER_MPLS,
298                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
299         },
300         {
301                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
302                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
303         },
304         {
305                 .tunnel = MLX5_FLOW_LAYER_IPIP,
306                 .ptype = RTE_PTYPE_TUNNEL_IP,
307         },
308         {
309                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
310                 .ptype = RTE_PTYPE_TUNNEL_IP,
311         },
312         {
313                 .tunnel = MLX5_FLOW_LAYER_GTP,
314                 .ptype = RTE_PTYPE_TUNNEL_GTPU,
315         },
316 };
317
318 /**
319  * Translate tag ID to register.
320  *
321  * @param[in] dev
322  *   Pointer to the Ethernet device structure.
323  * @param[in] feature
324  *   The feature that requests the register.
325  * @param[in] id
326  *   The requested register ID.
327  * @param[out] error
328  *   Error description in case of failure.
329  *
330  * @return
331  *   The requested register on success, a negative errno
332  *   value otherwise and rte_errno is set.
333  */
334 int
335 mlx5_flow_get_reg_id(struct rte_eth_dev *dev,
336                      enum mlx5_feature_name feature,
337                      uint32_t id,
338                      struct rte_flow_error *error)
339 {
340         struct mlx5_priv *priv = dev->data->dev_private;
341         struct mlx5_dev_config *config = &priv->config;
342         enum modify_reg start_reg;
343         bool skip_mtr_reg = false;
344
345         switch (feature) {
346         case MLX5_HAIRPIN_RX:
347                 return REG_B;
348         case MLX5_HAIRPIN_TX:
349                 return REG_A;
350         case MLX5_METADATA_RX:
351                 switch (config->dv_xmeta_en) {
352                 case MLX5_XMETA_MODE_LEGACY:
353                         return REG_B;
354                 case MLX5_XMETA_MODE_META16:
355                         return REG_C_0;
356                 case MLX5_XMETA_MODE_META32:
357                         return REG_C_1;
358                 }
359                 break;
360         case MLX5_METADATA_TX:
361                 return REG_A;
362         case MLX5_METADATA_FDB:
363                 switch (config->dv_xmeta_en) {
364                 case MLX5_XMETA_MODE_LEGACY:
365                         return REG_NONE;
366                 case MLX5_XMETA_MODE_META16:
367                         return REG_C_0;
368                 case MLX5_XMETA_MODE_META32:
369                         return REG_C_1;
370                 }
371                 break;
372         case MLX5_FLOW_MARK:
373                 switch (config->dv_xmeta_en) {
374                 case MLX5_XMETA_MODE_LEGACY:
375                         return REG_NONE;
376                 case MLX5_XMETA_MODE_META16:
377                         return REG_C_1;
378                 case MLX5_XMETA_MODE_META32:
379                         return REG_C_0;
380                 }
381                 break;
382         case MLX5_MTR_SFX:
383                 /*
384                  * If meter color and flow match share one register, flow match
385                  * should use the meter color register for match.
386                  */
387                 if (priv->mtr_reg_share)
388                         return priv->mtr_color_reg;
389                 else
390                         return priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
391                                REG_C_3;
392         case MLX5_MTR_COLOR:
393                 MLX5_ASSERT(priv->mtr_color_reg != REG_NONE);
394                 return priv->mtr_color_reg;
395         case MLX5_COPY_MARK:
396                 /*
397                  * The metadata COPY_MARK register is only used in the meter suffix
398                  * sub-flow of a metered flow. It's safe to share the same register.
399                  */
400                 return priv->mtr_color_reg != REG_C_2 ? REG_C_2 : REG_C_3;
401         case MLX5_APP_TAG:
402                 /*
403                  * If a meter is enabled, it engages a register for both color
404                  * match and flow match. If meter color match does not use
405                  * REG_C_2, the REG_C_x used by meter color match must be
406                  * skipped.
407                  * If no meter is enabled, all available registers can be used.
408                  */
409                 start_reg = priv->mtr_color_reg != REG_C_2 ? REG_C_2 :
410                             (priv->mtr_reg_share ? REG_C_3 : REG_C_4);
411                 skip_mtr_reg = !!(priv->mtr_en && start_reg == REG_C_2);
412                 if (id > (REG_C_7 - start_reg))
413                         return rte_flow_error_set(error, EINVAL,
414                                                   RTE_FLOW_ERROR_TYPE_ITEM,
415                                                   NULL, "invalid tag id");
416                 if (config->flow_mreg_c[id + start_reg - REG_C_0] == REG_NONE)
417                         return rte_flow_error_set(error, ENOTSUP,
418                                                   RTE_FLOW_ERROR_TYPE_ITEM,
419                                                   NULL, "unsupported tag id");
420                 /*
421                  * This case means the meter is using a REG_C_x greater than 2.
422                  * Take care not to conflict with meter color REG_C_x.
423                  * If the available index REG_C_y >= REG_C_x, skip the
424                  * color register.
425                  */
426                 if (skip_mtr_reg && config->flow_mreg_c
427                     [id + start_reg - REG_C_0] >= priv->mtr_color_reg) {
428                         if (id >= (REG_C_7 - start_reg))
429                                 return rte_flow_error_set(error, EINVAL,
430                                                        RTE_FLOW_ERROR_TYPE_ITEM,
431                                                         NULL, "invalid tag id");
432                         if (config->flow_mreg_c
433                             [id + 1 + start_reg - REG_C_0] != REG_NONE)
434                                 return config->flow_mreg_c
435                                                [id + 1 + start_reg - REG_C_0];
436                         return rte_flow_error_set(error, ENOTSUP,
437                                                   RTE_FLOW_ERROR_TYPE_ITEM,
438                                                   NULL, "unsupported tag id");
439                 }
440                 return config->flow_mreg_c[id + start_reg - REG_C_0];
441         }
442         MLX5_ASSERT(false);
443         return rte_flow_error_set(error, EINVAL,
444                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
445                                   NULL, "invalid feature name");
446 }
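/*
 * Illustrative usage sketch (not part of the driver), assuming a valid
 * "dev" and an application-chosen tag index "tag_idx":
 *
 *     struct rte_flow_error err;
 *     int reg = mlx5_flow_get_reg_id(dev, MLX5_APP_TAG, tag_idx, &err);
 *     if (reg < 0)
 *             return reg; // rte_errno and err are already set
 */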
447
448 /**
449  * Check extensive flow metadata register support.
450  *
451  * @param dev
452  *   Pointer to rte_eth_dev structure.
453  *
454  * @return
455  *   True if device supports extensive flow metadata register, otherwise false.
456  */
457 bool
458 mlx5_flow_ext_mreg_supported(struct rte_eth_dev *dev)
459 {
460         struct mlx5_priv *priv = dev->data->dev_private;
461         struct mlx5_dev_config *config = &priv->config;
462
463         /*
464          * Having an available reg_c can be regarded as supporting extensive
465          * flow metadata registers, which implies:
466          * - metadata register copy action by modify header.
467          * - 16 modify header actions are supported.
468          * - reg_c's are preserved across different domains (FDB and NIC) on
469          *   packet loopback by flow lookup miss.
470          */
471         return config->flow_mreg_c[2] != REG_NONE;
472 }
473
474 /**
475  * Verify the @p item specifications (spec, last, mask) are compatible with the
476  * NIC capabilities.
477  *
478  * @param[in] item
479  *   Item specification.
480  * @param[in] mask
481  *   @p item->mask or flow default bit-masks.
482  * @param[in] nic_mask
483  *   Bit-masks covering supported fields by the NIC to compare with user mask.
484  * @param[in] size
485  *   Bit-masks size in bytes.
486  * @param[out] error
487  *   Pointer to error structure.
488  *
489  * @return
490  *   0 on success, a negative errno value otherwise and rte_errno is set.
491  */
492 int
493 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
494                           const uint8_t *mask,
495                           const uint8_t *nic_mask,
496                           unsigned int size,
497                           struct rte_flow_error *error)
498 {
499         unsigned int i;
500
501         MLX5_ASSERT(nic_mask);
502         for (i = 0; i < size; ++i)
503                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
504                         return rte_flow_error_set(error, ENOTSUP,
505                                                   RTE_FLOW_ERROR_TYPE_ITEM,
506                                                   item,
507                                                   "mask enables unsupported"
508                                                   " bits");
509         if (!item->spec && (item->mask || item->last))
510                 return rte_flow_error_set(error, EINVAL,
511                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
512                                           "mask/last without a spec is not"
513                                           " supported");
514         if (item->spec && item->last) {
515                 uint8_t spec[size];
516                 uint8_t last[size];
517                 unsigned int i;
518                 int ret;
519
520                 for (i = 0; i < size; ++i) {
521                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
522                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
523                 }
524                 ret = memcmp(spec, last, size);
525                 if (ret != 0)
526                         return rte_flow_error_set(error, EINVAL,
527                                                   RTE_FLOW_ERROR_TYPE_ITEM,
528                                                   item,
529                                                   "range is not valid");
530         }
531         return 0;
532 }
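/*
 * Illustrative usage sketch (not part of the driver): this is how the item
 * validators below call the helper, falling back to the rte_flow default
 * mask when the user did not provide one:
 *
 *     const struct rte_flow_item_icmp *mask =
 *             item->mask ? item->mask : &rte_flow_item_icmp_mask;
 *     ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                     (const uint8_t *)&rte_flow_item_icmp_mask,
 *                                     sizeof(struct rte_flow_item_icmp), error);
 */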
533
534 /**
535  * Adjust the hash fields according to the @p flow information.
536  *
537  * @param[in] rss_desc
538  *   Pointer to the RSS descriptor of the flow.
539  * @param[in] tunnel
540  *   1 when the hash field is for a tunnel item.
541  * @param[in] layer_types
542  *   ETH_RSS_* types.
543  * @param[in] hash_fields
544  *   Item hash fields.
545  *
546  * @return
547  *   The hash fields that should be used.
548  */
549 uint64_t
550 mlx5_flow_hashfields_adjust(struct mlx5_flow_rss_desc *rss_desc,
551                             int tunnel __rte_unused, uint64_t layer_types,
552                             uint64_t hash_fields)
553 {
554 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
555         int rss_request_inner = rss_desc->level >= 2;
556
557         /* Check RSS hash level for tunnel. */
558         if (tunnel && rss_request_inner)
559                 hash_fields |= IBV_RX_HASH_INNER;
560         else if (tunnel || rss_request_inner)
561                 return 0;
562 #endif
563         /* Check if requested layer matches RSS hash fields. */
564         if (!(rss_desc->types & layer_types))
565                 return 0;
566         return hash_fields;
567 }
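/*
 * Illustrative usage sketch (not part of the driver), assuming verbs hash
 * field flags: keep the IPv4 hash fields only if the RSS request asked for
 * IPv4 and the tunnel level matches.
 *
 *     uint64_t fields = mlx5_flow_hashfields_adjust(rss_desc, tunnel,
 *                                                    ETH_RSS_IPV4,
 *                                                    IBV_RX_HASH_SRC_IPV4 |
 *                                                    IBV_RX_HASH_DST_IPV4);
 */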
568
569 /**
570  * Look up and set the tunnel ptype in the Rx queue data. Only a single ptype can
571  * be used; if several tunnel rules are used on this queue, the tunnel ptype is
572  * cleared.
573  *
574  * @param rxq_ctrl
575  *   Rx queue to update.
576  */
577 static void
578 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
579 {
580         unsigned int i;
581         uint32_t tunnel_ptype = 0;
582
583         /* Look up the ptype to use. */
584         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
585                 if (!rxq_ctrl->flow_tunnels_n[i])
586                         continue;
587                 if (!tunnel_ptype) {
588                         tunnel_ptype = tunnels_info[i].ptype;
589                 } else {
590                         tunnel_ptype = 0;
591                         break;
592                 }
593         }
594         rxq_ctrl->rxq.tunnel = tunnel_ptype;
595 }
596
597 /**
598  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
599  * flow.
600  *
601  * @param[in] dev
602  *   Pointer to the Ethernet device structure.
603  * @param[in] dev_handle
604  *   Pointer to device flow handle structure.
605  */
606 static void
607 flow_drv_rxq_flags_set(struct rte_eth_dev *dev,
608                        struct mlx5_flow_handle *dev_handle)
609 {
610         struct mlx5_priv *priv = dev->data->dev_private;
611         const int mark = dev_handle->mark;
612         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
613         struct mlx5_hrxq *hrxq;
614         unsigned int i;
615
616         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
617                 return;
618         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
619                               dev_handle->rix_hrxq);
620         if (!hrxq)
621                 return;
622         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
623                 int idx = hrxq->ind_table->queues[i];
624                 struct mlx5_rxq_ctrl *rxq_ctrl =
625                         container_of((*priv->rxqs)[idx],
626                                      struct mlx5_rxq_ctrl, rxq);
627
628                 /*
629                  * To support metadata register copy on Tx loopback,
630                  * this must always be enabled (metadata may arrive
631                  * from another port, not only from local flows).
632                  */
633                 if (priv->config.dv_flow_en &&
634                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
635                     mlx5_flow_ext_mreg_supported(dev)) {
636                         rxq_ctrl->rxq.mark = 1;
637                         rxq_ctrl->flow_mark_n = 1;
638                 } else if (mark) {
639                         rxq_ctrl->rxq.mark = 1;
640                         rxq_ctrl->flow_mark_n++;
641                 }
642                 if (tunnel) {
643                         unsigned int j;
644
645                         /* Increase the counter matching the flow. */
646                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
647                                 if ((tunnels_info[j].tunnel &
648                                      dev_handle->layers) ==
649                                     tunnels_info[j].tunnel) {
650                                         rxq_ctrl->flow_tunnels_n[j]++;
651                                         break;
652                                 }
653                         }
654                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
655                 }
656         }
657 }
658
659 /**
660  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow.
661  *
662  * @param[in] dev
663  *   Pointer to the Ethernet device structure.
664  * @param[in] flow
665  *   Pointer to flow structure.
666  */
667 static void
668 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
669 {
670         struct mlx5_priv *priv = dev->data->dev_private;
671         uint32_t handle_idx;
672         struct mlx5_flow_handle *dev_handle;
673
674         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
675                        handle_idx, dev_handle, next)
676                 flow_drv_rxq_flags_set(dev, dev_handle);
677 }
678
679 /**
680  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
681  * device flow if no other flow uses it with the same kind of request.
682  *
683  * @param dev
684  *   Pointer to Ethernet device.
685  * @param[in] dev_handle
686  *   Pointer to the device flow handle structure.
687  */
688 static void
689 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev,
690                         struct mlx5_flow_handle *dev_handle)
691 {
692         struct mlx5_priv *priv = dev->data->dev_private;
693         const int mark = dev_handle->mark;
694         const int tunnel = !!(dev_handle->layers & MLX5_FLOW_LAYER_TUNNEL);
695         struct mlx5_hrxq *hrxq;
696         unsigned int i;
697
698         if (dev_handle->fate_action != MLX5_FLOW_FATE_QUEUE)
699                 return;
700         hrxq = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_HRXQ],
701                               dev_handle->rix_hrxq);
702         if (!hrxq)
703                 return;
704         MLX5_ASSERT(dev->data->dev_started);
705         for (i = 0; i != hrxq->ind_table->queues_n; ++i) {
706                 int idx = hrxq->ind_table->queues[i];
707                 struct mlx5_rxq_ctrl *rxq_ctrl =
708                         container_of((*priv->rxqs)[idx],
709                                      struct mlx5_rxq_ctrl, rxq);
710
711                 if (priv->config.dv_flow_en &&
712                     priv->config.dv_xmeta_en != MLX5_XMETA_MODE_LEGACY &&
713                     mlx5_flow_ext_mreg_supported(dev)) {
714                         rxq_ctrl->rxq.mark = 1;
715                         rxq_ctrl->flow_mark_n = 1;
716                 } else if (mark) {
717                         rxq_ctrl->flow_mark_n--;
718                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
719                 }
720                 if (tunnel) {
721                         unsigned int j;
722
723                         /* Decrease the counter matching the flow. */
724                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
725                                 if ((tunnels_info[j].tunnel &
726                                      dev_handle->layers) ==
727                                     tunnels_info[j].tunnel) {
728                                         rxq_ctrl->flow_tunnels_n[j]--;
729                                         break;
730                                 }
731                         }
732                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
733                 }
734         }
735 }
736
737 /**
738  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
739  * @p flow if no other flow uses it with the same kind of request.
740  *
741  * @param dev
742  *   Pointer to Ethernet device.
743  * @param[in] flow
744  *   Pointer to the flow.
745  */
746 static void
747 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
748 {
749         struct mlx5_priv *priv = dev->data->dev_private;
750         uint32_t handle_idx;
751         struct mlx5_flow_handle *dev_handle;
752
753         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
754                        handle_idx, dev_handle, next)
755                 flow_drv_rxq_flags_trim(dev, dev_handle);
756 }
757
758 /**
759  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
760  *
761  * @param dev
762  *   Pointer to Ethernet device.
763  */
764 static void
765 flow_rxq_flags_clear(struct rte_eth_dev *dev)
766 {
767         struct mlx5_priv *priv = dev->data->dev_private;
768         unsigned int i;
769
770         for (i = 0; i != priv->rxqs_n; ++i) {
771                 struct mlx5_rxq_ctrl *rxq_ctrl;
772                 unsigned int j;
773
774                 if (!(*priv->rxqs)[i])
775                         continue;
776                 rxq_ctrl = container_of((*priv->rxqs)[i],
777                                         struct mlx5_rxq_ctrl, rxq);
778                 rxq_ctrl->flow_mark_n = 0;
779                 rxq_ctrl->rxq.mark = 0;
780                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
781                         rxq_ctrl->flow_tunnels_n[j] = 0;
782                 rxq_ctrl->rxq.tunnel = 0;
783         }
784 }
785
786 /**
787  * Set the Rx queue dynamic metadata (mask and offset) for a flow.
788  *
789  * @param[in] dev
790  *   Pointer to the Ethernet device structure.
791  */
792 void
793 mlx5_flow_rxq_dynf_metadata_set(struct rte_eth_dev *dev)
794 {
795         struct mlx5_priv *priv = dev->data->dev_private;
796         struct mlx5_rxq_data *data;
797         unsigned int i;
798
799         for (i = 0; i != priv->rxqs_n; ++i) {
800                 if (!(*priv->rxqs)[i])
801                         continue;
802                 data = (*priv->rxqs)[i];
803                 if (!rte_flow_dynf_metadata_avail()) {
804                         data->dynf_meta = 0;
805                         data->flow_meta_mask = 0;
806                         data->flow_meta_offset = -1;
807                 } else {
808                         data->dynf_meta = 1;
809                         data->flow_meta_mask = rte_flow_dynf_metadata_mask;
810                         data->flow_meta_offset = rte_flow_dynf_metadata_offs;
811                 }
812         }
813 }
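/*
 * Note (illustrative, not part of the driver): the dynamic metadata mbuf
 * field/flag must have been registered by the application beforehand, e.g.
 * with rte_flow_dynf_metadata_register(); only then does
 * rte_flow_dynf_metadata_avail() return true and the mask/offset above get
 * propagated to the Rx queues.
 */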
814
815 /*
816  * Return a pointer to the desired action in the list of actions.
817  *
818  * @param[in] actions
819  *   The list of actions to search the action in.
820  * @param[in] action
821  *   The action to find.
822  *
823  * @return
824  *   Pointer to the action in the list, if found. NULL otherwise.
825  */
826 const struct rte_flow_action *
827 mlx5_flow_find_action(const struct rte_flow_action *actions,
828                       enum rte_flow_action_type action)
829 {
830         if (actions == NULL)
831                 return NULL;
832         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
833                 if (actions->type == action)
834                         return actions;
835         return NULL;
836 }
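/*
 * Illustrative usage sketch (not part of the driver): locating the RSS
 * action in an action list before inspecting its configuration.
 *
 *     const struct rte_flow_action *rss_act =
 *             mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *     const struct rte_flow_action_rss *rss_conf =
 *             rss_act ? rss_act->conf : NULL;
 */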
837
838 /*
839  * Validate the flag action.
840  *
841  * @param[in] action_flags
842  *   Bit-fields that hold the actions detected until now.
843  * @param[in] attr
844  *   Attributes of flow that includes this action.
845  * @param[out] error
846  *   Pointer to error structure.
847  *
848  * @return
849  *   0 on success, a negative errno value otherwise and rte_errno is set.
850  */
851 int
852 mlx5_flow_validate_action_flag(uint64_t action_flags,
853                                const struct rte_flow_attr *attr,
854                                struct rte_flow_error *error)
855 {
856         if (action_flags & MLX5_FLOW_ACTION_MARK)
857                 return rte_flow_error_set(error, EINVAL,
858                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
859                                           "can't mark and flag in same flow");
860         if (action_flags & MLX5_FLOW_ACTION_FLAG)
861                 return rte_flow_error_set(error, EINVAL,
862                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
863                                           "can't have 2 flag"
864                                           " actions in same flow");
865         if (attr->egress)
866                 return rte_flow_error_set(error, ENOTSUP,
867                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
868                                           "flag action not supported for "
869                                           "egress");
870         return 0;
871 }
872
873 /*
874  * Validate the mark action.
875  *
876  * @param[in] action
877  *   Pointer to the mark action.
878  * @param[in] action_flags
879  *   Bit-fields that hold the actions detected until now.
880  * @param[in] attr
881  *   Attributes of flow that includes this action.
882  * @param[out] error
883  *   Pointer to error structure.
884  *
885  * @return
886  *   0 on success, a negative errno value otherwise and rte_errno is set.
887  */
888 int
889 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
890                                uint64_t action_flags,
891                                const struct rte_flow_attr *attr,
892                                struct rte_flow_error *error)
893 {
894         const struct rte_flow_action_mark *mark = action->conf;
895
896         if (!mark)
897                 return rte_flow_error_set(error, EINVAL,
898                                           RTE_FLOW_ERROR_TYPE_ACTION,
899                                           action,
900                                           "configuration cannot be null");
901         if (mark->id >= MLX5_FLOW_MARK_MAX)
902                 return rte_flow_error_set(error, EINVAL,
903                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
904                                           &mark->id,
905                                           "mark id must be in 0 <= id < "
906                                           RTE_STR(MLX5_FLOW_MARK_MAX));
907         if (action_flags & MLX5_FLOW_ACTION_FLAG)
908                 return rte_flow_error_set(error, EINVAL,
909                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
910                                           "can't flag and mark in same flow");
911         if (action_flags & MLX5_FLOW_ACTION_MARK)
912                 return rte_flow_error_set(error, EINVAL,
913                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
914                                           "can't have 2 mark actions in same"
915                                           " flow");
916         if (attr->egress)
917                 return rte_flow_error_set(error, ENOTSUP,
918                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
919                                           "mark action not supported for "
920                                           "egress");
921         return 0;
922 }
923
924 /*
925  * Validate the drop action.
926  *
927  * @param[in] action_flags
928  *   Bit-fields that hold the actions detected until now.
929  * @param[in] attr
930  *   Attributes of flow that includes this action.
931  * @param[out] error
932  *   Pointer to error structure.
933  *
934  * @return
935  *   0 on success, a negative errno value otherwise and rte_errno is set.
936  */
937 int
938 mlx5_flow_validate_action_drop(uint64_t action_flags __rte_unused,
939                                const struct rte_flow_attr *attr,
940                                struct rte_flow_error *error)
941 {
942         if (attr->egress)
943                 return rte_flow_error_set(error, ENOTSUP,
944                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
945                                           "drop action not supported for "
946                                           "egress");
947         return 0;
948 }
949
950 /*
951  * Validate the queue action.
952  *
953  * @param[in] action
954  *   Pointer to the queue action.
955  * @param[in] action_flags
956  *   Bit-fields that hold the actions detected until now.
957  * @param[in] dev
958  *   Pointer to the Ethernet device structure.
959  * @param[in] attr
960  *   Attributes of flow that includes this action.
961  * @param[out] error
962  *   Pointer to error structure.
963  *
964  * @return
965  *   0 on success, a negative errno value otherwise and rte_errno is set.
966  */
967 int
968 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
969                                 uint64_t action_flags,
970                                 struct rte_eth_dev *dev,
971                                 const struct rte_flow_attr *attr,
972                                 struct rte_flow_error *error)
973 {
974         struct mlx5_priv *priv = dev->data->dev_private;
975         const struct rte_flow_action_queue *queue = action->conf;
976
977         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
978                 return rte_flow_error_set(error, EINVAL,
979                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
980                                           "can't have 2 fate actions in"
981                                           " same flow");
982         if (!priv->rxqs_n)
983                 return rte_flow_error_set(error, EINVAL,
984                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
985                                           NULL, "No Rx queues configured");
986         if (queue->index >= priv->rxqs_n)
987                 return rte_flow_error_set(error, EINVAL,
988                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
989                                           &queue->index,
990                                           "queue index out of range");
991         if (!(*priv->rxqs)[queue->index])
992                 return rte_flow_error_set(error, EINVAL,
993                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
994                                           &queue->index,
995                                           "queue is not configured");
996         if (attr->egress)
997                 return rte_flow_error_set(error, ENOTSUP,
998                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
999                                           "queue action not supported for "
1000                                           "egress");
1001         return 0;
1002 }
1003
1004 /*
1005  * Validate the RSS action.
1006  *
1007  * @param[in] action
1008  *   Pointer to the RSS action.
1009  * @param[in] action_flags
1010  *   Bit-fields that hold the actions detected until now.
1011  * @param[in] dev
1012  *   Pointer to the Ethernet device structure.
1013  * @param[in] attr
1014  *   Attributes of flow that includes this action.
1015  * @param[in] item_flags
1016  *   Items that were detected.
1017  * @param[out] error
1018  *   Pointer to error structure.
1019  *
1020  * @return
1021  *   0 on success, a negative errno value otherwise and rte_errno is set.
1022  */
1023 int
1024 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
1025                               uint64_t action_flags,
1026                               struct rte_eth_dev *dev,
1027                               const struct rte_flow_attr *attr,
1028                               uint64_t item_flags,
1029                               struct rte_flow_error *error)
1030 {
1031         struct mlx5_priv *priv = dev->data->dev_private;
1032         const struct rte_flow_action_rss *rss = action->conf;
1033         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1034         unsigned int i;
1035
1036         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1037                 return rte_flow_error_set(error, EINVAL,
1038                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1039                                           "can't have 2 fate actions"
1040                                           " in same flow");
1041         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1042             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1043                 return rte_flow_error_set(error, ENOTSUP,
1044                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1045                                           &rss->func,
1046                                           "RSS hash function not supported");
1047 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1048         if (rss->level > 2)
1049 #else
1050         if (rss->level > 1)
1051 #endif
1052                 return rte_flow_error_set(error, ENOTSUP,
1053                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1054                                           &rss->level,
1055                                           "tunnel RSS is not supported");
1056         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1057         if (rss->key_len == 0 && rss->key != NULL)
1058                 return rte_flow_error_set(error, ENOTSUP,
1059                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1060                                           &rss->key_len,
1061                                           "RSS hash key length 0");
1062         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1063                 return rte_flow_error_set(error, ENOTSUP,
1064                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1065                                           &rss->key_len,
1066                                           "RSS hash key too small");
1067         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1068                 return rte_flow_error_set(error, ENOTSUP,
1069                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1070                                           &rss->key_len,
1071                                           "RSS hash key too large");
1072         if (rss->queue_num > priv->config.ind_table_max_size)
1073                 return rte_flow_error_set(error, ENOTSUP,
1074                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1075                                           &rss->queue_num,
1076                                           "number of queues too large");
1077         if (rss->types & MLX5_RSS_HF_MASK)
1078                 return rte_flow_error_set(error, ENOTSUP,
1079                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1080                                           &rss->types,
1081                                           "some RSS protocols are not"
1082                                           " supported");
1083         if ((rss->types & (ETH_RSS_L3_SRC_ONLY | ETH_RSS_L3_DST_ONLY)) &&
1084             !(rss->types & ETH_RSS_IP))
1085                 return rte_flow_error_set(error, EINVAL,
1086                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1087                                           "L3 partial RSS requested but L3 RSS"
1088                                           " type not specified");
1089         if ((rss->types & (ETH_RSS_L4_SRC_ONLY | ETH_RSS_L4_DST_ONLY)) &&
1090             !(rss->types & (ETH_RSS_UDP | ETH_RSS_TCP)))
1091                 return rte_flow_error_set(error, EINVAL,
1092                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1093                                           "L4 partial RSS requested but L4 RSS"
1094                                           " type not specified");
1095         if (!priv->rxqs_n)
1096                 return rte_flow_error_set(error, EINVAL,
1097                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1098                                           NULL, "No Rx queues configured");
1099         if (!rss->queue_num)
1100                 return rte_flow_error_set(error, EINVAL,
1101                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1102                                           NULL, "No queues configured");
1103         for (i = 0; i != rss->queue_num; ++i) {
1104                 if (rss->queue[i] >= priv->rxqs_n)
1105                         return rte_flow_error_set
1106                                 (error, EINVAL,
1107                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1108                                  &rss->queue[i], "queue index out of range");
1109                 if (!(*priv->rxqs)[rss->queue[i]])
1110                         return rte_flow_error_set
1111                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1112                                  &rss->queue[i], "queue is not configured");
1113         }
1114         if (attr->egress)
1115                 return rte_flow_error_set(error, ENOTSUP,
1116                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1117                                           "rss action not supported for "
1118                                           "egress");
1119         if (rss->level > 1 && !tunnel)
1120                 return rte_flow_error_set(error, EINVAL,
1121                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1122                                           "inner RSS is not supported for "
1123                                           "non-tunnel flows");
1124         if ((item_flags & MLX5_FLOW_LAYER_ECPRI) &&
1125             !(item_flags & MLX5_FLOW_LAYER_INNER_L4_UDP)) {
1126                 return rte_flow_error_set(error, EINVAL,
1127                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1128                                           "RSS on eCPRI is not supported now");
1129         }
1130         return 0;
1131 }
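/*
 * Illustrative example (not part of the driver): an RSS action that passes
 * the checks above on a port with at least two configured Rx queues.
 *
 *     uint16_t queues[] = { 0, 1 };
 *     struct rte_flow_action_rss rss = {
 *             .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *             .level = 0,
 *             .types = ETH_RSS_IP,
 *             .key_len = 0,
 *             .key = NULL,
 *             .queue_num = 2,
 *             .queue = queues,
 *     };
 */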
1132
1133 /*
1134  * Validate the default miss action.
1135  *
1136  * @param[in] action_flags
1137  *   Bit-fields that hold the actions detected until now.
1138  * @param[out] error
1139  *   Pointer to error structure.
1140  *
1141  * @return
1142  *   0 on success, a negative errno value otherwise and rte_errno is set.
1143  */
1144 int
1145 mlx5_flow_validate_action_default_miss(uint64_t action_flags,
1146                                 const struct rte_flow_attr *attr,
1147                                 struct rte_flow_error *error)
1148 {
1149         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
1150                 return rte_flow_error_set(error, EINVAL,
1151                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
1152                                           "can't have 2 fate actions in"
1153                                           " same flow");
1154         if (attr->egress)
1155                 return rte_flow_error_set(error, ENOTSUP,
1156                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1157                                           "default miss action not supported "
1158                                           "for egress");
1159         if (attr->group)
1160                 return rte_flow_error_set(error, ENOTSUP,
1161                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP, NULL,
1162                                           "only group 0 is supported");
1163         if (attr->transfer)
1164                 return rte_flow_error_set(error, ENOTSUP,
1165                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1166                                           NULL, "transfer is not supported");
1167         return 0;
1168 }
1169
1170 /*
1171  * Validate the count action.
1172  *
1173  * @param[in] dev
1174  *   Pointer to the Ethernet device structure.
1175  * @param[in] attr
1176  *   Attributes of flow that includes this action.
1177  * @param[out] error
1178  *   Pointer to error structure.
1179  *
1180  * @return
1181  *   0 on success, a negative errno value otherwise and rte_errno is set.
1182  */
1183 int
1184 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1185                                 const struct rte_flow_attr *attr,
1186                                 struct rte_flow_error *error)
1187 {
1188         if (attr->egress)
1189                 return rte_flow_error_set(error, ENOTSUP,
1190                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1191                                           "count action not supported for "
1192                                           "egress");
1193         return 0;
1194 }
1195
1196 /**
1197  * Verify the @p attributes will be correctly understood by the NIC and store
1198  * them in the @p flow if everything is correct.
1199  *
1200  * @param[in] dev
1201  *   Pointer to the Ethernet device structure.
1202  * @param[in] attributes
1203  *   Pointer to flow attributes
1204  * @param[out] error
1205  *   Pointer to error structure.
1206  *
1207  * @return
1208  *   0 on success, a negative errno value otherwise and rte_errno is set.
1209  */
1210 int
1211 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1212                               const struct rte_flow_attr *attributes,
1213                               struct rte_flow_error *error)
1214 {
1215         struct mlx5_priv *priv = dev->data->dev_private;
1216         uint32_t priority_max = priv->config.flow_prio - 1;
1217
1218         if (attributes->group)
1219                 return rte_flow_error_set(error, ENOTSUP,
1220                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1221                                           NULL, "groups are not supported");
1222         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1223             attributes->priority >= priority_max)
1224                 return rte_flow_error_set(error, ENOTSUP,
1225                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1226                                           NULL, "priority out of range");
1227         if (attributes->egress)
1228                 return rte_flow_error_set(error, ENOTSUP,
1229                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1230                                           "egress is not supported");
1231         if (attributes->transfer && !priv->config.dv_esw_en)
1232                 return rte_flow_error_set(error, ENOTSUP,
1233                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1234                                           NULL, "transfer is not supported");
1235         if (!attributes->ingress)
1236                 return rte_flow_error_set(error, EINVAL,
1237                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1238                                           NULL,
1239                                           "ingress attribute is mandatory");
1240         return 0;
1241 }
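/*
 * Illustrative example (not part of the driver): attributes that pass the
 * checks above; only ingress, group 0 and an in-range (or reserved)
 * priority are accepted here.
 *
 *     struct rte_flow_attr attr = {
 *             .group = 0,
 *             .priority = 0,
 *             .ingress = 1,
 *             .egress = 0,
 *             .transfer = 0,
 *     };
 */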
1242
1243 /**
1244  * Validate ICMP6 item.
1245  *
1246  * @param[in] item
1247  *   Item specification.
1248  * @param[in] item_flags
1249  *   Bit-fields that hold the items detected until now.
1250  * @param[out] error
1251  *   Pointer to error structure.
1252  *
1253  * @return
1254  *   0 on success, a negative errno value otherwise and rte_errno is set.
1255  */
1256 int
1257 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1258                                uint64_t item_flags,
1259                                uint8_t target_protocol,
1260                                struct rte_flow_error *error)
1261 {
1262         const struct rte_flow_item_icmp6 *mask = item->mask;
1263         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1264         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1265                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1266         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1267                                       MLX5_FLOW_LAYER_OUTER_L4;
1268         int ret;
1269
1270         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1271                 return rte_flow_error_set(error, EINVAL,
1272                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1273                                           "protocol filtering not compatible"
1274                                           " with ICMP6 layer");
1275         if (!(item_flags & l3m))
1276                 return rte_flow_error_set(error, EINVAL,
1277                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1278                                           "IPv6 is mandatory to filter on"
1279                                           " ICMP6");
1280         if (item_flags & l4m)
1281                 return rte_flow_error_set(error, EINVAL,
1282                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1283                                           "multiple L4 layers not supported");
1284         if (!mask)
1285                 mask = &rte_flow_item_icmp6_mask;
1286         ret = mlx5_flow_item_acceptable
1287                 (item, (const uint8_t *)mask,
1288                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1289                  sizeof(struct rte_flow_item_icmp6), error);
1290         if (ret < 0)
1291                 return ret;
1292         return 0;
1293 }
1294
1295 /**
1296  * Validate ICMP item.
1297  *
1298  * @param[in] item
1299  *   Item specification.
1300  * @param[in] item_flags
1301  *   Bit-fields that hold the items detected until now.
1302  * @param[out] error
1303  *   Pointer to error structure.
1304  *
1305  * @return
1306  *   0 on success, a negative errno value otherwise and rte_errno is set.
1307  */
1308 int
1309 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1310                              uint64_t item_flags,
1311                              uint8_t target_protocol,
1312                              struct rte_flow_error *error)
1313 {
1314         const struct rte_flow_item_icmp *mask = item->mask;
1315         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1316         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1317                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1318         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1319                                       MLX5_FLOW_LAYER_OUTER_L4;
1320         int ret;
1321
1322         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1323                 return rte_flow_error_set(error, EINVAL,
1324                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1325                                           "protocol filtering not compatible"
1326                                           " with ICMP layer");
1327         if (!(item_flags & l3m))
1328                 return rte_flow_error_set(error, EINVAL,
1329                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1330                                           "IPv4 is mandatory to filter"
1331                                           " on ICMP");
1332         if (item_flags & l4m)
1333                 return rte_flow_error_set(error, EINVAL,
1334                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1335                                           "multiple L4 layers not supported");
1336         if (!mask)
1337                 mask = &rte_flow_item_icmp_mask;
1338         ret = mlx5_flow_item_acceptable
1339                 (item, (const uint8_t *)mask,
1340                  (const uint8_t *)&rte_flow_item_icmp_mask,
1341                  sizeof(struct rte_flow_item_icmp), error);
1342         if (ret < 0)
1343                 return ret;
1344         return 0;
1345 }
1346
1347 /**
1348  * Validate Ethernet item.
1349  *
1350  * @param[in] item
1351  *   Item specification.
1352  * @param[in] item_flags
1353  *   Bit-fields that hold the items detected until now.
1354  * @param[out] error
1355  *   Pointer to error structure.
1356  *
1357  * @return
1358  *   0 on success, a negative errno value otherwise and rte_errno is set.
1359  */
1360 int
1361 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1362                             uint64_t item_flags,
1363                             struct rte_flow_error *error)
1364 {
1365         const struct rte_flow_item_eth *mask = item->mask;
1366         const struct rte_flow_item_eth nic_mask = {
1367                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1368                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1369                 .type = RTE_BE16(0xffff),
1370         };
1371         int ret;
1372         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1373         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1374                                        MLX5_FLOW_LAYER_OUTER_L2;
1375
1376         if (item_flags & ethm)
1377                 return rte_flow_error_set(error, ENOTSUP,
1378                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1379                                           "multiple L2 layers not supported");
1380         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1381             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1382                 return rte_flow_error_set(error, EINVAL,
1383                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1384                                           "L2 layer should not follow "
1385                                           "L3 layers");
1386         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1387             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1388                 return rte_flow_error_set(error, EINVAL,
1389                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1390                                           "L2 layer should not follow VLAN");
1391         if (!mask)
1392                 mask = &rte_flow_item_eth_mask;
1393         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1394                                         (const uint8_t *)&nic_mask,
1395                                         sizeof(struct rte_flow_item_eth),
1396                                         error);
1397         return ret;
1398 }
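
/*
 * Editor's note: a minimal illustrative sketch (kept under "#if 0", not part
 * of the driver) showing how the helper above could be exercised for an
 * Ethernet item matching a single destination MAC. The "item_flags" and
 * "error" arguments are assumed to come from the caller's validation loop.
 */
#if 0
static int
example_validate_eth(uint64_t item_flags, struct rte_flow_error *error)
{
        const struct rte_flow_item_eth spec = {
                .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
        };
        const struct rte_flow_item_eth mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .spec = &spec,
                .mask = &mask,
        };

        /* Fails if an L2, L3 or VLAN layer was already seen in item_flags. */
        return mlx5_flow_validate_item_eth(&item, item_flags, error);
}
#endif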
1399
1400 /**
1401  * Validate VLAN item.
1402  *
1403  * @param[in] item
1404  *   Item specification.
1405  * @param[in] item_flags
1406  *   Bit-fields that hold the items detected until now.
1407  * @param[in] dev
1408  *   Ethernet device the flow is being created on.
1409  * @param[out] error
1410  *   Pointer to error structure.
1411  *
1412  * @return
1413  *   0 on success, a negative errno value otherwise and rte_errno is set.
1414  */
1415 int
1416 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1417                              uint64_t item_flags,
1418                              struct rte_eth_dev *dev,
1419                              struct rte_flow_error *error)
1420 {
1421         const struct rte_flow_item_vlan *spec = item->spec;
1422         const struct rte_flow_item_vlan *mask = item->mask;
1423         const struct rte_flow_item_vlan nic_mask = {
1424                 .tci = RTE_BE16(UINT16_MAX),
1425                 .inner_type = RTE_BE16(UINT16_MAX),
1426         };
1427         uint16_t vlan_tag = 0;
1428         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1429         int ret;
1430         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1431                                         MLX5_FLOW_LAYER_INNER_L4) :
1432                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1433                                         MLX5_FLOW_LAYER_OUTER_L4);
1434         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1435                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1436
1437         if (item_flags & vlanm)
1438                 return rte_flow_error_set(error, EINVAL,
1439                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1440                                           "multiple VLAN layers not supported");
1441         else if ((item_flags & l34m) != 0)
1442                 return rte_flow_error_set(error, EINVAL,
1443                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1444                                           "VLAN cannot follow L3/L4 layer");
1445         if (!mask)
1446                 mask = &rte_flow_item_vlan_mask;
1447         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1448                                         (const uint8_t *)&nic_mask,
1449                                         sizeof(struct rte_flow_item_vlan),
1450                                         error);
1451         if (ret)
1452                 return ret;
1453         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1454                 struct mlx5_priv *priv = dev->data->dev_private;
1455
1456                 if (priv->vmwa_context) {
1457                         /*
1458                          * A non-NULL context means we run in a virtual
1459                          * machine with SR-IOV enabled, so a VLAN interface
1460                          * must be created to make the hypervisor set up the
1461                          * E-Switch vport context correctly. To avoid creating
1462                          * multiple VLAN interfaces, VLAN tag masks are not supported.
1463                          */
1464                         return rte_flow_error_set(error, EINVAL,
1465                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1466                                                   item,
1467                                                   "VLAN tag mask is not"
1468                                                   " supported in virtual"
1469                                                   " environment");
1470                 }
1471         }
1472         if (spec) {
1473                 vlan_tag = spec->tci;
1474                 vlan_tag &= mask->tci;
1475         }
1476         /*
1477          * From the Verbs perspective an empty VLAN is equivalent
1478          * to a packet without a VLAN layer.
1479          */
1480         if (!vlan_tag)
1481                 return rte_flow_error_set(error, EINVAL,
1482                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1483                                           item->spec,
1484                                           "VLAN cannot be empty");
1485         return 0;
1486 }
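
/*
 * Editor's note: an illustrative sketch (hypothetical caller, compiled out
 * with "#if 0") of the VLAN validation above. It matches TCI 0x0123 with the
 * full 12-bit VLAN-ID mask; "dev", "item_flags" and "error" are assumed to
 * be provided by the surrounding validation code.
 */
#if 0
static int
example_validate_vlan(struct rte_eth_dev *dev, uint64_t item_flags,
                      struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan spec = {
                .tci = RTE_BE16(0x0123),
        };
        const struct rte_flow_item_vlan mask = {
                .tci = RTE_BE16(0x0fff),
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
                .spec = &spec,
                .mask = &mask,
        };

        /* Fails if a VLAN or an L3/L4 layer was already seen in item_flags. */
        return mlx5_flow_validate_item_vlan(&item, item_flags, dev, error);
}
#endif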
1487
1488 /**
1489  * Validate IPV4 item.
1490  *
1491  * @param[in] item
1492  *   Item specification.
1493  * @param[in] item_flags
1494  *   Bit-fields that hold the items detected until now.
1495  * @param[in] last_item
1496  *   Previous validated item in the pattern items.
1497  * @param[in] ether_type
1498  *   Type in the ethernet layer header (including dot1q).
1499  * @param[in] acc_mask
1500  *   Acceptable mask, if NULL the default internal mask
1501  *   will be used to check whether item fields are supported.
1502  * @param[out] error
1503  *   Pointer to error structure.
1504  *
1505  * @return
1506  *   0 on success, a negative errno value otherwise and rte_errno is set.
1507  */
1508 int
1509 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1510                              uint64_t item_flags,
1511                              uint64_t last_item,
1512                              uint16_t ether_type,
1513                              const struct rte_flow_item_ipv4 *acc_mask,
1514                              struct rte_flow_error *error)
1515 {
1516         const struct rte_flow_item_ipv4 *mask = item->mask;
1517         const struct rte_flow_item_ipv4 *spec = item->spec;
1518         const struct rte_flow_item_ipv4 nic_mask = {
1519                 .hdr = {
1520                         .src_addr = RTE_BE32(0xffffffff),
1521                         .dst_addr = RTE_BE32(0xffffffff),
1522                         .type_of_service = 0xff,
1523                         .next_proto_id = 0xff,
1524                 },
1525         };
1526         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1527         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1528                                       MLX5_FLOW_LAYER_OUTER_L3;
1529         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1530                                       MLX5_FLOW_LAYER_OUTER_L4;
1531         int ret;
1532         uint8_t next_proto = 0xFF;
1533         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1534                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1535                                   MLX5_FLOW_LAYER_INNER_VLAN);
1536
1537         if ((last_item & l2_vlan) && ether_type &&
1538             ether_type != RTE_ETHER_TYPE_IPV4)
1539                 return rte_flow_error_set(error, EINVAL,
1540                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1541                                           "IPv4 cannot follow L2/VLAN layer "
1542                                           "whose ether type is not IPv4");
1543         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1544                 if (mask && spec)
1545                         next_proto = mask->hdr.next_proto_id &
1546                                      spec->hdr.next_proto_id;
1547                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1548                         return rte_flow_error_set(error, EINVAL,
1549                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1550                                                   item,
1551                                                   "multiple tunnel "
1552                                                   "not supported");
1553         }
1554         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1555                 return rte_flow_error_set(error, EINVAL,
1556                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1557                                           "wrong tunnel type - IPv6 specified "
1558                                           "but IPv4 item provided");
1559         if (item_flags & l3m)
1560                 return rte_flow_error_set(error, ENOTSUP,
1561                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1562                                           "multiple L3 layers not supported");
1563         else if (item_flags & l4m)
1564                 return rte_flow_error_set(error, EINVAL,
1565                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1566                                           "L3 cannot follow an L4 layer.");
1567         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1568                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1569                 return rte_flow_error_set(error, EINVAL,
1570                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1571                                           "L3 cannot follow an NVGRE layer.");
1572         if (!mask)
1573                 mask = &rte_flow_item_ipv4_mask;
1574         else if (mask->hdr.next_proto_id != 0 &&
1575                  mask->hdr.next_proto_id != 0xff)
1576                 return rte_flow_error_set(error, EINVAL,
1577                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1578                                           "partial mask is not supported"
1579                                           " for protocol");
1580         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1581                                         acc_mask ? (const uint8_t *)acc_mask
1582                                                  : (const uint8_t *)&nic_mask,
1583                                         sizeof(struct rte_flow_item_ipv4),
1584                                         error);
1585         if (ret < 0)
1586                 return ret;
1587         return 0;
1588 }
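
/*
 * Editor's note: an illustrative sketch ("#if 0", not built) of the IPv4
 * validation above. It matches UDP packets destined to 10.0.0.0/24; note
 * that the next_proto_id mask must be either 0 or 0xff since partial
 * protocol masks are rejected. "item_flags", "last_item" and "error" are
 * assumed to come from the caller's validation loop.
 */
#if 0
static int
example_validate_ipv4(uint64_t item_flags, uint64_t last_item,
                      struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv4 spec = {
                .hdr = {
                        .dst_addr = RTE_BE32(0x0a000000), /* 10.0.0.0 */
                        .next_proto_id = IPPROTO_UDP,
                },
        };
        const struct rte_flow_item_ipv4 mask = {
                .hdr = {
                        .dst_addr = RTE_BE32(0xffffff00), /* /24 prefix. */
                        .next_proto_id = 0xff,
                },
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .spec = &spec,
                .mask = &mask,
        };

        /* A NULL acc_mask selects the internal nic_mask defined above. */
        return mlx5_flow_validate_item_ipv4(&item, item_flags, last_item,
                                            RTE_ETHER_TYPE_IPV4, NULL, error);
}
#endif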
1589
1590 /**
1591  * Validate IPV6 item.
1592  *
1593  * @param[in] item
1594  *   Item specification.
1595  * @param[in] item_flags
1596  *   Bit-fields that hold the items detected until now.
1597  * @param[in] last_item
1598  *   Previous validated item in the pattern items.
1599  * @param[in] ether_type
1600  *   Type in the ethernet layer header (including dot1q).
1601  * @param[in] acc_mask
1602  *   Acceptable mask, if NULL the default internal mask
1603  *   will be used to check whether item fields are supported.
1604  * @param[out] error
1605  *   Pointer to error structure.
1606  *
1607  * @return
1608  *   0 on success, a negative errno value otherwise and rte_errno is set.
1609  */
1610 int
1611 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1612                              uint64_t item_flags,
1613                              uint64_t last_item,
1614                              uint16_t ether_type,
1615                              const struct rte_flow_item_ipv6 *acc_mask,
1616                              struct rte_flow_error *error)
1617 {
1618         const struct rte_flow_item_ipv6 *mask = item->mask;
1619         const struct rte_flow_item_ipv6 *spec = item->spec;
1620         const struct rte_flow_item_ipv6 nic_mask = {
1621                 .hdr = {
1622                         .src_addr =
1623                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1624                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1625                         .dst_addr =
1626                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1627                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1628                         .vtc_flow = RTE_BE32(0xffffffff),
1629                         .proto = 0xff,
1630                 },
1631         };
1632         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1633         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1634                                       MLX5_FLOW_LAYER_OUTER_L3;
1635         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1636                                       MLX5_FLOW_LAYER_OUTER_L4;
1637         int ret;
1638         uint8_t next_proto = 0xFF;
1639         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1640                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1641                                   MLX5_FLOW_LAYER_INNER_VLAN);
1642
1643         if ((last_item & l2_vlan) && ether_type &&
1644             ether_type != RTE_ETHER_TYPE_IPV6)
1645                 return rte_flow_error_set(error, EINVAL,
1646                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1647                                           "IPv6 cannot follow L2/VLAN layer "
1648                                           "whose ether type is not IPv6");
1649         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1650                 if (mask && spec)
1651                         next_proto = mask->hdr.proto & spec->hdr.proto;
1652                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1653                         return rte_flow_error_set(error, EINVAL,
1654                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1655                                                   item,
1656                                                   "multiple tunnel "
1657                                                   "not supported");
1658         }
1659         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1660                 return rte_flow_error_set(error, EINVAL,
1661                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1662                                           "wrong tunnel type - IPv4 specified "
1663                                           "but IPv6 item provided");
1664         if (item_flags & l3m)
1665                 return rte_flow_error_set(error, ENOTSUP,
1666                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1667                                           "multiple L3 layers not supported");
1668         else if (item_flags & l4m)
1669                 return rte_flow_error_set(error, EINVAL,
1670                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1671                                           "L3 cannot follow an L4 layer.");
1672         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1673                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1674                 return rte_flow_error_set(error, EINVAL,
1675                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1676                                           "L3 cannot follow an NVGRE layer.");
1677         if (!mask)
1678                 mask = &rte_flow_item_ipv6_mask;
1679         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1680                                         acc_mask ? (const uint8_t *)acc_mask
1681                                                  : (const uint8_t *)&nic_mask,
1682                                         sizeof(struct rte_flow_item_ipv6),
1683                                         error);
1684         if (ret < 0)
1685                 return ret;
1686         return 0;
1687 }
1688
1689 /**
1690  * Validate UDP item.
1691  *
1692  * @param[in] item
1693  *   Item specification.
1694  * @param[in] item_flags
1695  *   Bit-fields that hold the items detected until now.
1696  * @param[in] target_protocol
1697  *   The next protocol in the previous item.
1700  * @param[out] error
1701  *   Pointer to error structure.
1702  *
1703  * @return
1704  *   0 on success, a negative errno value otherwise and rte_errno is set.
1705  */
1706 int
1707 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1708                             uint64_t item_flags,
1709                             uint8_t target_protocol,
1710                             struct rte_flow_error *error)
1711 {
1712         const struct rte_flow_item_udp *mask = item->mask;
1713         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1714         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1715                                       MLX5_FLOW_LAYER_OUTER_L3;
1716         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1717                                       MLX5_FLOW_LAYER_OUTER_L4;
1718         int ret;
1719
1720         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1721                 return rte_flow_error_set(error, EINVAL,
1722                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1723                                           "protocol filtering not compatible"
1724                                           " with UDP layer");
1725         if (!(item_flags & l3m))
1726                 return rte_flow_error_set(error, EINVAL,
1727                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1728                                           "L3 is mandatory to filter on L4");
1729         if (item_flags & l4m)
1730                 return rte_flow_error_set(error, EINVAL,
1731                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1732                                           "multiple L4 layers not supported");
1733         if (!mask)
1734                 mask = &rte_flow_item_udp_mask;
1735         ret = mlx5_flow_item_acceptable
1736                 (item, (const uint8_t *)mask,
1737                  (const uint8_t *)&rte_flow_item_udp_mask,
1738                  sizeof(struct rte_flow_item_udp), error);
1739         if (ret < 0)
1740                 return ret;
1741         return 0;
1742 }
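
/*
 * Editor's note: an illustrative sketch ("#if 0", not built) of the UDP
 * validation above, matching destination port 4789 (VXLAN). The preceding
 * L3 item must already be recorded in "item_flags" and its protocol passed
 * as "target_protocol" (0xff when the L3 protocol field is not matched).
 */
#if 0
static int
example_validate_udp(uint64_t item_flags, uint8_t target_protocol,
                     struct rte_flow_error *error)
{
        const struct rte_flow_item_udp spec = {
                .hdr.dst_port = RTE_BE16(4789),
        };
        const struct rte_flow_item_udp mask = {
                .hdr.dst_port = RTE_BE16(0xffff),
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_udp(&item, item_flags,
                                           target_protocol, error);
}
#endif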
1743
1744 /**
1745  * Validate TCP item.
1746  *
1747  * @param[in] item
1748  *   Item specification.
1749  * @param[in] item_flags
1750  *   Bit-fields that hold the items detected until now.
1751  * @param[in] target_protocol
1752  *   The next protocol in the previous item.
 * @param[in] flow_mask
 *   mlx5 flow-specific (DV, Verbs, etc.) supported header fields mask.
1753  * @param[out] error
1754  *   Pointer to error structure.
1755  *
1756  * @return
1757  *   0 on success, a negative errno value otherwise and rte_errno is set.
1758  */
1759 int
1760 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1761                             uint64_t item_flags,
1762                             uint8_t target_protocol,
1763                             const struct rte_flow_item_tcp *flow_mask,
1764                             struct rte_flow_error *error)
1765 {
1766         const struct rte_flow_item_tcp *mask = item->mask;
1767         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1768         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1769                                       MLX5_FLOW_LAYER_OUTER_L3;
1770         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1771                                       MLX5_FLOW_LAYER_OUTER_L4;
1772         int ret;
1773
1774         MLX5_ASSERT(flow_mask);
1775         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1776                 return rte_flow_error_set(error, EINVAL,
1777                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1778                                           "protocol filtering not compatible"
1779                                           " with TCP layer");
1780         if (!(item_flags & l3m))
1781                 return rte_flow_error_set(error, EINVAL,
1782                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1783                                           "L3 is mandatory to filter on L4");
1784         if (item_flags & l4m)
1785                 return rte_flow_error_set(error, EINVAL,
1786                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1787                                           "multiple L4 layers not supported");
1788         if (!mask)
1789                 mask = &rte_flow_item_tcp_mask;
1790         ret = mlx5_flow_item_acceptable
1791                 (item, (const uint8_t *)mask,
1792                  (const uint8_t *)flow_mask,
1793                  sizeof(struct rte_flow_item_tcp), error);
1794         if (ret < 0)
1795                 return ret;
1796         return 0;
1797 }
1798
1799 /**
1800  * Validate VXLAN item.
1801  *
1802  * @param[in] item
1803  *   Item specification.
1804  * @param[in] item_flags
1805  *   Bit-fields that hold the items detected until now.
1808  * @param[out] error
1809  *   Pointer to error structure.
1810  *
1811  * @return
1812  *   0 on success, a negative errno value otherwise and rte_errno is set.
1813  */
1814 int
1815 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1816                               uint64_t item_flags,
1817                               struct rte_flow_error *error)
1818 {
1819         const struct rte_flow_item_vxlan *spec = item->spec;
1820         const struct rte_flow_item_vxlan *mask = item->mask;
1821         int ret;
1822         union vni {
1823                 uint32_t vlan_id;
1824                 uint8_t vni[4];
1825         } id = { .vlan_id = 0, };
1826
1827
1828         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1829                 return rte_flow_error_set(error, ENOTSUP,
1830                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1831                                           "multiple tunnel layers not"
1832                                           " supported");
1833         /*
1834          * Verify only UDPv4 is present as defined in
1835          * https://tools.ietf.org/html/rfc7348
1836          */
1837         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1838                 return rte_flow_error_set(error, EINVAL,
1839                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1840                                           "no outer UDP layer found");
1841         if (!mask)
1842                 mask = &rte_flow_item_vxlan_mask;
1843         ret = mlx5_flow_item_acceptable
1844                 (item, (const uint8_t *)mask,
1845                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1846                  sizeof(struct rte_flow_item_vxlan),
1847                  error);
1848         if (ret < 0)
1849                 return ret;
1850         if (spec) {
1851                 memcpy(&id.vni[1], spec->vni, 3);
1852                 memcpy(&id.vni[1], mask->vni, 3);
1853         }
1854         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1855                 return rte_flow_error_set(error, ENOTSUP,
1856                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1857                                           "VXLAN tunnel must be fully defined");
1858         return 0;
1859 }
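
/*
 * Editor's note: an illustrative sketch ("#if 0", not built) of the VXLAN
 * validation above, matching VNI 42. The caller is assumed to have already
 * walked the outer ETH/IPv4/UDP items, so "item_flags" carries the matching
 * MLX5_FLOW_LAYER_OUTER_* bits, including MLX5_FLOW_LAYER_OUTER_L4_UDP.
 */
#if 0
static int
example_validate_vxlan(uint64_t item_flags, struct rte_flow_error *error)
{
        const struct rte_flow_item_vxlan spec = {
                .vni = "\x00\x00\x2a", /* VNI 42. */
        };
        const struct rte_flow_item_vxlan mask = {
                .vni = "\xff\xff\xff",
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_validate_item_vxlan(&item, item_flags, error);
}
#endif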
1860
1861 /**
1862  * Validate VXLAN_GPE item.
1863  *
1864  * @param[in] item
1865  *   Item specification.
1866  * @param[in] item_flags
1867  *   Bit-fields that hold the items detected until now.
1868  * @param[in] dev
1869  *   Pointer to the Ethernet device structure.
1872  * @param[out] error
1873  *   Pointer to error structure.
1874  *
1875  * @return
1876  *   0 on success, a negative errno value otherwise and rte_errno is set.
1877  */
1878 int
1879 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1880                                   uint64_t item_flags,
1881                                   struct rte_eth_dev *dev,
1882                                   struct rte_flow_error *error)
1883 {
1884         struct mlx5_priv *priv = dev->data->dev_private;
1885         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1886         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1887         int ret;
1888         union vni {
1889                 uint32_t vlan_id;
1890                 uint8_t vni[4];
1891         } id = { .vlan_id = 0, };
1892
1893         if (!priv->config.l3_vxlan_en)
1894                 return rte_flow_error_set(error, ENOTSUP,
1895                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1896                                           "L3 VXLAN is not enabled by device"
1897                                           " parameter and/or not configured in"
1898                                           " firmware");
1899         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1900                 return rte_flow_error_set(error, ENOTSUP,
1901                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1902                                           "multiple tunnel layers not"
1903                                           " supported");
1904         /*
1905          * Verify only UDPv4 is present as defined in
1906          * https://tools.ietf.org/html/rfc7348
1907          */
1908         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1909                 return rte_flow_error_set(error, EINVAL,
1910                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1911                                           "no outer UDP layer found");
1912         if (!mask)
1913                 mask = &rte_flow_item_vxlan_gpe_mask;
1914         ret = mlx5_flow_item_acceptable
1915                 (item, (const uint8_t *)mask,
1916                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1917                  sizeof(struct rte_flow_item_vxlan_gpe),
1918                  error);
1919         if (ret < 0)
1920                 return ret;
1921         if (spec) {
1922                 if (spec->protocol)
1923                         return rte_flow_error_set(error, ENOTSUP,
1924                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1925                                                   item,
1926                                                   "VxLAN-GPE protocol"
1927                                                   " not supported");
1928                 memcpy(&id.vni[1], spec->vni, 3);
1929                 memcpy(&id.vni[1], mask->vni, 3);
1930         }
1931         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1932                 return rte_flow_error_set(error, ENOTSUP,
1933                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1934                                           "VXLAN-GPE tunnel must be fully"
1935                                           " defined");
1936         return 0;
1937 }

1938 /**
1939  * Validate GRE Key item.
1940  *
1941  * @param[in] item
1942  *   Item specification.
1943  * @param[in] item_flags
1944  *   Bit flags to mark detected items.
1945  * @param[in] gre_item
1946  *   Pointer to the GRE item.
1947  * @param[out] error
1948  *   Pointer to error structure.
1949  *
1950  * @return
1951  *   0 on success, a negative errno value otherwise and rte_errno is set.
1952  */
1953 int
1954 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
1955                                 uint64_t item_flags,
1956                                 const struct rte_flow_item *gre_item,
1957                                 struct rte_flow_error *error)
1958 {
1959         const rte_be32_t *mask = item->mask;
1960         int ret = 0;
1961         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
1962         const struct rte_flow_item_gre *gre_spec;
1963         const struct rte_flow_item_gre *gre_mask;
1964
1965         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
1966                 return rte_flow_error_set(error, ENOTSUP,
1967                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1968                                           "Multiple GRE keys not supported");
1969         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
1970                 return rte_flow_error_set(error, ENOTSUP,
1971                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1972                                           "No preceding GRE header");
1973         if (item_flags & MLX5_FLOW_LAYER_INNER)
1974                 return rte_flow_error_set(error, ENOTSUP,
1975                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1976                                           "GRE key following a wrong item");
1977         gre_mask = gre_item->mask;
1978         if (!gre_mask)
1979                 gre_mask = &rte_flow_item_gre_mask;
1980         gre_spec = gre_item->spec;
1981         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
1982                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
1983                 return rte_flow_error_set(error, EINVAL,
1984                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1985                                           "Key bit must be on");
1986
1987         if (!mask)
1988                 mask = &gre_key_default_mask;
1989         ret = mlx5_flow_item_acceptable
1990                 (item, (const uint8_t *)mask,
1991                  (const uint8_t *)&gre_key_default_mask,
1992                  sizeof(rte_be32_t), error);
1993         return ret;
1994 }
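
/*
 * Editor's note: an illustrative sketch ("#if 0", not built) of the GRE key
 * validation above. The GRE item itself must have the K bit (0x2000 in
 * c_rsvd0_ver) set for a following GRE_KEY item to be accepted, and
 * "item_flags" is assumed to already carry MLX5_FLOW_LAYER_GRE.
 */
#if 0
static int
example_validate_gre_key(uint64_t item_flags, struct rte_flow_error *error)
{
        const struct rte_flow_item_gre gre_spec = {
                .c_rsvd0_ver = RTE_BE16(0x2000), /* K bit set. */
        };
        const struct rte_flow_item_gre gre_mask = {
                .c_rsvd0_ver = RTE_BE16(0x2000),
        };
        const struct rte_flow_item gre_item = {
                .type = RTE_FLOW_ITEM_TYPE_GRE,
                .spec = &gre_spec,
                .mask = &gre_mask,
        };
        const rte_be32_t key_spec = RTE_BE32(42);
        const rte_be32_t key_mask = RTE_BE32(UINT32_MAX);
        const struct rte_flow_item key_item = {
                .type = RTE_FLOW_ITEM_TYPE_GRE_KEY,
                .spec = &key_spec,
                .mask = &key_mask,
        };

        return mlx5_flow_validate_item_gre_key(&key_item, item_flags,
                                               &gre_item, error);
}
#endif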
1995
1996 /**
1997  * Validate GRE item.
1998  *
1999  * @param[in] item
2000  *   Item specification.
2001  * @param[in] item_flags
2002  *   Bit flags to mark detected items.
2003  * @param[in] target_protocol
2004  *   The next protocol in the previous item.
2005  * @param[out] error
2006  *   Pointer to error structure.
2007  *
2008  * @return
2009  *   0 on success, a negative errno value otherwise and rte_errno is set.
2010  */
2011 int
2012 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
2013                             uint64_t item_flags,
2014                             uint8_t target_protocol,
2015                             struct rte_flow_error *error)
2016 {
2017         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
2018         const struct rte_flow_item_gre *mask = item->mask;
2019         int ret;
2020         const struct rte_flow_item_gre nic_mask = {
2021                 .c_rsvd0_ver = RTE_BE16(0xB000),
2022                 .protocol = RTE_BE16(UINT16_MAX),
2023         };
2024
2025         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2026                 return rte_flow_error_set(error, EINVAL,
2027                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2028                                           "protocol filtering not compatible"
2029                                           " with this GRE layer");
2030         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2031                 return rte_flow_error_set(error, ENOTSUP,
2032                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2033                                           "multiple tunnel layers not"
2034                                           " supported");
2035         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2036                 return rte_flow_error_set(error, ENOTSUP,
2037                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2038                                           "L3 Layer is missing");
2039         if (!mask)
2040                 mask = &rte_flow_item_gre_mask;
2041         ret = mlx5_flow_item_acceptable
2042                 (item, (const uint8_t *)mask,
2043                  (const uint8_t *)&nic_mask,
2044                  sizeof(struct rte_flow_item_gre), error);
2045         if (ret < 0)
2046                 return ret;
2047 #ifndef HAVE_MLX5DV_DR
2048 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
2049         if (spec && (spec->protocol & mask->protocol))
2050                 return rte_flow_error_set(error, ENOTSUP,
2051                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2052                                           "without MPLS support the"
2053                                           " specification cannot be used for"
2054                                           " filtering");
2055 #endif
2056 #endif
2057         return 0;
2058 }
2059
2060 /**
2061  * Validate Geneve item.
2062  *
2063  * @param[in] item
2064  *   Item specification.
2065  * @param[in] item_flags
2066  *   Bit-fields that hold the items detected until now.
2067  * @param[in] dev
2068  *   Pointer to the Ethernet device structure.
2069  * @param[out] error
2070  *   Pointer to error structure.
2071  *
2072  * @return
2073  *   0 on success, a negative errno value otherwise and rte_errno is set.
2074  */
2076 int
2077 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
2078                                uint64_t item_flags,
2079                                struct rte_eth_dev *dev,
2080                                struct rte_flow_error *error)
2081 {
2082         struct mlx5_priv *priv = dev->data->dev_private;
2083         const struct rte_flow_item_geneve *spec = item->spec;
2084         const struct rte_flow_item_geneve *mask = item->mask;
2085         int ret;
2086         uint16_t gbhdr;
2087         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2088                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2089         const struct rte_flow_item_geneve nic_mask = {
2090                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2091                 .vni = "\xff\xff\xff",
2092                 .protocol = RTE_BE16(UINT16_MAX),
2093         };
2094
2095         if (!priv->config.hca_attr.tunnel_stateless_geneve_rx)
2096                 return rte_flow_error_set(error, ENOTSUP,
2097                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2098                                           "L3 Geneve is not enabled by device"
2099                                           " parameter and/or not configured in"
2100                                           " firmware");
2101         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2102                 return rte_flow_error_set(error, ENOTSUP,
2103                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2104                                           "multiple tunnel layers not"
2105                                           " supported");
2106         /*
2107          * Verify an outer UDPv4 layer is present as required by
2108          * the Geneve specification.
2109          */
2110         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2111                 return rte_flow_error_set(error, EINVAL,
2112                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2113                                           "no outer UDP layer found");
2114         if (!mask)
2115                 mask = &rte_flow_item_geneve_mask;
2116         ret = mlx5_flow_item_acceptable
2117                                   (item, (const uint8_t *)mask,
2118                                    (const uint8_t *)&nic_mask,
2119                                    sizeof(struct rte_flow_item_geneve), error);
2120         if (ret)
2121                 return ret;
2122         if (spec) {
2123                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2124                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2125                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2126                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2127                         return rte_flow_error_set(error, ENOTSUP,
2128                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2129                                                   item,
2130                                                   "Geneve protocol unsupported"
2131                                                   " fields are being used");
2132                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2133                         return rte_flow_error_set
2134                                         (error, ENOTSUP,
2135                                          RTE_FLOW_ERROR_TYPE_ITEM,
2136                                          item,
2137                                          "Unsupported Geneve options length");
2138         }
2139         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2140                 return rte_flow_error_set
2141                                     (error, ENOTSUP,
2142                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2143                                      "Geneve tunnel must be fully defined");
2144         return 0;
2145 }
2146
2147 /**
2148  * Validate MPLS item.
2149  *
2150  * @param[in] dev
2151  *   Pointer to the rte_eth_dev structure.
2152  * @param[in] item
2153  *   Item specification.
2154  * @param[in] item_flags
2155  *   Bit-fields that hold the items detected until now.
2156  * @param[in] prev_layer
2157  *   The protocol layer indicated in previous item.
2158  * @param[out] error
2159  *   Pointer to error structure.
2160  *
2161  * @return
2162  *   0 on success, a negative errno value otherwise and rte_errno is set.
2163  */
2164 int
2165 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2166                              const struct rte_flow_item *item __rte_unused,
2167                              uint64_t item_flags __rte_unused,
2168                              uint64_t prev_layer __rte_unused,
2169                              struct rte_flow_error *error)
2170 {
2171 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2172         const struct rte_flow_item_mpls *mask = item->mask;
2173         struct mlx5_priv *priv = dev->data->dev_private;
2174         int ret;
2175
2176         if (!priv->config.mpls_en)
2177                 return rte_flow_error_set(error, ENOTSUP,
2178                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2179                                           "MPLS not supported or"
2180                                           " disabled in firmware"
2181                                           " configuration.");
2182         /* MPLS over IP, UDP, GRE is allowed */
2183         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2184                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2185                             MLX5_FLOW_LAYER_GRE)))
2186                 return rte_flow_error_set(error, EINVAL,
2187                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2188                                           "protocol filtering not compatible"
2189                                           " with MPLS layer");
2190         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2191         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2192             !(item_flags & MLX5_FLOW_LAYER_GRE))
2193                 return rte_flow_error_set(error, ENOTSUP,
2194                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2195                                           "multiple tunnel layers not"
2196                                           " supported");
2197         if (!mask)
2198                 mask = &rte_flow_item_mpls_mask;
2199         ret = mlx5_flow_item_acceptable
2200                 (item, (const uint8_t *)mask,
2201                  (const uint8_t *)&rte_flow_item_mpls_mask,
2202                  sizeof(struct rte_flow_item_mpls), error);
2203         if (ret < 0)
2204                 return ret;
2205         return 0;
2206 #else
2207         return rte_flow_error_set(error, ENOTSUP,
2208                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2209                                   "MPLS is not supported by Verbs, please"
2210                                   " update.");
2211 #endif
2212 }
2213
2214 /**
2215  * Validate NVGRE item.
2216  *
2217  * @param[in] item
2218  *   Item specification.
2219  * @param[in] item_flags
2220  *   Bit flags to mark detected items.
2221  * @param[in] target_protocol
2222  *   The next protocol in the previous item.
2223  * @param[out] error
2224  *   Pointer to error structure.
2225  *
2226  * @return
2227  *   0 on success, a negative errno value otherwise and rte_errno is set.
2228  */
2229 int
2230 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2231                               uint64_t item_flags,
2232                               uint8_t target_protocol,
2233                               struct rte_flow_error *error)
2234 {
2235         const struct rte_flow_item_nvgre *mask = item->mask;
2236         int ret;
2237
2238         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2239                 return rte_flow_error_set(error, EINVAL,
2240                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2241                                           "protocol filtering not compatible"
2242                                           " with this GRE layer");
2243         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2244                 return rte_flow_error_set(error, ENOTSUP,
2245                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2246                                           "multiple tunnel layers not"
2247                                           " supported");
2248         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2249                 return rte_flow_error_set(error, ENOTSUP,
2250                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2251                                           "L3 Layer is missing");
2252         if (!mask)
2253                 mask = &rte_flow_item_nvgre_mask;
2254         ret = mlx5_flow_item_acceptable
2255                 (item, (const uint8_t *)mask,
2256                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2257                  sizeof(struct rte_flow_item_nvgre), error);
2258         if (ret < 0)
2259                 return ret;
2260         return 0;
2261 }
2262
2263 /**
2264  * Validate eCPRI item.
2265  *
2266  * @param[in] item
2267  *   Item specification.
2268  * @param[in] item_flags
2269  *   Bit-fields that holds the items detected until now.
2270  * @param[in] last_item
2271  *   Previous validated item in the pattern items.
2272  * @param[in] ether_type
2273  *   Type in the ethernet layer header (including dot1q).
2274  * @param[in] acc_mask
2275  *   Acceptable mask, if NULL the default internal mask
2276  *   will be used to check whether item fields are supported.
2277  * @param[out] error
2278  *   Pointer to error structure.
2279  *
2280  * @return
2281  *   0 on success, a negative errno value otherwise and rte_errno is set.
2282  */
2283 int
2284 mlx5_flow_validate_item_ecpri(const struct rte_flow_item *item,
2285                               uint64_t item_flags,
2286                               uint64_t last_item,
2287                               uint16_t ether_type,
2288                               const struct rte_flow_item_ecpri *acc_mask,
2289                               struct rte_flow_error *error)
2290 {
2291         const struct rte_flow_item_ecpri *mask = item->mask;
2292         const struct rte_flow_item_ecpri nic_mask = {
2293                 .hdr = {
2294                         .common = {
2295                                 .u32 =
2296                                 RTE_BE32(((const struct rte_ecpri_common_hdr) {
2297                                         .type = 0xFF,
2298                                         }).u32),
2299                         },
2300                         .dummy[0] = 0xFFFFFFFF,
2301                 },
2302         };
2303         const uint64_t outer_l2_vlan = (MLX5_FLOW_LAYER_OUTER_L2 |
2304                                         MLX5_FLOW_LAYER_OUTER_VLAN);
2305         struct rte_flow_item_ecpri mask_lo;
2306
2307         if ((last_item & outer_l2_vlan) && ether_type &&
2308             ether_type != RTE_ETHER_TYPE_ECPRI)
2309                 return rte_flow_error_set(error, EINVAL,
2310                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2311                                           "eCPRI cannot follow L2/VLAN layer "
2312                                           "whose ether type is not 0xAEFE.");
2313         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2314                 return rte_flow_error_set(error, EINVAL,
2315                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2316                                           "eCPRI with tunnel is not supported "
2317                                           "right now.");
2318         if (item_flags & MLX5_FLOW_LAYER_OUTER_L3)
2319                 return rte_flow_error_set(error, ENOTSUP,
2320                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2321                                           "multiple L3 layers not supported");
2322         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_TCP)
2323                 return rte_flow_error_set(error, EINVAL,
2324                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2325                                           "eCPRI cannot follow a TCP layer.");
2326         /* Per the specification, eCPRI can also be carried over a UDP layer. */
2327         else if (item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP)
2328                 return rte_flow_error_set(error, EINVAL,
2329                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2330                                           "eCPRI over the UDP layer is not"
2331                                           " supported yet.");
2332         /* Mask for type field in common header could be zero. */
2333         if (!mask)
2334                 mask = &rte_flow_item_ecpri_mask;
2335         mask_lo.hdr.common.u32 = rte_be_to_cpu_32(mask->hdr.common.u32);
2336         /* Input mask is in big-endian format. */
2337         if (mask_lo.hdr.common.type != 0 && mask_lo.hdr.common.type != 0xff)
2338                 return rte_flow_error_set(error, EINVAL,
2339                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2340                                           "partial mask is not supported "
2341                                           "for protocol");
2342         else if (mask_lo.hdr.common.type == 0 && mask->hdr.dummy[0] != 0)
2343                 return rte_flow_error_set(error, EINVAL,
2344                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
2345                                           "message header mask must be after "
2346                                           "a type mask");
2347         return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2348                                          acc_mask ? (const uint8_t *)acc_mask
2349                                                   : (const uint8_t *)&nic_mask,
2350                                          sizeof(struct rte_flow_item_ecpri),
2351                                          error);
2352 }
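
/*
 * Editor's note: an illustrative sketch ("#if 0", not built) of the eCPRI
 * validation above, matching message type 0 (IQ data). As checked above,
 * the type mask in the common header must be either 0 or 0xff, and a
 * dummy[0] mask is only accepted together with a type mask. All arguments
 * are assumed to come from the caller's validation loop.
 */
#if 0
static int
example_validate_ecpri(uint64_t item_flags, uint64_t last_item,
                       struct rte_flow_error *error)
{
        const struct rte_flow_item_ecpri spec = {
                .hdr.common.u32 =
                        RTE_BE32(((const struct rte_ecpri_common_hdr) {
                                .type = 0x00, /* Message type #0: IQ data. */
                        }).u32),
        };
        const struct rte_flow_item_ecpri mask = {
                .hdr.common.u32 =
                        RTE_BE32(((const struct rte_ecpri_common_hdr) {
                                .type = 0xFF,
                        }).u32),
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_ECPRI,
                .spec = &spec,
                .mask = &mask,
        };

        /* A NULL acc_mask selects the internal nic_mask defined above. */
        return mlx5_flow_validate_item_ecpri(&item, item_flags, last_item,
                                             RTE_ETHER_TYPE_ECPRI, NULL,
                                             error);
}
#endif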
2353
2354 /* Allocate unique ID for the split Q/RSS subflows. */
2355 static uint32_t
2356 flow_qrss_get_id(struct rte_eth_dev *dev)
2357 {
2358         struct mlx5_priv *priv = dev->data->dev_private;
2359         uint32_t qrss_id, ret;
2360
2361         ret = mlx5_flow_id_get(priv->qrss_id_pool, &qrss_id);
2362         if (ret)
2363                 return 0;
2364         MLX5_ASSERT(qrss_id);
2365         return qrss_id;
2366 }
2367
2368 /* Free unique ID for the split Q/RSS subflows. */
2369 static void
2370 flow_qrss_free_id(struct rte_eth_dev *dev, uint32_t qrss_id)
2371 {
2372         struct mlx5_priv *priv = dev->data->dev_private;
2373
2374         if (qrss_id)
2375                 mlx5_flow_id_release(priv->qrss_id_pool, qrss_id);
2376 }
2377
2378 /**
2379  * Release resources related to the QUEUE/RSS action split.
2380  *
2381  * @param dev
2382  *   Pointer to Ethernet device.
2383  * @param flow
2384  *   Flow to release IDs from.
2385  */
2386 static void
2387 flow_mreg_split_qrss_release(struct rte_eth_dev *dev,
2388                              struct rte_flow *flow)
2389 {
2390         struct mlx5_priv *priv = dev->data->dev_private;
2391         uint32_t handle_idx;
2392         struct mlx5_flow_handle *dev_handle;
2393
2394         SILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_MLX5_FLOW], flow->dev_handles,
2395                        handle_idx, dev_handle, next)
2396                 if (dev_handle->split_flow_id)
2397                         flow_qrss_free_id(dev, dev_handle->split_flow_id);
2398 }
2399
2400 static int
2401 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2402                    const struct rte_flow_attr *attr __rte_unused,
2403                    const struct rte_flow_item items[] __rte_unused,
2404                    const struct rte_flow_action actions[] __rte_unused,
2405                    bool external __rte_unused,
2406                    int hairpin __rte_unused,
2407                    struct rte_flow_error *error)
2408 {
2409         return rte_flow_error_set(error, ENOTSUP,
2410                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2411 }
2412
2413 static struct mlx5_flow *
2414 flow_null_prepare(struct rte_eth_dev *dev __rte_unused,
2415                   const struct rte_flow_attr *attr __rte_unused,
2416                   const struct rte_flow_item items[] __rte_unused,
2417                   const struct rte_flow_action actions[] __rte_unused,
2418                   struct rte_flow_error *error)
2419 {
2420         rte_flow_error_set(error, ENOTSUP,
2421                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2422         return NULL;
2423 }
2424
2425 static int
2426 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2427                     struct mlx5_flow *dev_flow __rte_unused,
2428                     const struct rte_flow_attr *attr __rte_unused,
2429                     const struct rte_flow_item items[] __rte_unused,
2430                     const struct rte_flow_action actions[] __rte_unused,
2431                     struct rte_flow_error *error)
2432 {
2433         return rte_flow_error_set(error, ENOTSUP,
2434                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2435 }
2436
2437 static int
2438 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2439                 struct rte_flow *flow __rte_unused,
2440                 struct rte_flow_error *error)
2441 {
2442         return rte_flow_error_set(error, ENOTSUP,
2443                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2444 }
2445
2446 static void
2447 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2448                  struct rte_flow *flow __rte_unused)
2449 {
2450 }
2451
2452 static void
2453 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2454                   struct rte_flow *flow __rte_unused)
2455 {
2456 }
2457
2458 static int
2459 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2460                 struct rte_flow *flow __rte_unused,
2461                 const struct rte_flow_action *actions __rte_unused,
2462                 void *data __rte_unused,
2463                 struct rte_flow_error *error)
2464 {
2465         return rte_flow_error_set(error, ENOTSUP,
2466                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2467 }
2468
2469 /* Void driver to protect from null pointer reference. */
2470 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2471         .validate = flow_null_validate,
2472         .prepare = flow_null_prepare,
2473         .translate = flow_null_translate,
2474         .apply = flow_null_apply,
2475         .remove = flow_null_remove,
2476         .destroy = flow_null_destroy,
2477         .query = flow_null_query,
2478 };
2479
2480 /**
2481  * Select flow driver type according to flow attributes and device
2482  * configuration.
2483  *
2484  * @param[in] dev
2485  *   Pointer to the dev structure.
2486  * @param[in] attr
2487  *   Pointer to the flow attributes.
2488  *
2489  * @return
2490  *   flow driver type, MLX5_FLOW_TYPE_MAX otherwise.
2491  */
2492 static enum mlx5_flow_drv_type
2493 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2494 {
2495         struct mlx5_priv *priv = dev->data->dev_private;
2496         /* The OS may determine a specific flow type (DV, VERBS) first. */
2497         enum mlx5_flow_drv_type type = mlx5_flow_os_get_type();
2498
2499         if (type != MLX5_FLOW_TYPE_MAX)
2500                 return type;
2501         /* If no OS specific type - continue with DV/VERBS selection */
2502         if (attr->transfer && priv->config.dv_esw_en)
2503                 type = MLX5_FLOW_TYPE_DV;
2504         if (!attr->transfer)
2505                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2506                                                  MLX5_FLOW_TYPE_VERBS;
2507         return type;
2508 }
2509
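/* Resolve the flow driver ops table for the given driver type. */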
2510 #define flow_get_drv_ops(type) flow_drv_ops[type]
2511
2512 /**
2513  * Flow driver validation API. This abstracts calling driver specific functions.
2514  * The type of flow driver is determined according to flow attributes.
2515  *
2516  * @param[in] dev
2517  *   Pointer to the dev structure.
2518  * @param[in] attr
2519  *   Pointer to the flow attributes.
2520  * @param[in] items
2521  *   Pointer to the list of items.
2522  * @param[in] actions
2523  *   Pointer to the list of actions.
2524  * @param[in] external
2525  *   This flow rule is created by a request external to the PMD.
2526  * @param[in] hairpin
2527  *   Number of hairpin TX actions, 0 means classic flow.
2528  * @param[out] error
2529  *   Pointer to the error structure.
2530  *
2531  * @return
2532  *   0 on success, a negative errno value otherwise and rte_errno is set.
2533  */
2534 static inline int
2535 flow_drv_validate(struct rte_eth_dev *dev,
2536                   const struct rte_flow_attr *attr,
2537                   const struct rte_flow_item items[],
2538                   const struct rte_flow_action actions[],
2539                   bool external, int hairpin, struct rte_flow_error *error)
2540 {
2541         const struct mlx5_flow_driver_ops *fops;
2542         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2543
2544         fops = flow_get_drv_ops(type);
2545         return fops->validate(dev, attr, items, actions, external,
2546                               hairpin, error);
2547 }
2548
2549 /**
2550  * Flow driver preparation API. This abstracts calling driver specific
2551  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2552  * calculates the size of memory required for device flow, allocates the memory,
2553  * initializes the device flow and returns the pointer.
2554  *
2555  * @note
2556  *   This function initializes device flow structure such as dv or verbs in
2557  *   struct mlx5_flow. However, it is the caller's responsibility to initialize the
2558  *   rest. For example, adding the returned device flow to the flow->dev_flow list
2559  *   and setting the backward reference to the flow should be done outside of this
2560  *   function. The layers field is not filled either.
2561  *
2562  * @param[in] dev
2563  *   Pointer to the dev structure.
2564  * @param[in] attr
2565  *   Pointer to the flow attributes.
2566  * @param[in] items
2567  *   Pointer to the list of items.
2568  * @param[in] actions
2569  *   Pointer to the list of actions.
2570  * @param[in] flow_idx
2571  *   The memory pool index of the flow.
2572  * @param[out] error
2573  *   Pointer to the error structure.
2574  *
2575  * @return
2576  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2577  */
2578 static inline struct mlx5_flow *
2579 flow_drv_prepare(struct rte_eth_dev *dev,
2580                  const struct rte_flow *flow,
2581                  const struct rte_flow_attr *attr,
2582                  const struct rte_flow_item items[],
2583                  const struct rte_flow_action actions[],
2584                  uint32_t flow_idx,
2585                  struct rte_flow_error *error)
2586 {
2587         const struct mlx5_flow_driver_ops *fops;
2588         enum mlx5_flow_drv_type type = flow->drv_type;
2589         struct mlx5_flow *mlx5_flow = NULL;
2590
2591         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2592         fops = flow_get_drv_ops(type);
2593         mlx5_flow = fops->prepare(dev, attr, items, actions, error);
2594         if (mlx5_flow)
2595                 mlx5_flow->flow_idx = flow_idx;
2596         return mlx5_flow;
2597 }
2598
2599 /**
2600  * Flow driver translation API. This abstracts calling driver specific
2601  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2602  * translates a generic flow into a driver flow. flow_drv_prepare() must
2603  * precede.
2604  *
2605  * @note
2606  *   dev_flow->layers could be filled as a result of parsing during translation
2607  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2608  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2609  *   flow->actions could be overwritten even though all the expanded dev_flows
2610  *   have the same actions.
2611  *
2612  * @param[in] dev
2613  *   Pointer to the rte dev structure.
2614  * @param[in, out] dev_flow
2615  *   Pointer to the mlx5 flow.
2616  * @param[in] attr
2617  *   Pointer to the flow attributes.
2618  * @param[in] items
2619  *   Pointer to the list of items.
2620  * @param[in] actions
2621  *   Pointer to the list of actions.
2622  * @param[out] error
2623  *   Pointer to the error structure.
2624  *
2625  * @return
2626  *   0 on success, a negative errno value otherwise and rte_errno is set.
2627  */
2628 static inline int
2629 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2630                    const struct rte_flow_attr *attr,
2631                    const struct rte_flow_item items[],
2632                    const struct rte_flow_action actions[],
2633                    struct rte_flow_error *error)
2634 {
2635         const struct mlx5_flow_driver_ops *fops;
2636         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2637
2638         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2639         fops = flow_get_drv_ops(type);
2640         return fops->translate(dev, dev_flow, attr, items, actions, error);
2641 }
2642
2643 /**
2644  * Flow driver apply API. This abstracts calling driver specific functions.
2645  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2646  * translated driver flows on to device. flow_drv_translate() must precede.
2647  *
2648  * @param[in] dev
2649  *   Pointer to Ethernet device structure.
2650  * @param[in, out] flow
2651  *   Pointer to flow structure.
2652  * @param[out] error
2653  *   Pointer to error structure.
2654  *
2655  * @return
2656  *   0 on success, a negative errno value otherwise and rte_errno is set.
2657  */
2658 static inline int
2659 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2660                struct rte_flow_error *error)
2661 {
2662         const struct mlx5_flow_driver_ops *fops;
2663         enum mlx5_flow_drv_type type = flow->drv_type;
2664
2665         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2666         fops = flow_get_drv_ops(type);
2667         return fops->apply(dev, flow, error);
2668 }
2669
2670 /**
2671  * Flow driver remove API. This abstracts calling driver specific functions.
2672  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2673  * on device. All the resources of the flow should be freed by calling
2674  * flow_drv_destroy().
2675  *
2676  * @param[in] dev
2677  *   Pointer to Ethernet device.
2678  * @param[in, out] flow
2679  *   Pointer to flow structure.
2680  */
2681 static inline void
2682 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2683 {
2684         const struct mlx5_flow_driver_ops *fops;
2685         enum mlx5_flow_drv_type type = flow->drv_type;
2686
2687         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2688         fops = flow_get_drv_ops(type);
2689         fops->remove(dev, flow);
2690 }
2691
2692 /**
2693  * Flow driver destroy API. This abstracts calling driver specific functions.
2694  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2695  * on device and releases resources of the flow.
2696  *
2697  * @param[in] dev
2698  *   Pointer to Ethernet device.
2699  * @param[in, out] flow
2700  *   Pointer to flow structure.
2701  */
2702 static inline void
2703 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2704 {
2705         const struct mlx5_flow_driver_ops *fops;
2706         enum mlx5_flow_drv_type type = flow->drv_type;
2707
2708         flow_mreg_split_qrss_release(dev, flow);
2709         MLX5_ASSERT(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2710         fops = flow_get_drv_ops(type);
2711         fops->destroy(dev, flow);
2712 }
2713
2714 /**
2715  * Get RSS action from the action list.
2716  *
2717  * @param[in] actions
2718  *   Pointer to the list of actions.
2719  *
2720  * @return
2721  *   Pointer to the RSS action if it exists, NULL otherwise.
2722  */
2723 static const struct rte_flow_action_rss*
2724 flow_get_rss_action(const struct rte_flow_action actions[])
2725 {
2726         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2727                 switch (actions->type) {
2728                 case RTE_FLOW_ACTION_TYPE_RSS:
2729                         return (const struct rte_flow_action_rss *)
2730                                actions->conf;
2731                 default:
2732                         break;
2733                 }
2734         }
2735         return NULL;
2736 }
2737
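/**
 * Select the root node of the RSS expansion graph according to the flow
 * pattern and the requested RSS level.
 *
 * @param[in] pattern
 *   Flow pattern items (list terminated by the END pattern item).
 * @param[in] rss_level
 *   Requested RSS level; values of 2 and above select the outer expansion
 *   roots (presumably to expand the inner headers of tunneled traffic).
 *
 * @return
 *   Index of the matching root node in mlx5_support_expansion[], using the
 *   *_ETH_VLAN variants when the pattern contains a VLAN item.
 */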
2738 static unsigned int
2739 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2740 {
2741         const struct rte_flow_item *item;
2742         unsigned int has_vlan = 0;
2743
2744         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2745                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2746                         has_vlan = 1;
2747                         break;
2748                 }
2749         }
2750         if (has_vlan)
2751                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2752                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2753         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2754                                MLX5_EXPANSION_ROOT_OUTER;
2755 }
2756
2757 /**
2758  *  Get layer flags from the prefix flow.
2759  *
2760  *  Some flows may be split into several subflows; the prefix subflow gets the
2761  *  match items and the suffix subflow gets the actions.
2762  *  Some actions need the user defined match item flags to get the details for
2763  *  the action.
2764  *  This function helps the suffix flow to get the item layer flags from the
2765  *  prefix subflow.
2766  *
2767  * @param[in] dev_flow
2768  *   Pointer to the created prefix subflow.
2769  *
2770  * @return
2771  *   The layers obtained from the prefix subflow.
2772  */
2773 static inline uint64_t
2774 flow_get_prefix_layer_flags(struct mlx5_flow *dev_flow)
2775 {
2776         uint64_t layers = 0;
2777
2778         /*
2779          * The layer bits could be cached in a local variable, but usually
2780          * the compiler will do this optimization for us.
2781          * If there is no decap action, use the layers directly.
2782          */
2783         if (!(dev_flow->act_flags & MLX5_FLOW_ACTION_DECAP))
2784                 return dev_flow->handle->layers;
2785         /* Convert L3 layers with decap action. */
2786         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV4)
2787                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
2788         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L3_IPV6)
2789                 layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
2790         /* Convert L4 layers with decap action.  */
2791         if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_TCP)
2792                 layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
2793         else if (dev_flow->handle->layers & MLX5_FLOW_LAYER_INNER_L4_UDP)
2794                 layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
2795         return layers;
2796 }
2797
2798 /**
2799  * Get metadata split action information.
2800  *
2801  * @param[in] actions
2802  *   Pointer to the list of actions.
2803  * @param[out] qrss
2804  *   Pointer to the return pointer. It is set to the QUEUE/RSS action when
2805  *   one is found in the list and is left untouched otherwise.
2808  * @param[out] encap_idx
2809  *   Pointer to the index of the encap action if it exists, otherwise the last
2810  *   action index.
2811  *
2812  * @return
2813  *   Total number of actions.
2814  */
2815 static int
2816 flow_parse_metadata_split_actions_info(const struct rte_flow_action actions[],
2817                                        const struct rte_flow_action **qrss,
2818                                        int *encap_idx)
2819 {
2820         const struct rte_flow_action_raw_encap *raw_encap;
2821         int actions_n = 0;
2822         int raw_decap_idx = -1;
2823
2824         *encap_idx = -1;
2825         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2826                 switch (actions->type) {
2827                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2828                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2829                         *encap_idx = actions_n;
2830                         break;
2831                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
2832                         raw_decap_idx = actions_n;
2833                         break;
2834                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2835                         raw_encap = actions->conf;
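                        /*
                         * A raw_encap larger than the encapsulation decision
                         * size presumably carries an L3 (tunnel) header; in
                         * that case the encap logically starts at the
                         * preceding raw_decap, if any.
                         */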
2836                         if (raw_encap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
2837                                 *encap_idx = raw_decap_idx != -1 ?
2838                                                       raw_decap_idx : actions_n;
2839                         break;
2840                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2841                 case RTE_FLOW_ACTION_TYPE_RSS:
2842                         *qrss = actions;
2843                         break;
2844                 default:
2845                         break;
2846                 }
2847                 actions_n++;
2848         }
2849         if (*encap_idx == -1)
2850                 *encap_idx = actions_n;
2851         /* Count RTE_FLOW_ACTION_TYPE_END. */
2852         return actions_n + 1;
2853 }
2854
2855 /**
2856  * Check meter action from the action list.
2857  *
2858  * @param[in] actions
2859  *   Pointer to the list of actions.
2860  * @param[out] mtr
2861  *   Pointer to the meter exist flag.
2862  *
2863  * @return
2864  *   Total number of actions.
2865  */
2866 static int
2867 flow_check_meter_action(const struct rte_flow_action actions[], uint32_t *mtr)
2868 {
2869         int actions_n = 0;
2870
2871         MLX5_ASSERT(mtr);
2872         *mtr = 0;
2873         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2874                 switch (actions->type) {
2875                 case RTE_FLOW_ACTION_TYPE_METER:
2876                         *mtr = 1;
2877                         break;
2878                 default:
2879                         break;
2880                 }
2881                 actions_n++;
2882         }
2883         /* Count RTE_FLOW_ACTION_TYPE_END. */
2884         return actions_n + 1;
2885 }
2886
2887 /**
2888  * Check if the flow should be split due to hairpin.
2889  * The reason for the split is that in current HW we can't
2890  * support encap on Rx, so if a flow has encap we move it
2891  * to Tx.
2892  *
2893  * @param dev
2894  *   Pointer to Ethernet device.
2895  * @param[in] attr
2896  *   Flow rule attributes.
2897  * @param[in] actions
2898  *   Associated actions (list terminated by the END action).
2899  *
2900  * @return
2901  *   > 0 the number of actions and the flow should be split,
2902  *   0 when no split is required.
2903  */
2904 static int
2905 flow_check_hairpin_split(struct rte_eth_dev *dev,
2906                          const struct rte_flow_attr *attr,
2907                          const struct rte_flow_action actions[])
2908 {
2909         int queue_action = 0;
2910         int action_n = 0;
2911         int encap = 0;
2912         const struct rte_flow_action_queue *queue;
2913         const struct rte_flow_action_rss *rss;
2914         const struct rte_flow_action_raw_encap *raw_encap;
2915
2916         if (!attr->ingress)
2917                 return 0;
2918         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2919                 switch (actions->type) {
2920                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2921                         queue = actions->conf;
2922                         if (queue == NULL)
2923                                 return 0;
2924                         if (mlx5_rxq_get_type(dev, queue->index) !=
2925                             MLX5_RXQ_TYPE_HAIRPIN)
2926                                 return 0;
2927                         queue_action = 1;
2928                         action_n++;
2929                         break;
2930                 case RTE_FLOW_ACTION_TYPE_RSS:
2931                         rss = actions->conf;
2932                         if (rss == NULL || rss->queue_num == 0)
2933                                 return 0;
2934                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
2935                             MLX5_RXQ_TYPE_HAIRPIN)
2936                                 return 0;
2937                         queue_action = 1;
2938                         action_n++;
2939                         break;
2940                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2941                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2942                         encap = 1;
2943                         action_n++;
2944                         break;
2945                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2946                         raw_encap = actions->conf;
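                        /*
                         * Sizes above a plain Ethernet + IPv4 header are
                         * treated as tunnel encapsulation, which forces the
                         * hairpin flow to be split.
                         */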
2947                         if (raw_encap->size >
2948                             (sizeof(struct rte_flow_item_eth) +
2949                              sizeof(struct rte_flow_item_ipv4)))
2950                                 encap = 1;
2951                         action_n++;
2952                         break;
2953                 default:
2954                         action_n++;
2955                         break;
2956                 }
2957         }
2958         if (encap == 1 && queue_action)
2959                 return action_n;
2960         return 0;
2961 }
2962
2963 /* Declare flow create/destroy prototype in advance. */
2964 static uint32_t
2965 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
2966                  const struct rte_flow_attr *attr,
2967                  const struct rte_flow_item items[],
2968                  const struct rte_flow_action actions[],
2969                  bool external, struct rte_flow_error *error);
2970
2971 static void
2972 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
2973                   uint32_t flow_idx);
2974
2975 /**
2976  * Add a flow of copying flow metadata registers in RX_CP_TBL.
2977  *
2978  * As mark_id is unique, if there's already a registered flow for the mark_id,
2979  * return by increasing the reference counter of the resource. Otherwise, create
2980  * the resource (mcp_res) and flow.
2981  *
2982  * Flow looks like,
2983  *   - If ingress port is ANY and reg_c[1] is mark_id,
2984  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
2985  *
2986  * For default flow (zero mark_id), flow is like,
2987  *   - If ingress port is ANY,
2988  *     reg_b := reg_c[0] and jump to RX_ACT_TBL.
2989  *
2990  * @param dev
2991  *   Pointer to Ethernet device.
2992  * @param mark_id
2993  *   ID of MARK action, zero means default flow for META.
2994  * @param[out] error
2995  *   Perform verbose error reporting if not NULL.
2996  *
2997  * @return
2998  *   Associated resource on success, NULL otherwise and rte_errno is set.
2999  */
3000 static struct mlx5_flow_mreg_copy_resource *
3001 flow_mreg_add_copy_action(struct rte_eth_dev *dev, uint32_t mark_id,
3002                           struct rte_flow_error *error)
3003 {
3004         struct mlx5_priv *priv = dev->data->dev_private;
3005         struct rte_flow_attr attr = {
3006                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3007                 .ingress = 1,
3008         };
3009         struct mlx5_rte_flow_item_tag tag_spec = {
3010                 .data = mark_id,
3011         };
3012         struct rte_flow_item items[] = {
3013                 [1] = { .type = RTE_FLOW_ITEM_TYPE_END, },
3014         };
3015         struct rte_flow_action_mark ftag = {
3016                 .id = mark_id,
3017         };
3018         struct mlx5_flow_action_copy_mreg cp_mreg = {
3019                 .dst = REG_B,
3020                 .src = 0,
3021         };
3022         struct rte_flow_action_jump jump = {
3023                 .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
3024         };
3025         struct rte_flow_action actions[] = {
3026                 [3] = { .type = RTE_FLOW_ACTION_TYPE_END, },
3027         };
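        /*
         * Only the terminating entries are statically initialized above;
         * the remaining items[] and actions[] slots are filled in below,
         * depending on whether this is the default rule or a MARK rule.
         */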
3028         struct mlx5_flow_mreg_copy_resource *mcp_res;
3029         uint32_t idx = 0;
3030         int ret;
3031
3032         /* Fill the register fields in the flow. */
3033         ret = mlx5_flow_get_reg_id(dev, MLX5_FLOW_MARK, 0, error);
3034         if (ret < 0)
3035                 return NULL;
3036         tag_spec.id = ret;
3037         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3038         if (ret < 0)
3039                 return NULL;
3040         cp_mreg.src = ret;
3041         /* Check if already registered. */
3042         MLX5_ASSERT(priv->mreg_cp_tbl);
3043         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl, mark_id);
3044         if (mcp_res) {
3045                 /* For non-default rule. */
3046                 if (mark_id != MLX5_DEFAULT_COPY_ID)
3047                         mcp_res->refcnt++;
3048                 MLX5_ASSERT(mark_id != MLX5_DEFAULT_COPY_ID ||
3049                             mcp_res->refcnt == 1);
3050                 return mcp_res;
3051         }
3052         /* Provide the full width of FLAG specific value. */
3053         if (mark_id == (priv->sh->dv_regc0_mask & MLX5_FLOW_MARK_DEFAULT))
3054                 tag_spec.data = MLX5_FLOW_MARK_DEFAULT;
3055         /* Build a new flow. */
3056         if (mark_id != MLX5_DEFAULT_COPY_ID) {
3057                 items[0] = (struct rte_flow_item){
3058                         .type = (enum rte_flow_item_type)
3059                                 MLX5_RTE_FLOW_ITEM_TYPE_TAG,
3060                         .spec = &tag_spec,
3061                 };
3062                 items[1] = (struct rte_flow_item){
3063                         .type = RTE_FLOW_ITEM_TYPE_END,
3064                 };
3065                 actions[0] = (struct rte_flow_action){
3066                         .type = (enum rte_flow_action_type)
3067                                 MLX5_RTE_FLOW_ACTION_TYPE_MARK,
3068                         .conf = &ftag,
3069                 };
3070                 actions[1] = (struct rte_flow_action){
3071                         .type = (enum rte_flow_action_type)
3072                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3073                         .conf = &cp_mreg,
3074                 };
3075                 actions[2] = (struct rte_flow_action){
3076                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3077                         .conf = &jump,
3078                 };
3079                 actions[3] = (struct rte_flow_action){
3080                         .type = RTE_FLOW_ACTION_TYPE_END,
3081                 };
3082         } else {
3083                 /* Default rule, wildcard match. */
3084                 attr.priority = MLX5_FLOW_PRIO_RSVD;
3085                 items[0] = (struct rte_flow_item){
3086                         .type = RTE_FLOW_ITEM_TYPE_END,
3087                 };
3088                 actions[0] = (struct rte_flow_action){
3089                         .type = (enum rte_flow_action_type)
3090                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3091                         .conf = &cp_mreg,
3092                 };
3093                 actions[1] = (struct rte_flow_action){
3094                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
3095                         .conf = &jump,
3096                 };
3097                 actions[2] = (struct rte_flow_action){
3098                         .type = RTE_FLOW_ACTION_TYPE_END,
3099                 };
3100         }
3101         /* Build a new entry. */
3102         mcp_res = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_MCP], &idx);
3103         if (!mcp_res) {
3104                 rte_errno = ENOMEM;
3105                 return NULL;
3106         }
3107         mcp_res->idx = idx;
3108         /*
3109          * The copy Flows are not included in any list. These
3110          * ones are referenced from other Flows and cannot
3111          * be applied, removed or deleted in arbitrary order
3112          * by list traversal.
3113          */
3114         mcp_res->rix_flow = flow_list_create(dev, NULL, &attr, items,
3115                                          actions, false, error);
3116         if (!mcp_res->rix_flow)
3117                 goto error;
3118         mcp_res->refcnt++;
3119         mcp_res->hlist_ent.key = mark_id;
3120         ret = mlx5_hlist_insert(priv->mreg_cp_tbl,
3121                                 &mcp_res->hlist_ent);
3122         MLX5_ASSERT(!ret);
3123         if (ret)
3124                 goto error;
3125         return mcp_res;
3126 error:
3127         if (mcp_res->rix_flow)
3128                 flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3129         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3130         return NULL;
3131 }
3132
3133 /**
3134  * Release flow in RX_CP_TBL.
3135  *
3136  * @param dev
3137  *   Pointer to Ethernet device.
3138  * @param flow
3139  *   Parent flow for which copying is provided.
3140  */
3141 static void
3142 flow_mreg_del_copy_action(struct rte_eth_dev *dev,
3143                           struct rte_flow *flow)
3144 {
3145         struct mlx5_flow_mreg_copy_resource *mcp_res;
3146         struct mlx5_priv *priv = dev->data->dev_private;
3147
3148         if (!flow->rix_mreg_copy)
3149                 return;
3150         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3151                                  flow->rix_mreg_copy);
3152         if (!mcp_res || !priv->mreg_cp_tbl)
3153                 return;
3154         if (flow->copy_applied) {
3155                 MLX5_ASSERT(mcp_res->appcnt);
3156                 flow->copy_applied = 0;
3157                 --mcp_res->appcnt;
3158                 if (!mcp_res->appcnt) {
3159                         struct rte_flow *mcp_flow = mlx5_ipool_get
3160                                         (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3161                                         mcp_res->rix_flow);
3162
3163                         if (mcp_flow)
3164                                 flow_drv_remove(dev, mcp_flow);
3165                 }
3166         }
3167         /*
3168          * We do not check availability of metadata registers here,
3169          * because copy resources are not allocated in this case.
3170          */
3171         if (--mcp_res->refcnt)
3172                 return;
3173         MLX5_ASSERT(mcp_res->rix_flow);
3174         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3175         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3176         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3177         flow->rix_mreg_copy = 0;
3178 }
3179
3180 /**
3181  * Start flow in RX_CP_TBL.
3182  *
3183  * @param dev
3184  *   Pointer to Ethernet device.
3185  * @param flow
3186  *   Parent flow for which copying is provided.
3187  *
3188  * @return
3189  *   0 on success, a negative errno value otherwise and rte_errno is set.
3190  */
3191 static int
3192 flow_mreg_start_copy_action(struct rte_eth_dev *dev,
3193                             struct rte_flow *flow)
3194 {
3195         struct mlx5_flow_mreg_copy_resource *mcp_res;
3196         struct mlx5_priv *priv = dev->data->dev_private;
3197         int ret;
3198
3199         if (!flow->rix_mreg_copy || flow->copy_applied)
3200                 return 0;
3201         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3202                                  flow->rix_mreg_copy);
3203         if (!mcp_res)
3204                 return 0;
3205         if (!mcp_res->appcnt) {
3206                 struct rte_flow *mcp_flow = mlx5_ipool_get
3207                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3208                                 mcp_res->rix_flow);
3209
3210                 if (mcp_flow) {
3211                         ret = flow_drv_apply(dev, mcp_flow, NULL);
3212                         if (ret)
3213                                 return ret;
3214                 }
3215         }
3216         ++mcp_res->appcnt;
3217         flow->copy_applied = 1;
3218         return 0;
3219 }
3220
3221 /**
3222  * Stop flow in RX_CP_TBL.
3223  *
3224  * @param dev
3225  *   Pointer to Ethernet device.
3226  * @param flow
3227  *   Parent flow for which copying is provided.
3228  */
3229 static void
3230 flow_mreg_stop_copy_action(struct rte_eth_dev *dev,
3231                            struct rte_flow *flow)
3232 {
3233         struct mlx5_flow_mreg_copy_resource *mcp_res;
3234         struct mlx5_priv *priv = dev->data->dev_private;
3235
3236         if (!flow->rix_mreg_copy || !flow->copy_applied)
3237                 return;
3238         mcp_res = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_MCP],
3239                                  flow->rix_mreg_copy);
3240         if (!mcp_res)
3241                 return;
3242         MLX5_ASSERT(mcp_res->appcnt);
3243         --mcp_res->appcnt;
3244         flow->copy_applied = 0;
3245         if (!mcp_res->appcnt) {
3246                 struct rte_flow *mcp_flow = mlx5_ipool_get
3247                                 (priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
3248                                 mcp_res->rix_flow);
3249
3250                 if (mcp_flow)
3251                         flow_drv_remove(dev, mcp_flow);
3252         }
3253 }
3254
3255 /**
3256  * Remove the default copy action from RX_CP_TBL.
3257  *
3258  * @param dev
3259  *   Pointer to Ethernet device.
3260  */
3261 static void
3262 flow_mreg_del_default_copy_action(struct rte_eth_dev *dev)
3263 {
3264         struct mlx5_flow_mreg_copy_resource *mcp_res;
3265         struct mlx5_priv *priv = dev->data->dev_private;
3266
3267         /* Check if default flow is registered. */
3268         if (!priv->mreg_cp_tbl)
3269                 return;
3270         mcp_res = (void *)mlx5_hlist_lookup(priv->mreg_cp_tbl,
3271                                             MLX5_DEFAULT_COPY_ID);
3272         if (!mcp_res)
3273                 return;
3274         MLX5_ASSERT(mcp_res->rix_flow);
3275         flow_list_destroy(dev, NULL, mcp_res->rix_flow);
3276         mlx5_hlist_remove(priv->mreg_cp_tbl, &mcp_res->hlist_ent);
3277         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_MCP], mcp_res->idx);
3278 }
3279
3280 /**
3281  * Add the default copy action in RX_CP_TBL.
3282  *
3283  * @param dev
3284  *   Pointer to Ethernet device.
3285  * @param[out] error
3286  *   Perform verbose error reporting if not NULL.
3287  *
3288  * @return
3289  *   0 for success, negative value otherwise and rte_errno is set.
3290  */
3291 static int
3292 flow_mreg_add_default_copy_action(struct rte_eth_dev *dev,
3293                                   struct rte_flow_error *error)
3294 {
3295         struct mlx5_priv *priv = dev->data->dev_private;
3296         struct mlx5_flow_mreg_copy_resource *mcp_res;
3297
3298         /* Check whether extensive metadata feature is engaged. */
3299         if (!priv->config.dv_flow_en ||
3300             priv->config.dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3301             !mlx5_flow_ext_mreg_supported(dev) ||
3302             !priv->sh->dv_regc0_mask)
3303                 return 0;
3304         mcp_res = flow_mreg_add_copy_action(dev, MLX5_DEFAULT_COPY_ID, error);
3305         if (!mcp_res)
3306                 return -rte_errno;
3307         return 0;
3308 }
3309
3310 /**
3311  * Add a flow of copying flow metadata registers in RX_CP_TBL.
3312  *
3313  * All the flows having a Q/RSS action should be split by
3314  * flow_mreg_split_qrss_prep() to pass by RX_CP_TBL. A flow in the RX_CP_TBL
3315  * performs the following,
3316  *   - CQE->flow_tag := reg_c[1] (MARK)
3317  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3318  * As CQE's flow_tag is not a register, it can't be simply copied from reg_c[1]
3319  * but there should be a flow per each MARK ID set by MARK action.
3320  *
3321  * For the aforementioned reason, if there's a MARK action in flow's action
3322  * list, a corresponding flow should be added to the RX_CP_TBL in order to copy
3323  * the MARK ID to CQE's flow_tag like,
3324  *   - If reg_c[1] is mark_id,
3325  *     flow_tag := mark_id, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3326  *
3327  * For SET_META action which stores value in reg_c[0], as the destination is
3328  * also a flow metadata register (reg_b), adding a default flow is enough. Zero
3329  * MARK ID means the default flow. The default flow looks like,
3330  *   - For all flow, reg_b := reg_c[0] and jump to RX_ACT_TBL.
3331  *
3332  * @param dev
3333  *   Pointer to Ethernet device.
3334  * @param flow
3335  *   Pointer to flow structure.
3336  * @param[in] actions
3337  *   Pointer to the list of actions.
3338  * @param[out] error
3339  *   Perform verbose error reporting if not NULL.
3340  *
3341  * @return
3342  *   0 on success, negative value otherwise and rte_errno is set.
3343  */
3344 static int
3345 flow_mreg_update_copy_table(struct rte_eth_dev *dev,
3346                             struct rte_flow *flow,
3347                             const struct rte_flow_action *actions,
3348                             struct rte_flow_error *error)
3349 {
3350         struct mlx5_priv *priv = dev->data->dev_private;
3351         struct mlx5_dev_config *config = &priv->config;
3352         struct mlx5_flow_mreg_copy_resource *mcp_res;
3353         const struct rte_flow_action_mark *mark;
3354
3355         /* Check whether extensive metadata feature is engaged. */
3356         if (!config->dv_flow_en ||
3357             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3358             !mlx5_flow_ext_mreg_supported(dev) ||
3359             !priv->sh->dv_regc0_mask)
3360                 return 0;
3361         /* Find MARK action. */
3362         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3363                 switch (actions->type) {
3364                 case RTE_FLOW_ACTION_TYPE_FLAG:
3365                         mcp_res = flow_mreg_add_copy_action
3366                                 (dev, MLX5_FLOW_MARK_DEFAULT, error);
3367                         if (!mcp_res)
3368                                 return -rte_errno;
3369                         flow->rix_mreg_copy = mcp_res->idx;
3370                         if (dev->data->dev_started) {
3371                                 mcp_res->appcnt++;
3372                                 flow->copy_applied = 1;
3373                         }
3374                         return 0;
3375                 case RTE_FLOW_ACTION_TYPE_MARK:
3376                         mark = (const struct rte_flow_action_mark *)
3377                                 actions->conf;
3378                         mcp_res =
3379                                 flow_mreg_add_copy_action(dev, mark->id, error);
3380                         if (!mcp_res)
3381                                 return -rte_errno;
3382                         flow->rix_mreg_copy = mcp_res->idx;
3383                         if (dev->data->dev_started) {
3384                                 mcp_res->appcnt++;
3385                                 flow->copy_applied = 1;
3386                         }
3387                         return 0;
3388                 default:
3389                         break;
3390                 }
3391         }
3392         return 0;
3393 }
3394
3395 #define MLX5_MAX_SPLIT_ACTIONS 24
3396 #define MLX5_MAX_SPLIT_ITEMS 24
3397
3398 /**
3399  * Split the hairpin flow.
3400  * Since HW can't support encap on Rx we move the encap to Tx.
3401  * If the count action is after the encap then we also
3402  * move the count action. In this case the count will also measure
3403  * the outer bytes.
3404  *
3405  * @param dev
3406  *   Pointer to Ethernet device.
3407  * @param[in] actions
3408  *   Associated actions (list terminated by the END action).
3409  * @param[out] actions_rx
3410  *   Rx flow actions.
3411  * @param[out] actions_tx
3412  *   Tx flow actions.
3413  * @param[out] pattern_tx
3414  *   The pattern items for the Tx flow.
3415  * @param[out] flow_id
3416  *   The flow ID connected to this flow.
3417  *
3418  * @return
3419  *   0 on success.
3420  */
3421 static int
3422 flow_hairpin_split(struct rte_eth_dev *dev,
3423                    const struct rte_flow_action actions[],
3424                    struct rte_flow_action actions_rx[],
3425                    struct rte_flow_action actions_tx[],
3426                    struct rte_flow_item pattern_tx[],
3427                    uint32_t *flow_id)
3428 {
3429         struct mlx5_priv *priv = dev->data->dev_private;
3430         const struct rte_flow_action_raw_encap *raw_encap;
3431         const struct rte_flow_action_raw_decap *raw_decap;
3432         struct mlx5_rte_flow_action_set_tag *set_tag;
3433         struct rte_flow_action *tag_action;
3434         struct mlx5_rte_flow_item_tag *tag_item;
3435         struct rte_flow_item *item;
3436         char *addr;
3437         int encap = 0;
3438
3439         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
3440         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3441                 switch (actions->type) {
3442                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
3443                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
3444                         rte_memcpy(actions_tx, actions,
3445                                sizeof(struct rte_flow_action));
3446                         actions_tx++;
3447                         break;
3448                 case RTE_FLOW_ACTION_TYPE_COUNT:
3449                         if (encap) {
3450                                 rte_memcpy(actions_tx, actions,
3451                                            sizeof(struct rte_flow_action));
3452                                 actions_tx++;
3453                         } else {
3454                                 rte_memcpy(actions_rx, actions,
3455                                            sizeof(struct rte_flow_action));
3456                                 actions_rx++;
3457                         }
3458                         break;
3459                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3460                         raw_encap = actions->conf;
3461                         if (raw_encap->size >
3462                             (sizeof(struct rte_flow_item_eth) +
3463                              sizeof(struct rte_flow_item_ipv4))) {
3464                                 memcpy(actions_tx, actions,
3465                                        sizeof(struct rte_flow_action));
3466                                 actions_tx++;
3467                                 encap = 1;
3468                         } else {
3469                                 rte_memcpy(actions_rx, actions,
3470                                            sizeof(struct rte_flow_action));
3471                                 actions_rx++;
3472                         }
3473                         break;
3474                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3475                         raw_decap = actions->conf;
3476                         if (raw_decap->size <
3477                             (sizeof(struct rte_flow_item_eth) +
3478                              sizeof(struct rte_flow_item_ipv4))) {
3479                                 memcpy(actions_tx, actions,
3480                                        sizeof(struct rte_flow_action));
3481                                 actions_tx++;
3482                         } else {
3483                                 rte_memcpy(actions_rx, actions,
3484                                            sizeof(struct rte_flow_action));
3485                                 actions_rx++;
3486                         }
3487                         break;
3488                 default:
3489                         rte_memcpy(actions_rx, actions,
3490                                    sizeof(struct rte_flow_action));
3491                         actions_rx++;
3492                         break;
3493                 }
3494         }
3495         /* Add the set tag action and the end action for the Rx flow. */
3496         tag_action = actions_rx;
3497         tag_action->type = (enum rte_flow_action_type)
3498                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3499         actions_rx++;
3500         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
3501         actions_rx++;
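        /*
         * The set_tag configuration is stored in the same caller-provided
         * buffer, right after the END action that was just copied.
         */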
3502         set_tag = (void *)actions_rx;
3503         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, NULL);
3504         MLX5_ASSERT(set_tag->id > REG_NONE);
3505         set_tag->data = *flow_id;
3506         tag_action->conf = set_tag;
3507         /* Create Tx item list. */
3508         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
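        /*
         * pattern_tx[0] holds the tag item and pattern_tx[1] the END item;
         * the tag spec and mask are stored right after them in the same
         * caller-provided buffer.
         */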
3509         addr = (void *)&pattern_tx[2];
3510         item = pattern_tx;
3511         item->type = (enum rte_flow_item_type)
3512                      MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3513         tag_item = (void *)addr;
3514         tag_item->data = *flow_id;
3515         tag_item->id = mlx5_flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, NULL);
3516         MLX5_ASSERT(tag_item->id > REG_NONE);
3517         item->spec = tag_item;
3518         addr += sizeof(struct mlx5_rte_flow_item_tag);
3519         tag_item = (void *)addr;
3520         tag_item->data = UINT32_MAX;
3521         tag_item->id = UINT16_MAX;
3522         item->mask = tag_item;
3523         addr += sizeof(struct mlx5_rte_flow_item_tag);
3524         item->last = NULL;
3525         item++;
3526         item->type = RTE_FLOW_ITEM_TYPE_END;
3527         return 0;
3528 }
3529
3530 /**
3531  * The last stage of splitting chain, just creates the subflow
3532  * without any modification.
3533  *
3534  * @param[in] dev
3535  *   Pointer to Ethernet device.
3536  * @param[in] flow
3537  *   Parent flow structure pointer.
3538  * @param[in, out] sub_flow
3539  *   Pointer to return the created subflow, may be NULL.
3540  * @param[in] prefix_layers
3541  *   Prefix subflow layers, may be 0.
3542  * @param[in] attr
3543  *   Flow rule attributes.
3544  * @param[in] items
3545  *   Pattern specification (list terminated by the END pattern item).
3546  * @param[in] actions
3547  *   Associated actions (list terminated by the END action).
3548  * @param[in] external
3549  *   This flow rule is created by a request external to the PMD.
3550  * @param[in] flow_idx
3551  *   The memory pool index of the flow.
3552  * @param[out] error
3553  *   Perform verbose error reporting if not NULL.
3554  * @return
3555  *   0 on success, negative value otherwise
3556  */
3557 static int
3558 flow_create_split_inner(struct rte_eth_dev *dev,
3559                         struct rte_flow *flow,
3560                         struct mlx5_flow **sub_flow,
3561                         uint64_t prefix_layers,
3562                         const struct rte_flow_attr *attr,
3563                         const struct rte_flow_item items[],
3564                         const struct rte_flow_action actions[],
3565                         bool external, uint32_t flow_idx,
3566                         struct rte_flow_error *error)
3567 {
3568         struct mlx5_flow *dev_flow;
3569
3570         dev_flow = flow_drv_prepare(dev, flow, attr, items, actions,
3571                 flow_idx, error);
3572         if (!dev_flow)
3573                 return -rte_errno;
3574         dev_flow->flow = flow;
3575         dev_flow->external = external;
3576         /* Subflow object was created, we must include it in the list. */
3577         SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
3578                       dev_flow->handle, next);
3579         /*
3580          * If dev_flow is one of the suffix flows, some actions in the
3581          * suffix flow may need some user defined item layer flags.
3582          */
3583         if (prefix_layers)
3584                 dev_flow->handle->layers = prefix_layers;
3585         if (sub_flow)
3586                 *sub_flow = dev_flow;
3587         return flow_drv_translate(dev, dev_flow, attr, items, actions, error);
3588 }
3589
3590 /**
3591  * Split the meter flow.
3592  *
3593  * As the meter flow will be split into three subflows, apart from the meter
3594  * action, the other actions only make sense if the meter accepts
3595  * the packet. If the packet needs to be dropped, no other additional
3596  * actions should be taken.
3597  *
3598  * One kind of special action which decapsulates the L3 tunnel
3599  * header will be in the prefix subflow, so as not to take the
3600  * L3 tunnel header into account.
3601  *
3602  * @param dev
3603  *   Pointer to Ethernet device.
3604  * @param[in] items
3605  *   Pattern specification (list terminated by the END pattern item).
3606  * @param[out] sfx_items
3607  *   Suffix flow match items (list terminated by the END pattern item).
3608  * @param[in] actions
3609  *   Associated actions (list terminated by the END action).
3610  * @param[out] actions_sfx
3611  *   Suffix flow actions.
3612  * @param[out] actions_pre
3613  *   Prefix flow actions.
3614  *
3615  * @return
3616  *   The tag ID allocated for the suffix subflow match.
3621  */
3622 static int
3623 flow_meter_split_prep(struct rte_eth_dev *dev,
3624                  const struct rte_flow_item items[],
3625                  struct rte_flow_item sfx_items[],
3626                  const struct rte_flow_action actions[],
3627                  struct rte_flow_action actions_sfx[],
3628                  struct rte_flow_action actions_pre[])
3629 {
3630         struct rte_flow_action *tag_action = NULL;
3631         struct rte_flow_item *tag_item;
3632         struct mlx5_rte_flow_action_set_tag *set_tag;
3633         struct rte_flow_error error;
3634         const struct rte_flow_action_raw_encap *raw_encap;
3635         const struct rte_flow_action_raw_decap *raw_decap;
3636         struct mlx5_rte_flow_item_tag *tag_spec;
3637         struct mlx5_rte_flow_item_tag *tag_mask;
3638         uint32_t tag_id;
3639         bool copy_vlan = false;
3640
3641         /* Prepare the actions for prefix and suffix flow. */
3642         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3643                 struct rte_flow_action **action_cur = NULL;
3644
3645                 switch (actions->type) {
3646                 case RTE_FLOW_ACTION_TYPE_METER:
3647                         /* Add the extra tag action first. */
3648                         tag_action = actions_pre;
3649                         tag_action->type = (enum rte_flow_action_type)
3650                                            MLX5_RTE_FLOW_ACTION_TYPE_TAG;
3651                         actions_pre++;
3652                         action_cur = &actions_pre;
3653                         break;
3654                 case RTE_FLOW_ACTION_TYPE_VXLAN_DECAP:
3655                 case RTE_FLOW_ACTION_TYPE_NVGRE_DECAP:
3656                         action_cur = &actions_pre;
3657                         break;
3658                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
3659                         raw_encap = actions->conf;
3660                         if (raw_encap->size < MLX5_ENCAPSULATION_DECISION_SIZE)
3661                                 action_cur = &actions_pre;
3662                         break;
3663                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
3664                         raw_decap = actions->conf;
3665                         if (raw_decap->size > MLX5_ENCAPSULATION_DECISION_SIZE)
3666                                 action_cur = &actions_pre;
3667                         break;
3668                 case RTE_FLOW_ACTION_TYPE_OF_PUSH_VLAN:
3669                 case RTE_FLOW_ACTION_TYPE_OF_SET_VLAN_VID:
3670                         copy_vlan = true;
3671                         break;
3672                 default:
3673                         break;
3674                 }
3675                 if (!action_cur)
3676                         action_cur = &actions_sfx;
3677                 memcpy(*action_cur, actions, sizeof(struct rte_flow_action));
3678                 (*action_cur)++;
3679         }
3680         /* Add end action to the actions. */
3681         actions_sfx->type = RTE_FLOW_ACTION_TYPE_END;
3682         actions_pre->type = RTE_FLOW_ACTION_TYPE_END;
3683         actions_pre++;
3684         /* Set the tag. */
3685         set_tag = (void *)actions_pre;
3686         set_tag->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3687         /*
3688          * Get the id from the qrss_pool to make qrss share the id with meter.
3689          */
3690         tag_id = flow_qrss_get_id(dev);
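        /* The low MLX5_MTR_COLOR_BITS bits are left for the meter color value. */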
3691         set_tag->data = tag_id << MLX5_MTR_COLOR_BITS;
3692         MLX5_ASSERT(tag_action);
3693         tag_action->conf = set_tag;
3694         /* Prepare the suffix subflow items. */
3695         tag_item = sfx_items++;
3696         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3697                 int item_type = items->type;
3698
3699                 switch (item_type) {
3700                 case RTE_FLOW_ITEM_TYPE_PORT_ID:
3701                         memcpy(sfx_items, items, sizeof(*sfx_items));
3702                         sfx_items++;
3703                         break;
3704                 case RTE_FLOW_ITEM_TYPE_VLAN:
3705                         if (copy_vlan) {
3706                                 memcpy(sfx_items, items, sizeof(*sfx_items));
3707                                 /*
3708                                  * Convert to internal match item, it is used
3709                                  * for vlan push and set vid.
3710                                  */
3711                                 sfx_items->type = (enum rte_flow_item_type)
3712                                                   MLX5_RTE_FLOW_ITEM_TYPE_VLAN;
3713                                 sfx_items++;
3714                         }
3715                         break;
3716                 default:
3717                         break;
3718                 }
3719         }
3720         sfx_items->type = RTE_FLOW_ITEM_TYPE_END;
3721         sfx_items++;
3722         tag_spec = (struct mlx5_rte_flow_item_tag *)sfx_items;
3723         tag_spec->data = tag_id << MLX5_MTR_COLOR_BITS;
3724         tag_spec->id = mlx5_flow_get_reg_id(dev, MLX5_MTR_SFX, 0, &error);
3725         tag_mask = tag_spec + 1;
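        /*
         * Match only the tag id part of the register; the low color bits
         * are excluded from the suffix flow match.
         */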
3726         tag_mask->data = 0xffffff00;
3727         tag_item->type = (enum rte_flow_item_type)
3728                          MLX5_RTE_FLOW_ITEM_TYPE_TAG;
3729         tag_item->spec = tag_spec;
3730         tag_item->last = NULL;
3731         tag_item->mask = tag_mask;
3732         return tag_id;
3733 }
3734
3735 /**
3736  * Split action list having QUEUE/RSS for metadata register copy.
3737  *
3738  * Once Q/RSS action is detected in user's action list, the flow action
3739  * should be split in order to copy metadata registers, which will happen in
3740  * RX_CP_TBL like,
3741  *   - CQE->flow_tag := reg_c[1] (MARK)
3742  *   - CQE->flow_table_metadata (reg_b) := reg_c[0] (META)
3743  * The Q/RSS action will be performed on RX_ACT_TBL after passing by RX_CP_TBL.
3744  * This is because the last action of each flow must be a terminal action
3745  * (QUEUE, RSS or DROP).
3746  *
3747  * Flow ID must be allocated to identify actions in the RX_ACT_TBL and it is
3748  * stored and kept in the mlx5_flow structure per each sub_flow.
3749  *
3750  * The Q/RSS action is replaced with,
3751  *   - SET_TAG, setting the allocated flow ID to reg_c[2].
3752  * And the following JUMP action is added at the end,
3753  *   - JUMP, to RX_CP_TBL.
3754  *
3755  * A flow to perform the remaining Q/RSS action will be created in RX_ACT_TBL by
3756  * flow_create_split_metadata() routine. The flow will look like,
3757  *   - If flow ID matches (reg_c[2]), perform Q/RSS.
3758  *
3759  * @param dev
3760  *   Pointer to Ethernet device.
3761  * @param[out] split_actions
3762  *   Pointer to store split actions to jump to CP_TBL.
3763  * @param[in] actions
3764  *   Pointer to the list of original flow actions.
3765  * @param[in] qrss
3766  *   Pointer to the Q/RSS action.
3767  * @param[in] actions_n
3768  *   Number of original actions.
3769  * @param[out] error
3770  *   Perform verbose error reporting if not NULL.
3771  *
3772  * @return
3773  *   non-zero unique flow_id on success, otherwise 0 and
3774  *   error/rte_errno are set.
3775  */
3776 static uint32_t
3777 flow_mreg_split_qrss_prep(struct rte_eth_dev *dev,
3778                           struct rte_flow_action *split_actions,
3779                           const struct rte_flow_action *actions,
3780                           const struct rte_flow_action *qrss,
3781                           int actions_n, struct rte_flow_error *error)
3782 {
3783         struct mlx5_rte_flow_action_set_tag *set_tag;
3784         struct rte_flow_action_jump *jump;
3785         const int qrss_idx = qrss - actions;
3786         uint32_t flow_id = 0;
3787         int ret = 0;
3788
3789         /*
3790          * Given actions will be split
3791          * - Replace QUEUE/RSS action with SET_TAG to set flow ID.
3792          * - Add jump to mreg CP_TBL.
3793          * As a result, there will be one more action.
3794          */
3795         ++actions_n;
3796         memcpy(split_actions, actions, sizeof(*split_actions) * actions_n);
3797         set_tag = (void *)(split_actions + actions_n);
3798         /*
3799          * If the tag action is not set to void (it means we are not the meter
3800          * suffix flow), add the tag action, since the meter suffix flow
3801          * already has the tag added.
3802          */
3803         if (split_actions[qrss_idx].type != RTE_FLOW_ACTION_TYPE_VOID) {
3804                 /*
3805                  * Allocate the new subflow ID. This one is unique within
3806                  * device and not shared with representors. Otherwise,
3807                  * we would have to resolve multi-thread access synch
3808                  * issue. Each flow on the shared device is appended
3809                  * with source vport identifier, so the resulting
3810                  * flows will be unique in the shared (by master and
3811                  * representors) domain even if they have coinciding
3812                  * IDs.
3813                  */
3814                 flow_id = flow_qrss_get_id(dev);
3815                 if (!flow_id)
3816                         return rte_flow_error_set(error, ENOMEM,
3817                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3818                                                   NULL, "can't allocate id "
3819                                                   "for split Q/RSS subflow");
3820                 /* Internal SET_TAG action to set flow ID. */
3821                 *set_tag = (struct mlx5_rte_flow_action_set_tag){
3822                         .data = flow_id,
3823                 };
3824                 ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0, error);
3825                 if (ret < 0)
3826                         return ret;
3827                 set_tag->id = ret;
3828                 /* Construct new actions array. */
3829                 /* Replace QUEUE/RSS action. */
3830                 split_actions[qrss_idx] = (struct rte_flow_action){
3831                         .type = (enum rte_flow_action_type)
3832                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG,
3833                         .conf = set_tag,
3834                 };
3835         }
3836         /* JUMP action to jump to mreg copy table (CP_TBL). */
3837         jump = (void *)(set_tag + 1);
3838         *jump = (struct rte_flow_action_jump){
3839                 .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
3840         };
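             /*
              * Place the JUMP just before the new terminating END so that the
              * split (prefix) flow diverts traffic to the register copy table.
              */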
3841         split_actions[actions_n - 2] = (struct rte_flow_action){
3842                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
3843                 .conf = jump,
3844         };
3845         split_actions[actions_n - 1] = (struct rte_flow_action){
3846                 .type = RTE_FLOW_ACTION_TYPE_END,
3847         };
3848         return flow_id;
3849 }
3850
3851 /**
3852  * Extend the given action list for Tx metadata copy.
3853  *
3854  * Copy the given action list to ext_actions and add a flow metadata register
3855  * copy action in order to copy reg_a (set by the WQE) to reg_c[0].
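 *
 * A minimal sketch of the resulting layout (assuming encap_idx indexes the
 * encap action, or the terminating END when there is none):
 *   [A0, ..., ENCAP, END]  ->  [A0, ..., COPY_MREG, ENCAP, END]
 *   [A0, ..., END]         ->  [A0, ..., COPY_MREG, END]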
3856  *
3857  * @param dev
 *   Pointer to Ethernet device.
 * @param[out] ext_actions
3858  *   Pointer to the extended action list.
3859  * @param[in] actions
3860  *   Pointer to the list of actions.
3861  * @param[in] actions_n
3862  *   Number of actions in the list.
3863  * @param[out] error
3864  *   Perform verbose error reporting if not NULL.
3865  * @param[in] encap_idx
3866  *   The encap action index.
3867  *
3868  * @return
3869  *   0 on success, negative value otherwise
3870  */
3871 static int
3872 flow_mreg_tx_copy_prep(struct rte_eth_dev *dev,
3873                        struct rte_flow_action *ext_actions,
3874                        const struct rte_flow_action *actions,
3875                        int actions_n, struct rte_flow_error *error,
3876                        int encap_idx)
3877 {
3878         struct mlx5_flow_action_copy_mreg *cp_mreg =
3879                 (struct mlx5_flow_action_copy_mreg *)
3880                         (ext_actions + actions_n + 1);
3881         int ret;
3882
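             /*
              * Resolve the destination (Rx metadata) and source (Tx metadata)
              * registers for the copy action.
              */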
3883         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_RX, 0, error);
3884         if (ret < 0)
3885                 return ret;
3886         cp_mreg->dst = ret;
3887         ret = mlx5_flow_get_reg_id(dev, MLX5_METADATA_TX, 0, error);
3888         if (ret < 0)
3889                 return ret;
3890         cp_mreg->src = ret;
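             /*
              * When encap_idx equals actions_n - 1 (no encap action found),
              * append the copy action just before END; otherwise insert it at
              * encap_idx and shift the remaining actions down by one slot.
              */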
3891         if (encap_idx != 0)
3892                 memcpy(ext_actions, actions, sizeof(*ext_actions) * encap_idx);
3893         if (encap_idx == actions_n - 1) {
3894                 ext_actions[actions_n - 1] = (struct rte_flow_action){
3895                         .type = (enum rte_flow_action_type)
3896                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3897                         .conf = cp_mreg,
3898                 };
3899                 ext_actions[actions_n] = (struct rte_flow_action){
3900                         .type = RTE_FLOW_ACTION_TYPE_END,
3901                 };
3902         } else {
3903                 ext_actions[encap_idx] = (struct rte_flow_action){
3904                         .type = (enum rte_flow_action_type)
3905                                 MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
3906                         .conf = cp_mreg,
3907                 };
3908                 memcpy(ext_actions + encap_idx + 1, actions + encap_idx,
3909                                 sizeof(*ext_actions) * (actions_n - encap_idx));
3910         }
3911         return 0;
3912 }
3913
3914 /**
3915  * The flow splitting for the metadata feature.
3916  *
3917  * - A Q/RSS action on NIC Rx should be split so that traffic first
3918  *   passes through the mreg copy table (RX_CP_TBL) and then jumps to
3919  *   the action table (RX_ACT_TBL), which holds the split Q/RSS action.
3920  *
3921  * - All the actions on NIC Tx should have a mreg copy action to
3922  *   copy reg_a from WQE to reg_c[0].
3923  *
3924  * @param dev
3925  *   Pointer to Ethernet device.
3926  * @param[in] flow
3927  *   Parent flow structure pointer.
3928  * @param[in] prefix_layers
3929  *   Prefix flow layer flags.
3930  * @param[in] attr
3931  *   Flow rule attributes.
3932  * @param[in] items
3933  *   Pattern specification (list terminated by the END pattern item).
3934  * @param[in] actions
3935  *   Associated actions (list terminated by the END action).
3936  * @param[in] external
3937  *   This flow rule is created by a request external to the PMD.
3938  * @param[in] flow_idx
3939  *   The memory pool index of the flow.
3940  * @param[out] error
3941  *   Perform verbose error reporting if not NULL.
3942  * @return
3943  *   0 on success, negative value otherwise
3944  */
3945 static int
3946 flow_create_split_metadata(struct rte_eth_dev *dev,
3947                            struct rte_flow *flow,
3948                            uint64_t prefix_layers,
3949                            const struct rte_flow_attr *attr,
3950                            const struct rte_flow_item items[],
3951                            const struct rte_flow_action actions[],
3952                            bool external, uint32_t flow_idx,
3953                            struct rte_flow_error *error)
3954 {
3955         struct mlx5_priv *priv = dev->data->dev_private;
3956         struct mlx5_dev_config *config = &priv->config;
3957         const struct rte_flow_action *qrss = NULL;
3958         struct rte_flow_action *ext_actions = NULL;
3959         struct mlx5_flow *dev_flow = NULL;
3960         uint32_t qrss_id = 0;
3961         int mtr_sfx = 0;
3962         size_t act_size;
3963         int actions_n;
3964         int encap_idx;
3965         int ret;
3966
3967         /* Check whether extensive metadata feature is engaged. */
3968         if (!config->dv_flow_en ||
3969             config->dv_xmeta_en == MLX5_XMETA_MODE_LEGACY ||
3970             !mlx5_flow_ext_mreg_supported(dev))
3971                 return flow_create_split_inner(dev, flow, NULL, prefix_layers,
3972                                                attr, items, actions, external,
3973                                                flow_idx, error);
3974         actions_n = flow_parse_metadata_split_actions_info(actions, &qrss,
3975                                                            &encap_idx);
3976         if (qrss) {
3977                 /* Exclude hairpin flows from splitting. */
3978                 if (qrss->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
3979                         const struct rte_flow_action_queue *queue;
3980
3981                         queue = qrss->conf;
3982                         if (mlx5_rxq_get_type(dev, queue->index) ==
3983                             MLX5_RXQ_TYPE_HAIRPIN)
3984                                 qrss = NULL;
3985                 } else if (qrss->type == RTE_FLOW_ACTION_TYPE_RSS) {
3986                         const struct rte_flow_action_rss *rss;
3987
3988                         rss = qrss->conf;
3989                         if (mlx5_rxq_get_type(dev, rss->queue[0]) ==
3990                             MLX5_RXQ_TYPE_HAIRPIN)
3991                                 qrss = NULL;
3992                 }
3993         }
3994         if (qrss) {
3995                 /* Check if it is in meter suffix table. */
3996                 mtr_sfx = attr->group == (attr->transfer ?
3997                           (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
3998                           MLX5_FLOW_TABLE_LEVEL_SUFFIX);
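                     /*
                      * Note: for transfer flows the suffix table group is one
                      * lower, matching the sfx_attr.group assignment in
                      * flow_create_split_meter().
                      */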
3999                 /*
4000                  * The Q/RSS action on NIC Rx should be split: traffic first
4001                  * passes through the mreg copy table (RX_CP_TBL) and then
4002                  * jumps to the action table (RX_ACT_TBL) with the Q/RSS action.
4003                  */
4004                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4005                            sizeof(struct rte_flow_action_set_tag) +
4006                            sizeof(struct rte_flow_action_jump);
4007                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4008                                           SOCKET_ID_ANY);
4009                 if (!ext_actions)
4010                         return rte_flow_error_set(error, ENOMEM,
4011                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4012                                                   NULL, "no memory to split "
4013                                                   "metadata flow");
4014                 /*
4015                  * If this is the meter suffix flow, the tag already exists;
4016                  * set the tag action to void.
4017                  */
4018                 if (mtr_sfx)
4019                         ext_actions[qrss - actions].type =
4020                                                 RTE_FLOW_ACTION_TYPE_VOID;
4021                 else
4022                         ext_actions[qrss - actions].type =
4023                                                 (enum rte_flow_action_type)
4024                                                 MLX5_RTE_FLOW_ACTION_TYPE_TAG;
4025                 /*
4026                  * Create the new action list with the Q/RSS action removed
4027                  * and a set tag plus a jump to the register copy table
4028                  * (RX_CP_TBL) appended. The unique tag ID must be allocated
4029                  * here in advance, because it is needed by the set tag action.
4030                  */
4031                 qrss_id = flow_mreg_split_qrss_prep(dev, ext_actions, actions,
4032                                                     qrss, actions_n, error);
4033                 if (!mtr_sfx && !qrss_id) {
4034                         ret = -rte_errno;
4035                         goto exit;
4036                 }
4037         } else if (attr->egress && !attr->transfer) {
4038                 /*
4039                  * All the actions on NIC Tx should have a metadata register
4040                  * copy action to copy reg_a from WQE to reg_c[meta]
4041                  */
4042                 act_size = sizeof(struct rte_flow_action) * (actions_n + 1) +
4043                            sizeof(struct mlx5_flow_action_copy_mreg);
4044                 ext_actions = mlx5_malloc(MLX5_MEM_ZERO, act_size, 0,
4045                                           SOCKET_ID_ANY);
4046                 if (!ext_actions)
4047                         return rte_flow_error_set(error, ENOMEM,
4048                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4049                                                   NULL, "no memory to split "
4050                                                   "metadata flow");
4051                 /* Create the action list appended with copy register. */
4052                 ret = flow_mreg_tx_copy_prep(dev, ext_actions, actions,
4053                                              actions_n, error, encap_idx);
4054                 if (ret < 0)
4055                         goto exit;
4056         }
4057         /* Add the unmodified original or prefix subflow. */
4058         ret = flow_create_split_inner(dev, flow, &dev_flow, prefix_layers, attr,
4059                                       items, ext_actions ? ext_actions :
4060                                       actions, external, flow_idx, error);
4061         if (ret < 0)
4062                 goto exit;
4063         MLX5_ASSERT(dev_flow);
4064         if (qrss) {
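                     /*
                      * Build the suffix subflow: match on the tag value written
                      * by the prefix subflow and perform the original Q/RSS
                      * action in the RX_ACT_TBL group.
                      */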
4065                 const struct rte_flow_attr q_attr = {
4066                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
4067                         .ingress = 1,
4068                 };
4069                 /* Internal PMD item to match the flow ID in the register. */
4070                 struct mlx5_rte_flow_item_tag q_tag_spec = {
4071                         .data = qrss_id,
4072                         .id = 0,
4073                 };
4074                 struct rte_flow_item q_items[] = {
4075                         {
4076                                 .type = (enum rte_flow_item_type)
4077                                         MLX5_RTE_FLOW_ITEM_TYPE_TAG,
4078                                 .spec = &q_tag_spec,
4079                                 .last = NULL,
4080                                 .mask = NULL,
4081                         },
4082                         {
4083                                 .type = RTE_FLOW_ITEM_TYPE_END,
4084                         },
4085                 };
4086                 struct rte_flow_action q_actions[] = {
4087                         {
4088                                 .type = qrss->type,
4089                                 .conf = qrss->conf,
4090                         },
4091                         {
4092                                 .type = RTE_FLOW_ACTION_TYPE_END,
4093                         },
4094                 };
4095                 uint64_t layers = flow_get_prefix_layer_flags(dev_flow);
4096
4097                 /*
4098                  * Configure the tag item only if there is no meter subflow.
4099                  * Since the tag is already set in the meter suffix subflow,
4100                  * we can use the meter suffix items as-is.
4101                  */
4102                 if (qrss_id) {
4103                         /* Not meter subflow. */
4104                         MLX5_ASSERT(!mtr_sfx);
4105                         /*
4106                          * Store the unique ID in the prefix flow because it is
4107                          * destroyed after the suffix flow, and the ID is freed
4108                          * only when no actual flows use it anymore, at which
4109                          * point identifier reallocation becomes possible (for
4110                          * example, for other flows in other threads).
4111                          */
4112                         dev_flow->handle->split_flow_id = qrss_id;
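                             /*
                              * Resolve which metadata register carries the copy
                              * mark and match the tag item on that register.
                              */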
4113                         ret = mlx5_flow_get_reg_id(dev, MLX5_COPY_MARK, 0,
4114                                                    error);
4115                         if (ret < 0)
4116                                 goto exit;
4117                         q_tag_spec.id = ret;
4118                 }
4119                 dev_flow = NULL;
4120                 /* Add suffix subflow to execute Q/RSS. */
4121                 ret = flow_create_split_inner(dev, flow, &dev_flow, layers,
4122                                               &q_attr, mtr_sfx ? items :
4123                                               q_items, q_actions,
4124                                               external, flow_idx, error);
4125                 if (ret < 0)
4126                         goto exit;
4127                 /* Clear qrss_id: it must be freed at exit only on failure. */
4128                 qrss_id = 0;
4129                 MLX5_ASSERT(dev_flow);
4130         }
4131
4132 exit:
4133         /*
4134          * We do not destroy the partially created sub_flows in case of error.
4135          * They are included in the parent flow list and will be destroyed
4136          * by flow_drv_destroy().
4137          */
4138         flow_qrss_free_id(dev, qrss_id);
4139         mlx5_free(ext_actions);
4140         return ret;
4141 }
4142
4143 /**
4144  * The flow splitting for the meter feature.
4145  *
4146  * - The meter flow is split into two flows: a prefix flow and a
4147  *   suffix flow. Packets continue to the suffix flow only if they
4148  *   pass the prefix meter action.
4149  *
4150  * - Reg_C_5 is used to match a packet between the prefix and
4151  *   suffix flows.
4152  *
4153  * @param dev
4154  *   Pointer to Ethernet device.
4155  * @param[in] flow
4156  *   Parent flow structure pointer.
4157  * @param[in] attr
4158  *   Flow rule attributes.
4159  * @param[in] items
4160  *   Pattern specification (list terminated by the END pattern item).
4161  * @param[in] actions
4162  *   Associated actions (list terminated by the END action).
4163  * @param[in] external
4164  *   This flow rule is created by a request external to the PMD.
4165  * @param[in] flow_idx
4166  *   The memory pool index of the flow.
4167  * @param[out] error
4168  *   Perform verbose error reporting if not NULL.
4169  * @return
4170  *   0 on success, negative value otherwise
4171  */
4172 static int
4173 flow_create_split_meter(struct rte_eth_dev *dev,
4174                            struct rte_flow *flow,
4175                            const struct rte_flow_attr *attr,
4176                            const struct rte_flow_item items[],
4177                            const struct rte_flow_action actions[],
4178                            bool external, uint32_t flow_idx,
4179                            struct rte_flow_error *error)
4180 {
4181         struct mlx5_priv *priv = dev->data->dev_private;
4182         struct rte_flow_action *sfx_actions = NULL;
4183         struct rte_flow_action *pre_actions = NULL;
4184         struct rte_flow_item *sfx_items = NULL;
4185         struct mlx5_flow *dev_flow = NULL;
4186         struct rte_flow_attr sfx_attr = *attr;
4187         uint32_t mtr = 0;
4188         uint32_t mtr_tag_id = 0;
4189         size_t act_size;
4190         size_t item_size;
4191         int actions_n = 0;
4192         int ret;
4193
4194         if (priv->mtr_en)
4195                 actions_n = flow_check_meter_action(actions, &mtr);
4196         if (mtr) {
4197                 /* The five prefix actions: meter, decap, encap, tag, end. */
4198                 act_size = sizeof(struct rte_flow_action) * (actions_n + 5) +
4199                            sizeof(struct mlx5_rte_flow_action_set_tag);
4200                 /* tag, vlan, port id, end. */
4201 #define METER_SUFFIX_ITEM 4
4202                 item_size = sizeof(struct rte_flow_item) * METER_SUFFIX_ITEM +
4203                             sizeof(struct mlx5_rte_flow_item_tag) * 2;
4204                 sfx_actions = mlx5_malloc(MLX5_MEM_ZERO, (act_size + item_size),
4205                                           0, SOCKET_ID_ANY);
4206                 if (!sfx_actions)
4207                         return rte_flow_error_set(error, ENOMEM,
4208                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4209                                                   NULL, "no memory to split "
4210                                                   "meter flow");
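                     /*
                      * The single allocation is partitioned as
                      * [sfx_actions | pre_actions | sfx_items]: the prefix
                      * actions start right after the suffix action slots and
                      * the suffix items follow the whole action area.
                      */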
4211                 sfx_items = (struct rte_flow_item *)((char *)sfx_actions +
4212                              act_size);
4213                 pre_actions = sfx_actions + actions_n;
4214                 mtr_tag_id = flow_meter_split_prep(dev, items, sfx_items,
4215                                                    actions, sfx_actions,
4216                                                    pre_actions);
4217                 if (!mtr_tag_id) {
4218                         ret = -rte_errno;
4219                         goto exit;
4220                 }
4221                 /* Add the prefix subflow. */
4222                 ret = flow_create_split_inner(dev, flow, &dev_flow, 0, attr,
4223                                               items, pre_actions, external,
4224                                               flow_idx, error);
4225                 if (ret) {
4226                         ret = -rte_errno;
4227                         goto exit;
4228                 }
4229                 dev_flow->handle->split_flow_id = mtr_tag_id;
4230                 /* Set the suffix group attribute. */
4231                 sfx_attr.group = sfx_attr.transfer ?
4232                                 (MLX5_FLOW_TABLE_LEVEL_SUFFIX - 1) :
4233                                  MLX5_FLOW_TABLE_LEVEL_SUFFIX;
4234         }
4235         /* Add the suffix subflow (or the unmodified flow when no meter split). */
4236         ret = flow_create_split_metadata(dev, flow, dev_flow ?
4237                                          flow_get_prefix_layer_flags(dev_flow) :
4238                                          0, &sfx_attr,
4239                                          sfx_items ? sfx_items : items,
4240                                          sfx_actions ? sfx_actions : actions,
4241                                          external, flow_idx, error);
4242 exit:
4243         if (sfx_actions)
4244                 mlx5_free(sfx_actions);
4245         return ret;
4246 }
4247
4248 /**
4249  * Split the flow into a set of subflows. The splitters may be linked
4250  * in a chain, like this:
4251  * flow_create_split_outer() calls:
4252  *   flow_create_split_meter() calls:
4253  *     flow_create_split_metadata(meter_subflow_0) calls:
4254  *       flow_create_split_inner(metadata_subflow_0)
4255  *       flow_create_split_inner(metadata_subflow_1)
4256  *       flow_create_split_inner(metadata_subflow_2)
4257  *     flow_create_split_metadata(meter_subflow_1) calls:
4258  *       flow_create_split_inner(metadata_subflow_0)
4259  *       flow_create_split_inner(metadata_subflow_1)
4260  *       flow_create_split_inner(metadata_subflow_2)
4261  *
4262  * This provides a flexible way to add new levels of flow splitting.
4263  * All successfully created subflows are included in the parent flow
4264  * dev_flow list.
4265  *
4266  * @param dev
4267  *   Pointer to Ethernet device.
4268  * @param[in] flow
4269  *   Parent flow structure pointer.
4270  * @param[in] attr
4271  *   Flow rule attributes.
4272  * @param[in] items
4273  *   Pattern specification (list terminated by the END pattern item).
4274  * @param[in] actions
4275  *   Associated actions (list terminated by the END action).
4276  * @param[in] external
4277  *   This flow rule is created by a request external to the PMD.
4278  * @param[in] flow_idx
4279  *   The memory pool index of the flow.
4280  * @param[out] error
4281  *   Perform verbose error reporting if not NULL.
4282  * @return
4283  *   0 on success, negative value otherwise
4284  */
4285 static int
4286 flow_create_split_outer(struct rte_eth_dev *dev,
4287                         struct rte_flow *flow,
4288                         const struct rte_flow_attr *attr,
4289                         const struct rte_flow_item items[],
4290                         const struct rte_flow_action actions[],
4291                         bool external, uint32_t flow_idx,
4292                         struct rte_flow_error *error)
4293 {
4294         int ret;
4295
4296         ret = flow_create_split_meter(dev, flow, attr, items,
4297                                          actions, external, flow_idx, error);
4298         MLX5_ASSERT(ret <= 0);
4299         return ret;
4300 }
4301
4302 /**
4303  * Create a flow and add it to @p list.
4304  *
4305  * @param dev
4306  *   Pointer to Ethernet device.
4307  * @param list
4308  *   Pointer to a TAILQ flow list. If this parameter is NULL,
4309  *   no list insertion occurs, the flow is just created,
4310  *   and it is the caller's responsibility to track the
4311  *   created flow.
4312  * @param[in] attr
4313  *   Flow rule attributes.
4314  * @param[in] items
4315  *   Pattern specification (list terminated by the END pattern item).
4316  * @param[in] actions
4317  *   Associated actions (list terminated by the END action).
4318  * @param[in] external
4319  *   This flow rule is created by a request external to the PMD.
4320  * @param[out] error
4321  *   Perform verbose error reporting if not NULL.
4322  *
4323  * @return
4324  *   A flow index on success, 0 otherwise and rte_errno is set.
4325  */
4326 static uint32_t
4327 flow_list_create(struct rte_eth_dev *dev, uint32_t *list,
4328                  const struct rte_flow_attr *attr,
4329                  const struct rte_flow_item items[],
4330                  const struct rte_flow_action actions[],
4331                  bool external, struct rte_flow_error *error)
4332 {
4333         struct mlx5_priv *priv = dev->data->dev_private;
4334         struct rte_flow *flow = NULL;
4335         struct mlx5_flow *dev_flow;
4336         const struct rte_flow_action_rss *rss;
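             /*
              * Fixed-size scratch buffers: one for the RSS pattern expansion
              * and three for the Rx/Tx action lists and Tx items produced by
              * the hairpin split.
              */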
4337         union {
4338                 struct rte_flow_expand_rss buf;
4339                 uint8_t buffer[2048];
4340         } expand_buffer;
4341         union {
4342                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4343                 uint8_t buffer[2048];
4344         } actions_rx;
4345         union {
4346                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
4347                 uint8_t buffer[2048];
4348         } actions_hairpin_tx;
4349         union {
4350                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
4351                 uint8_t buffer[2048];
4352         } items_tx;
4353         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
4354         struct mlx5_flow_rss_desc *rss_desc = &((struct mlx5_flow_rss_desc *)
4355                                               priv->rss_desc)[!!priv->flow_idx];
4356         const struct rte_flow_action *p_actions_rx = actions;
4357         uint32_t i;
4358         uint32_t idx = 0;
4359         int hairpin_flow;
4360         uint32_t hairpin_id = 0;
4361         struct rte_flow_attr attr_tx = { .priority = 0 };
4362         int ret;
4363
4364         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4365         ret = flow_drv_validate(dev, attr, items, p_actions_rx,
4366                                 external, hairpin_flow, error);
4367         if (ret < 0)
4368                 return 0;
4369         if (hairpin_flow > 0) {
4370                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
4371                         rte_errno = EINVAL;
4372                         return 0;
4373                 }
4374                 flow_hairpin_split(dev, actions, actions_rx.actions,
4375                                    actions_hairpin_tx.actions, items_tx.items,
4376                                    &hairpin_id);
4377                 p_actions_rx = actions_rx.actions;
4378         }
4379         flow = mlx5_ipool_zmalloc(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], &idx);
4380         if (!flow) {
4381                 rte_errno = ENOMEM;
4382                 goto error_before_flow;
4383         }
4384         flow->drv_type = flow_get_drv_type(dev, attr);
4385         if (hairpin_id != 0)
4386                 flow->hairpin_flow_id = hairpin_id;
4387         MLX5_ASSERT(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
4388                     flow->drv_type < MLX5_FLOW_TYPE_MAX);
4389         memset(rss_desc, 0, sizeof(*rss_desc));
4390         rss = flow_get_rss_action(p_actions_rx);
4391         if (rss) {
4392                 /*
4393                  * The following information is required by
4394                  * mlx5_flow_hashfields_adjust() in advance.
4395                  */
4396                 rss_desc->level = rss->level;
4397                 /* RSS type 0 indicates default RSS type (ETH_RSS_IP). */
4398                 rss_desc->types = !rss->types ? ETH_RSS_IP : rss->types;
4399         }
4400         flow->dev_handles = 0;
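             /*
              * Expand the pattern according to the requested RSS types so that
              * each supported protocol combination gets its own subflow;
              * otherwise keep the single original pattern.
              */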
4401         if (rss && rss->types) {
4402                 unsigned int graph_root;
4403
4404                 graph_root = find_graph_root(items, rss->level);
4405                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
4406                                           items, rss->types,
4407                                           mlx5_support_expansion,
4408                                           graph_root);
4409                 MLX5_ASSERT(ret > 0 &&
4410                        (unsigned int)ret < sizeof(expand_buffer.buffer));
4411         } else {
4412                 buf->entries = 1;
4413                 buf->entry[0].pattern = (void *)(uintptr_t)items;
4414         }
4415         /*
4416          * Record the start index when there is a nested call. All sub-flows
4417          * need to be translated before another call is made.
4418          * No ping-pong buffer is used here, to save memory.
4419          */
4420         if (priv->flow_idx) {
4421                 MLX5_ASSERT(!priv->flow_nested_idx);
4422                 priv->flow_nested_idx = priv->flow_idx;
4423         }
4424         for (i = 0; i < buf->entries; ++i) {
4425                 /*
4426                  * The splitter may create multiple dev_flows,
4427                  * depending on configuration. In the simplest
4428                  * case it just creates unmodified original flow.
4429                  */
4430                 ret = flow_create_split_outer(dev, flow, attr,
4431                                               buf->entry[i].pattern,
4432                                               p_actions_rx, external, idx,
4433                                               error);
4434                 if (ret < 0)
4435                         goto error;
4436         }
4437         /* Create the tx flow. */
4438         if (hairpin_flow) {
4439                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
4440                 attr_tx.ingress = 0;
4441                 attr_tx.egress = 1;
4442                 dev_flow = flow_drv_prepare(dev, flow, &attr_tx, items_tx.items,
4443                                          actions_hairpin_tx.actions,
4444                                          idx, error);
4445                 if (!dev_flow)
4446                         goto error;
4447                 dev_flow->flow = flow;
4448                 dev_flow->external = 0;
4449                 SILIST_INSERT(&flow->dev_handles, dev_flow->handle_idx,
4450                               dev_flow->handle, next);
4451                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
4452                                          items_tx.items,
4453                                          actions_hairpin_tx.actions, error);
4454                 if (ret < 0)
4455                         goto error;
4456         }
4457         /*
4458          * Update the metadata register copy table. If extensive
4459          * metadata feature is enabled and registers are supported
4460          * we might create the extra rte_flow for each unique
4461          * MARK/FLAG action ID.
4462          *
4463          * The table is updated for ingress flows only, because
4464          * the egress flows belong to a different device and the
4465          * copy table should be updated in the peer NIC Rx domain.
4466          */
4467         if (attr->ingress &&
4468             (external || attr->group != MLX5_FLOW_MREG_CP_TABLE_GROUP)) {
4469                 ret = flow_mreg_update_copy_table(dev, flow, actions, error);
4470                 if (ret)
4471                         goto error;
4472         }
4473         /*
4474          * If the flow is external (from the application) OR the device is
4475          * started, then the flow will be applied immediately.
4476          */
4477         if (external || dev->data->dev_started) {
4478                 ret = flow_drv_apply(dev, flow, error);
4479                 if (ret < 0)
4480                         goto error;
4481         }
4482         if (list)
4483                 ILIST_INSERT(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list, idx,
4484                              flow, next);
4485         flow_rxq_flags_set(dev, flow);
4486         /* Nested flow creation index recovery. */
4487         priv->flow_idx = priv->flow_nested_idx;
4488         if (priv->flow_nested_idx)
4489                 priv->flow_nested_idx = 0;
4490         return idx;
4491 error:
4492         MLX5_ASSERT(flow);
4493         ret = rte_errno; /* Save rte_errno before cleanup. */
4494         flow_mreg_del_copy_action(dev, flow);
4495         flow_drv_destroy(dev, flow);
4496         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], idx);
4497         rte_errno = ret; /* Restore rte_errno. */
4498 error_before_flow:
4499         ret = rte_errno;
4500         if (hairpin_id)
4501                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4502                                      hairpin_id);
4503         rte_errno = ret;
4504         priv->flow_idx = priv->flow_nested_idx;
4505         if (priv->flow_nested_idx)
4506                 priv->flow_nested_idx = 0;
4507         return 0;
4508 }
4509
4510 /**
4511  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
4512  * incoming packets to table 1.
4513  *
4514  * Other flow rules, requested for group n, will be created in
4515  * e-switch table n+1.
4516  * A jump action to e-switch group n will be translated into a jump to table n+1.
4517  *
4518  * Used when working in switchdev mode, to utilise advantages of table 1
4519  * and above.
4520  *
4521  * @param dev
4522  *   Pointer to Ethernet device.
4523  *
4524  * @return
4525  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
4526  */
4527 struct rte_flow *
4528 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
4529 {
4530         const struct rte_flow_attr attr = {
4531                 .group = 0,
4532                 .priority = 0,
4533                 .ingress = 1,
4534                 .egress = 0,
4535                 .transfer = 1,
4536         };
4537         const struct rte_flow_item pattern = {
4538                 .type = RTE_FLOW_ITEM_TYPE_END,
4539         };
4540         struct rte_flow_action_jump jump = {
4541                 .group = 1,
4542         };
4543         const struct rte_flow_action actions[] = {
4544                 {
4545                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
4546                         .conf = &jump,
4547                 },
4548                 {
4549                         .type = RTE_FLOW_ACTION_TYPE_END,
4550                 },
4551         };
4552         struct mlx5_priv *priv = dev->data->dev_private;
4553         struct rte_flow_error error;
4554
4555         return (void *)(uintptr_t)flow_list_create(dev, &priv->ctrl_flows,
4556                                                    &attr, &pattern,
4557                                                    actions, false, &error);
4558 }
4559
4560 /**
4561  * Validate a flow supported by the NIC.
4562  *
4563  * @see rte_flow_validate()
4564  * @see rte_flow_ops
4565  */
4566 int
4567 mlx5_flow_validate(struct rte_eth_dev *dev,
4568                    const struct rte_flow_attr *attr,
4569                    const struct rte_flow_item items[],
4570                    const struct rte_flow_action actions[],
4571                    struct rte_flow_error *error)
4572 {
4573         int hairpin_flow;
4574
4575         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
4576         return flow_drv_validate(dev, attr, items, actions,
4577                                 true, hairpin_flow, error);
4578 }
4579
4580 /**
4581  * Create a flow.
4582  *
4583  * @see rte_flow_create()
4584  * @see rte_flow_ops
4585  */
4586 struct rte_flow *
4587 mlx5_flow_create(struct rte_eth_dev *dev,
4588                  const struct rte_flow_attr *attr,
4589                  const struct rte_flow_item items[],
4590                  const struct rte_flow_action actions[],
4591                  struct rte_flow_error *error)
4592 {
4593         struct mlx5_priv *priv = dev->data->dev_private;
4594
4595         /*
4596          * If the device is not started yet, the application is not allowed
4597          * to create a flow. PMD default flows and traffic control flows
4598          * are not affected.
4599          */
4600         if (unlikely(!dev->data->dev_started)) {
4601                 DRV_LOG(DEBUG, "port %u is not started when "
4602                         "inserting a flow", dev->data->port_id);
4603                 rte_flow_error_set(error, ENODEV,
4604                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4605                                    NULL,
4606                                    "port not started");
4607                 return NULL;
4608         }
4609         return (void *)(uintptr_t)flow_list_create(dev, &priv->flows,
4610                                   attr, items, actions, true, error);
4611 }
4612
4613 /**
4614  * Destroy a flow in a list.
4615  *
4616  * @param dev
4617  *   Pointer to Ethernet device.
4618  * @param list
4619  *   Pointer to the indexed flow list. If this parameter is NULL,
4620  *   the flow is not removed from any list. Note that since the
4621  *   flow is added to an indexed list, the memory the indexed
4622  *   list points to may change as the flow is destroyed.
4623  * @param[in] flow_idx
4624  *   Index of flow to destroy.
4625  */
4626 static void
4627 flow_list_destroy(struct rte_eth_dev *dev, uint32_t *list,
4628                   uint32_t flow_idx)
4629 {
4630         struct mlx5_priv *priv = dev->data->dev_private;
4631         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
4632         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
4633                                                [MLX5_IPOOL_RTE_FLOW], flow_idx);
4634
4635         if (!flow)
4636                 return;
4637         /*
4638          * Update RX queue flags only if port is started, otherwise it is
4639          * already clean.
4640          */
4641         if (dev->data->dev_started)
4642                 flow_rxq_flags_trim(dev, flow);
4643         if (flow->hairpin_flow_id)
4644                 mlx5_flow_id_release(priv->sh->flow_id_pool,
4645                                      flow->hairpin_flow_id);
4646         flow_drv_destroy(dev, flow);
4647         if (list)
4648                 ILIST_REMOVE(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], list,
4649                              flow_idx, flow, next);
4650         flow_mreg_del_copy_action(dev, flow);
4651         if (flow->fdir) {
4652                 LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
4653                         if (priv_fdir_flow->rix_flow == flow_idx)
4654                                 break;
4655                 }
4656                 if (priv_fdir_flow) {
4657                         LIST_REMOVE(priv_fdir_flow, next);
4658                         mlx5_free(priv_fdir_flow->fdir);
4659                         mlx5_free(priv_fdir_flow);
4660                 }
4661         }
4662         mlx5_ipool_free(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
4663 }
4664
4665 /**
4666  * Destroy all flows.
4667  *
4668  * @param dev
4669  *   Pointer to Ethernet device.
4670  * @param list
4671  *   Pointer to the Indexed flow list.
4672  * @param active
4673  *   If flushing is called actively.
4674  */
4675 void
4676 mlx5_flow_list_flush(struct rte_eth_dev *dev, uint32_t *list, bool active)
4677 {
4678         uint32_t num_flushed = 0;
4679
4680         while (*list) {
4681                 flow_list_destroy(dev, list, *list);
4682                 num_flushed++;
4683         }
4684         if (active) {
4685                 DRV_LOG(INFO, "port %u: %u flows flushed before stopping",
4686                         dev->data->port_id, num_flushed);
4687         }
4688 }
4689
4690 /**
4691  * Remove all flows.
4692  *
4693  * @param dev
4694  *   Pointer to Ethernet device.
4695  * @param list
4696  *   Pointer to the Indexed flow list.
4697  */
4698 void
4699 mlx5_flow_stop(struct rte_eth_dev *dev, uint32_t *list)
4700 {
4701         struct mlx5_priv *priv = dev->data->dev_private;
4702         struct rte_flow *flow = NULL;
4703         uint32_t idx;
4704
4705         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4706                       flow, next) {
4707                 flow_drv_remove(dev, flow);
4708                 flow_mreg_stop_copy_action(dev, flow);
4709         }
4710         flow_mreg_del_default_copy_action(dev);
4711         flow_rxq_flags_clear(dev);
4712 }
4713
4714 /**
4715  * Add all flows.
4716  *
4717  * @param dev
4718  *   Pointer to Ethernet device.
4719  * @param list
4720  *   Pointer to the Indexed flow list.
4721  *
4722  * @return
4723  *   0 on success, a negative errno value otherwise and rte_errno is set.
4724  */
4725 int
4726 mlx5_flow_start(struct rte_eth_dev *dev, uint32_t *list)
4727 {
4728         struct mlx5_priv *priv = dev->data->dev_private;
4729         struct rte_flow *flow = NULL;
4730         struct rte_flow_error error;
4731         uint32_t idx;
4732         int ret = 0;
4733
4734         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4735         ret = flow_mreg_add_default_copy_action(dev, &error);
4736         if (ret < 0)
4737                 return -rte_errno;
4738         /* Apply Flows created by application. */
4739         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], *list, idx,
4740                       flow, next) {
4741                 ret = flow_mreg_start_copy_action(dev, flow);
4742                 if (ret < 0)
4743                         goto error;
4744                 ret = flow_drv_apply(dev, flow, &error);
4745                 if (ret < 0)
4746                         goto error;
4747                 flow_rxq_flags_set(dev, flow);
4748         }
4749         return 0;
4750 error:
4751         ret = rte_errno; /* Save rte_errno before cleanup. */
4752         mlx5_flow_stop(dev, list);
4753         rte_errno = ret; /* Restore rte_errno. */
4754         return -rte_errno;
4755 }
4756
4757 /**
4758  * Stop all default actions for flows.
4759  *
4760  * @param dev
4761  *   Pointer to Ethernet device.
4762  */
4763 void
4764 mlx5_flow_stop_default(struct rte_eth_dev *dev)
4765 {
4766         flow_mreg_del_default_copy_action(dev);
4767         flow_rxq_flags_clear(dev);
4768 }
4769
4770 /**
4771  * Start all default actions for flows.
4772  *
4773  * @param dev
4774  *   Pointer to Ethernet device.
4775  * @return
4776  *   0 on success, a negative errno value otherwise and rte_errno is set.
4777  */
4778 int
4779 mlx5_flow_start_default(struct rte_eth_dev *dev)
4780 {
4781         struct rte_flow_error error;
4782
4783         /* Make sure default copy action (reg_c[0] -> reg_b) is created. */
4784         return flow_mreg_add_default_copy_action(dev, &error);
4785 }
4786
4787 /**
4788  * Allocate intermediate resources for flow creation.
4789  *
4790  * @param dev
4791  *   Pointer to Ethernet device.
4792  */
4793 void
4794 mlx5_flow_alloc_intermediate(struct rte_eth_dev *dev)
4795 {
4796         struct mlx5_priv *priv = dev->data->dev_private;
4797
4798         if (!priv->inter_flows) {
4799                 priv->inter_flows = mlx5_malloc(MLX5_MEM_ZERO,
4800                                     MLX5_NUM_MAX_DEV_FLOWS *
4801                                     sizeof(struct mlx5_flow) +
4802                                     (sizeof(struct mlx5_flow_rss_desc) +
4803                                     sizeof(uint16_t) * UINT16_MAX) * 2, 0,
4804                                     SOCKET_ID_ANY);
4805                 if (!priv->inter_flows) {
4806                         DRV_LOG(ERR, "can't allocate intermediate memory.");
4807                         return;
4808                 }
4809         }
4810         priv->rss_desc = &((struct mlx5_flow *)priv->inter_flows)
4811                          [MLX5_NUM_MAX_DEV_FLOWS];
4812         /* Reset the index. */
4813         priv->flow_idx = 0;
4814         priv->flow_nested_idx = 0;
4815 }
4816
4817 /**
4818  * Free intermediate resources for flows.
4819  *
4820  * @param dev
4821  *   Pointer to Ethernet device.
4822  */
4823 void
4824 mlx5_flow_free_intermediate(struct rte_eth_dev *dev)
4825 {
4826         struct mlx5_priv *priv = dev->data->dev_private;
4827
4828         mlx5_free(priv->inter_flows);
4829         priv->inter_flows = NULL;
4830 }
4831
4832 /**
4833  * Verify that the flow list is empty.
4834  *
4835  * @param dev
4836  *  Pointer to Ethernet device.
4837  *
4838  * @return the number of flows not released.
4839  */
4840 int
4841 mlx5_flow_verify(struct rte_eth_dev *dev)
4842 {
4843         struct mlx5_priv *priv = dev->data->dev_private;
4844         struct rte_flow *flow;
4845         uint32_t idx;
4846         int ret = 0;
4847
4848         ILIST_FOREACH(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], priv->flows, idx,
4849                       flow, next) {
4850                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
4851                         dev->data->port_id, (void *)flow);
4852                 ++ret;
4853         }
4854         return ret;
4855 }
4856
4857 /**
4858  * Enable default hairpin egress flow.
4859  *
4860  * @param dev
4861  *   Pointer to Ethernet device.
4862  * @param queue
4863  *   The queue index.
4864  *
4865  * @return
4866  *   0 on success, a negative errno value otherwise and rte_errno is set.
4867  */
4868 int
4869 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
4870                             uint32_t queue)
4871 {
4872         struct mlx5_priv *priv = dev->data->dev_private;
4873         const struct rte_flow_attr attr = {
4874                 .egress = 1,
4875                 .priority = 0,
4876         };
4877         struct mlx5_rte_flow_item_tx_queue queue_spec = {
4878                 .queue = queue,
4879         };
4880         struct mlx5_rte_flow_item_tx_queue queue_mask = {
4881                 .queue = UINT32_MAX,
4882         };
4883         struct rte_flow_item items[] = {
4884                 {
4885                         .type = (enum rte_flow_item_type)
4886                                 MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
4887                         .spec = &queue_spec,
4888                         .last = NULL,
4889                         .mask = &queue_mask,
4890                 },
4891                 {
4892                         .type = RTE_FLOW_ITEM_TYPE_END,
4893                 },
4894         };
4895         struct rte_flow_action_jump jump = {
4896                 .group = MLX5_HAIRPIN_TX_TABLE,
4897         };
4898         struct rte_flow_action actions[2];
4899         uint32_t flow_idx;
4900         struct rte_flow_error error;
4901
4902         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
4903         actions[0].conf = &jump;
4904         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
4905         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
4906                                 &attr, items, actions, false, &error);
4907         if (!flow_idx) {
4908                 DRV_LOG(DEBUG,
4909                         "Failed to create ctrl flow: rte_errno(%d),"
4910                         " type(%d), message(%s)",
4911                         rte_errno, error.type,
4912                         error.message ? error.message : " (no stated reason)");
4913                 return -rte_errno;
4914         }
4915         return 0;
4916 }
4917
4918 /**
4919  * Enable a control flow configured from the control plane.
4920  *
4921  * @param dev
4922  *   Pointer to Ethernet device.
4923  * @param eth_spec
4924  *   An Ethernet flow spec to apply.
4925  * @param eth_mask
4926  *   An Ethernet flow mask to apply.
4927  * @param vlan_spec
4928  *   A VLAN flow spec to apply.
4929  * @param vlan_mask
4930  *   A VLAN flow mask to apply.
4931  *
4932  * @return
4933  *   0 on success, a negative errno value otherwise and rte_errno is set.
4934  */
4935 int
4936 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
4937                     struct rte_flow_item_eth *eth_spec,
4938                     struct rte_flow_item_eth *eth_mask,
4939                     struct rte_flow_item_vlan *vlan_spec,
4940                     struct rte_flow_item_vlan *vlan_mask)
4941 {
4942         struct mlx5_priv *priv = dev->data->dev_private;
4943         const struct rte_flow_attr attr = {
4944                 .ingress = 1,
4945                 .priority = MLX5_FLOW_PRIO_RSVD,
4946         };
4947         struct rte_flow_item items[] = {
4948                 {
4949                         .type = RTE_FLOW_ITEM_TYPE_ETH,
4950                         .spec = eth_spec,
4951                         .last = NULL,
4952                         .mask = eth_mask,
4953                 },
4954                 {
4955                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
4956                                               RTE_FLOW_ITEM_TYPE_END,
4957                         .spec = vlan_spec,
4958                         .last = NULL,
4959                         .mask = vlan_mask,
4960                 },
4961                 {
4962                         .type = RTE_FLOW_ITEM_TYPE_END,
4963                 },
4964         };
4965         uint16_t queue[priv->reta_idx_n];
4966         struct rte_flow_action_rss action_rss = {
4967                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4968                 .level = 0,
4969                 .types = priv->rss_conf.rss_hf,
4970                 .key_len = priv->rss_conf.rss_key_len,
4971                 .queue_num = priv->reta_idx_n,
4972                 .key = priv->rss_conf.rss_key,
4973                 .queue = queue,
4974         };
4975         struct rte_flow_action actions[] = {
4976                 {
4977                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4978                         .conf = &action_rss,
4979                 },
4980                 {
4981                         .type = RTE_FLOW_ACTION_TYPE_END,
4982                 },
4983         };
4984         uint32_t flow_idx;
4985         struct rte_flow_error error;
4986         unsigned int i;
4987
4988         if (!priv->reta_idx_n || !priv->rxqs_n) {
4989                 return 0;
4990         }
4991         if (!(dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG))
4992                 action_rss.types = 0;
4993         for (i = 0; i != priv->reta_idx_n; ++i)
4994                 queue[i] = (*priv->reta_idx)[i];
4995         flow_idx = flow_list_create(dev, &priv->ctrl_flows,
4996                                 &attr, items, actions, false, &error);
4997         if (!flow_idx)
4998                 return -rte_errno;
4999         return 0;
5000 }
5001
5002 /**
5003  * Enable a control flow configured from the control plane.
5004  *
5005  * @param dev
5006  *   Pointer to Ethernet device.
5007  * @param eth_spec
5008  *   An Ethernet flow spec to apply.
5009  * @param eth_mask
5010  *   An Ethernet flow mask to apply.
5011  *
5012  * @return
5013  *   0 on success, a negative errno value otherwise and rte_errno is set.
5014  */
5015 int
5016 mlx5_ctrl_flow(struct rte_eth_dev *dev,
5017                struct rte_flow_item_eth *eth_spec,
5018                struct rte_flow_item_eth *eth_mask)
5019 {
5020         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
5021 }
5022
5023 /**
5024  * Create a default miss flow rule matching LACP traffic.
5025  *
5026  * @param dev
5027  *   Pointer to Ethernet device.
5030  *
5031  * @return
5032  *   0 on success, a negative errno value otherwise and rte_errno is set.
5033  */
5034 int
5035 mlx5_flow_lacp_miss(struct rte_eth_dev *dev)
5036 {
5037         struct mlx5_priv *priv = dev->data->dev_private;
5038         /*
5039          * LACP matching is done by ether type only, since matching on the
5040          * multicast dst MAC causes the kernel to give this flow a low priority.
5041          */
5042         static const struct rte_flow_item_eth lacp_spec = {
5043                 .type = RTE_BE16(0x8809),
5044         };
5045         static const struct rte_flow_item_eth lacp_mask = {
5046                 .type = 0xffff,
5047         };
5048         const struct rte_flow_attr attr = {
5049                 .ingress = 1,
5050         };
5051         struct rte_flow_item items[] = {
5052                 {
5053                         .type = RTE_FLOW_ITEM_TYPE_ETH,
5054                         .spec = &lacp_spec,
5055                         .mask = &lacp_mask,
5056                 },
5057                 {
5058                         .type = RTE_FLOW_ITEM_TYPE_END,
5059                 },
5060         };
5061         struct rte_flow_action actions[] = {
5062                 {
5063                         .type = (enum rte_flow_action_type)
5064                                 MLX5_RTE_FLOW_ACTION_TYPE_DEFAULT_MISS,
5065                 },
5066                 {
5067                         .type = RTE_FLOW_ACTION_TYPE_END,
5068                 },
5069         };
5070         struct rte_flow_error error;
5071         uint32_t flow_idx = flow_list_create(dev, &priv->ctrl_flows,
5072                                 &attr, items, actions, false, &error);
5073
5074         if (!flow_idx)
5075                 return -rte_errno;
5076         return 0;
5077 }
5078
5079 /**
5080  * Destroy a flow.
5081  *
5082  * @see rte_flow_destroy()
5083  * @see rte_flow_ops
5084  */
5085 int
5086 mlx5_flow_destroy(struct rte_eth_dev *dev,
5087                   struct rte_flow *flow,
5088                   struct rte_flow_error *error __rte_unused)
5089 {
5090         struct mlx5_priv *priv = dev->data->dev_private;
5091
5092         flow_list_destroy(dev, &priv->flows, (uintptr_t)(void *)flow);
5093         return 0;
5094 }
5095
5096 /**
5097  * Destroy all flows.
5098  *
5099  * @see rte_flow_flush()
5100  * @see rte_flow_ops
5101  */
5102 int
5103 mlx5_flow_flush(struct rte_eth_dev *dev,
5104                 struct rte_flow_error *error __rte_unused)
5105 {
5106         struct mlx5_priv *priv = dev->data->dev_private;
5107
5108         mlx5_flow_list_flush(dev, &priv->flows, false);
5109         return 0;
5110 }
5111
5112 /**
5113  * Isolated mode.
5114  *
5115  * @see rte_flow_isolate()
5116  * @see rte_flow_ops
5117  */
5118 int
5119 mlx5_flow_isolate(struct rte_eth_dev *dev,
5120                   int enable,
5121                   struct rte_flow_error *error)
5122 {
5123         struct mlx5_priv *priv = dev->data->dev_private;
5124
5125         if (dev->data->dev_started) {
5126                 rte_flow_error_set(error, EBUSY,
5127                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5128                                    NULL,
5129                                    "port must be stopped first");
5130                 return -rte_errno;
5131         }
5132         priv->isolated = !!enable;
5133         if (enable)
5134                 dev->dev_ops = &mlx5_os_dev_ops_isolate;
5135         else
5136                 dev->dev_ops = &mlx5_os_dev_ops;
5137         return 0;
5138 }
5139
5140 /**
5141  * Query a flow.
5142  *
5143  * @see rte_flow_query()
5144  * @see rte_flow_ops
5145  */
5146 static int
5147 flow_drv_query(struct rte_eth_dev *dev,
5148                uint32_t flow_idx,
5149                const struct rte_flow_action *actions,
5150                void *data,
5151                struct rte_flow_error *error)
5152 {
5153         struct mlx5_priv *priv = dev->data->dev_private;
5154         const struct mlx5_flow_driver_ops *fops;
5155         struct rte_flow *flow = mlx5_ipool_get(priv->sh->ipool
5156                                                [MLX5_IPOOL_RTE_FLOW],
5157                                                flow_idx);
5158         enum mlx5_flow_drv_type ftype;
5159
5160         if (!flow) {
5161                 return rte_flow_error_set(error, ENOENT,
5162                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
5163                           NULL,
5164                           "invalid flow handle");
5165         }
5166         ftype = flow->drv_type;
5167         MLX5_ASSERT(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
5168         fops = flow_get_drv_ops(ftype);
5169
5170         return fops->query(dev, flow, actions, data, error);
5171 }
5172
5173 /**
5174  * Query a flow.
5175  *
5176  * @see rte_flow_query()
5177  * @see rte_flow_ops
5178  */
5179 int
5180 mlx5_flow_query(struct rte_eth_dev *dev,
5181                 struct rte_flow *flow,
5182                 const struct rte_flow_action *actions,
5183                 void *data,
5184                 struct rte_flow_error *error)
5185 {
5186         int ret;
5187
5188         ret = flow_drv_query(dev, (uintptr_t)(void *)flow, actions, data,
5189                              error);
5190         if (ret < 0)
5191                 return ret;
5192         return 0;
5193 }
5194
5195 /**
5196  * Convert a flow director filter to a generic flow.
5197  *
5198  * @param dev
5199  *   Pointer to Ethernet device.
5200  * @param fdir_filter
5201  *   Flow director filter to add.
5202  * @param attributes
5203  *   Generic flow parameters structure.
5204  *
5205  * @return
5206  *   0 on success, a negative errno value otherwise and rte_errno is set.
5207  */
5208 static int
5209 flow_fdir_filter_convert(struct rte_eth_dev *dev,
5210                          const struct rte_eth_fdir_filter *fdir_filter,
5211                          struct mlx5_fdir *attributes)
5212 {
5213         struct mlx5_priv *priv = dev->data->dev_private;
5214         const struct rte_eth_fdir_input *input = &fdir_filter->input;
5215         const struct rte_eth_fdir_masks *mask =
5216                 &dev->data->dev_conf.fdir_conf.mask;
5217
5218         /* Validate queue number. */
5219         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
5220                 DRV_LOG(ERR, "port %u invalid queue number %d",
5221                         dev->data->port_id, fdir_filter->action.rx_queue);
5222                 rte_errno = EINVAL;
5223                 return -rte_errno;
5224         }
5225         attributes->attr.ingress = 1;
5226         attributes->items[0] = (struct rte_flow_item) {
5227                 .type = RTE_FLOW_ITEM_TYPE_ETH,
5228                 .spec = &attributes->l2,
5229                 .mask = &attributes->l2_mask,
5230         };
5231         switch (fdir_filter->action.behavior) {
5232         case RTE_ETH_FDIR_ACCEPT:
5233                 attributes->actions[0] = (struct rte_flow_action){
5234                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
5235                         .conf = &attributes->queue,
5236                 };
5237                 break;
5238         case RTE_ETH_FDIR_REJECT:
5239                 attributes->actions[0] = (struct rte_flow_action){
5240                         .type = RTE_FLOW_ACTION_TYPE_DROP,
5241                 };
5242                 break;
5243         default:
5244                 DRV_LOG(ERR, "port %u invalid behavior %d",
5245                         dev->data->port_id,
5246                         fdir_filter->action.behavior);
5247                 rte_errno = ENOTSUP;
5248                 return -rte_errno;
5249         }
5250         attributes->queue.index = fdir_filter->action.rx_queue;
5251         /* Handle L3. */
5252         switch (fdir_filter->input.flow_type) {
5253         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5254         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5255         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5256                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
5257                         .src_addr = input->flow.ip4_flow.src_ip,
5258                         .dst_addr = input->flow.ip4_flow.dst_ip,
5259                         .time_to_live = input->flow.ip4_flow.ttl,
5260                         .type_of_service = input->flow.ip4_flow.tos,
5261                 };
5262                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
5263                         .src_addr = mask->ipv4_mask.src_ip,
5264                         .dst_addr = mask->ipv4_mask.dst_ip,
5265                         .time_to_live = mask->ipv4_mask.ttl,
5266                         .type_of_service = mask->ipv4_mask.tos,
5267                         .next_proto_id = mask->ipv4_mask.proto,
5268                 };
5269                 attributes->items[1] = (struct rte_flow_item){
5270                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
5271                         .spec = &attributes->l3,
5272                         .mask = &attributes->l3_mask,
5273                 };
5274                 break;
5275         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5276         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5277         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5278                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
5279                         .hop_limits = input->flow.ipv6_flow.hop_limits,
5280                         .proto = input->flow.ipv6_flow.proto,
5281                 };
5282
5283                 memcpy(attributes->l3.ipv6.hdr.src_addr,
5284                        input->flow.ipv6_flow.src_ip,
5285                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
5286                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
5287                        input->flow.ipv6_flow.dst_ip,
5288                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
5289                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
5290                        mask->ipv6_mask.src_ip,
5291                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
5292                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
5293                        mask->ipv6_mask.dst_ip,
5294                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
5295                 attributes->items[1] = (struct rte_flow_item){
5296                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
5297                         .spec = &attributes->l3,
5298                         .mask = &attributes->l3_mask,
5299                 };
5300                 break;
5301         default:
5302                 DRV_LOG(ERR, "port %u invalid flow type %d",
5303                         dev->data->port_id, fdir_filter->input.flow_type);
5304                 rte_errno = ENOTSUP;
5305                 return -rte_errno;
5306         }
5307         /* Handle L4. */
5308         switch (fdir_filter->input.flow_type) {
5309         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
5310                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5311                         .src_port = input->flow.udp4_flow.src_port,
5312                         .dst_port = input->flow.udp4_flow.dst_port,
5313                 };
5314                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5315                         .src_port = mask->src_port_mask,
5316                         .dst_port = mask->dst_port_mask,
5317                 };
5318                 attributes->items[2] = (struct rte_flow_item){
5319                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5320                         .spec = &attributes->l4,
5321                         .mask = &attributes->l4_mask,
5322                 };
5323                 break;
5324         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
5325                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5326                         .src_port = input->flow.tcp4_flow.src_port,
5327                         .dst_port = input->flow.tcp4_flow.dst_port,
5328                 };
5329                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5330                         .src_port = mask->src_port_mask,
5331                         .dst_port = mask->dst_port_mask,
5332                 };
5333                 attributes->items[2] = (struct rte_flow_item){
5334                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5335                         .spec = &attributes->l4,
5336                         .mask = &attributes->l4_mask,
5337                 };
5338                 break;
5339         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
5340                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
5341                         .src_port = input->flow.udp6_flow.src_port,
5342                         .dst_port = input->flow.udp6_flow.dst_port,
5343                 };
5344                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
5345                         .src_port = mask->src_port_mask,
5346                         .dst_port = mask->dst_port_mask,
5347                 };
5348                 attributes->items[2] = (struct rte_flow_item){
5349                         .type = RTE_FLOW_ITEM_TYPE_UDP,
5350                         .spec = &attributes->l4,
5351                         .mask = &attributes->l4_mask,
5352                 };
5353                 break;
5354         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
5355                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
5356                         .src_port = input->flow.tcp6_flow.src_port,
5357                         .dst_port = input->flow.tcp6_flow.dst_port,
5358                 };
5359                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
5360                         .src_port = mask->src_port_mask,
5361                         .dst_port = mask->dst_port_mask,
5362                 };
5363                 attributes->items[2] = (struct rte_flow_item){
5364                         .type = RTE_FLOW_ITEM_TYPE_TCP,
5365                         .spec = &attributes->l4,
5366                         .mask = &attributes->l4_mask,
5367                 };
5368                 break;
5369         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
5370         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
5371                 break;
5372         default:
5373                 DRV_LOG(ERR, "port %u invalid flow type %d",
5374                         dev->data->port_id, fdir_filter->input.flow_type);
5375                 rte_errno = ENOTSUP;
5376                 return -rte_errno;
5377         }
5378         return 0;
5379 }
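
/*
 * Example of the conversion result (editorial addition): an
 * RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter accepted to queue 3 is translated
 * into the generic pattern and action lists
 *
 *	pattern: ETH / IPV4 (src, dst, ttl, tos) / UDP (src_port, dst_port)
 *	actions: QUEUE (index = 3)
 *
 * with the item masks taken from dev->data->dev_conf.fdir_conf.mask. The
 * lists are terminated by the zero-initialized remaining entries, since
 * RTE_FLOW_ITEM_TYPE_END and RTE_FLOW_ACTION_TYPE_END are both 0.
 */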
5380
5381 #define FLOW_FDIR_CMP(f1, f2, fld) \
5382         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
5383
5384 /**
5385  * Compare two FDIR flows. If the attributes, items and actions of the two
5386  * flows are identical, they are regarded as the same flow.
5387  *
5390  * @param f1
5391  *   FDIR flow to compare.
5392  * @param f2
5393  *   FDIR flow to compare.
5394  *
5395  * @return
5396  *   Zero on match, 1 otherwise.
5397  */
5398 static int
5399 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
5400 {
5401         if (FLOW_FDIR_CMP(f1, f2, attr) ||
5402             FLOW_FDIR_CMP(f1, f2, l2) ||
5403             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
5404             FLOW_FDIR_CMP(f1, f2, l3) ||
5405             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
5406             FLOW_FDIR_CMP(f1, f2, l4) ||
5407             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
5408             FLOW_FDIR_CMP(f1, f2, actions[0].type))
5409                 return 1;
5410         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
5411             FLOW_FDIR_CMP(f1, f2, queue))
5412                 return 1;
5413         return 0;
5414 }
5415
5416 /**
5417  * Search device flow list to find out a matched FDIR flow.
5418  *
5419  * @param dev
5420  *   Pointer to Ethernet device.
5421  * @param fdir_flow
5422  *   FDIR flow to lookup.
5423  *
5424  * @return
5425  *   Index of flow if found, 0 otherwise.
5426  */
5427 static uint32_t
5428 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
5429 {
5430         struct mlx5_priv *priv = dev->data->dev_private;
5431         uint32_t flow_idx = 0;
5432         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5433
5434         MLX5_ASSERT(fdir_flow);
5435         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5436                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, fdir_flow)) {
5437                         flow_idx = priv_fdir_flow->rix_flow;
5438                         DRV_LOG(DEBUG, "port %u found FDIR flow %u",
5439                                 dev->data->port_id, flow_idx);
5440                         break;
5441                 }
5442         }
5443         return flow_idx;
5444 }
5445
5446 /**
5447  * Add new flow director filter and store it in list.
5448  *
5449  * @param dev
5450  *   Pointer to Ethernet device.
5451  * @param fdir_filter
5452  *   Flow director filter to add.
5453  *
5454  * @return
5455  *   0 on success, a negative errno value otherwise and rte_errno is set.
5456  */
5457 static int
5458 flow_fdir_filter_add(struct rte_eth_dev *dev,
5459                      const struct rte_eth_fdir_filter *fdir_filter)
5460 {
5461         struct mlx5_priv *priv = dev->data->dev_private;
5462         struct mlx5_fdir *fdir_flow;
5463         struct rte_flow *flow;
5464         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5465         uint32_t flow_idx;
5466         int ret;
5467
5468         fdir_flow = mlx5_malloc(MLX5_MEM_ZERO, sizeof(*fdir_flow), 0,
5469                                 SOCKET_ID_ANY);
5470         if (!fdir_flow) {
5471                 rte_errno = ENOMEM;
5472                 return -rte_errno;
5473         }
5474         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
5475         if (ret)
5476                 goto error;
5477         flow_idx = flow_fdir_filter_lookup(dev, fdir_flow);
5478         if (flow_idx) {
5479                 rte_errno = EEXIST;
5480                 goto error;
5481         }
5482         priv_fdir_flow = mlx5_malloc(MLX5_MEM_ZERO,
5483                                      sizeof(struct mlx5_fdir_flow),
5484                                      0, SOCKET_ID_ANY);
5485         if (!priv_fdir_flow) {
5486                 rte_errno = ENOMEM;
5487                 goto error;
5488         }
5489         flow_idx = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
5490                                     fdir_flow->items, fdir_flow->actions, true,
5491                                     NULL);
5492         flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW], flow_idx);
5493         if (!flow)
5494                 goto error;
5495         flow->fdir = 1;
5496         priv_fdir_flow->fdir = fdir_flow;
5497         priv_fdir_flow->rix_flow = flow_idx;
5498         LIST_INSERT_HEAD(&priv->fdir_flows, priv_fdir_flow, next);
5499         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
5500                 dev->data->port_id, (void *)flow);
5501         return 0;
5502 error:
5503         mlx5_free(priv_fdir_flow);
5504         mlx5_free(fdir_flow);
5505         return -rte_errno;
5506 }
5507
5508 /**
5509  * Delete specific filter.
5510  *
5511  * @param dev
5512  *   Pointer to Ethernet device.
5513  * @param fdir_filter
5514  *   Filter to be deleted.
5515  *
5516  * @return
5517  *   0 on success, a negative errno value otherwise and rte_errno is set.
5518  */
5519 static int
5520 flow_fdir_filter_delete(struct rte_eth_dev *dev,
5521                         const struct rte_eth_fdir_filter *fdir_filter)
5522 {
5523         struct mlx5_priv *priv = dev->data->dev_private;
5524         uint32_t flow_idx;
5525         struct mlx5_fdir fdir_flow = {
5526                 .attr.group = 0,
5527         };
5528         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5529         int ret;
5530
5531         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
5532         if (ret)
5533                 return -rte_errno;
5534         LIST_FOREACH(priv_fdir_flow, &priv->fdir_flows, next) {
5535                 /* Find the fdir in priv list */
5536                 if (!flow_fdir_cmp(priv_fdir_flow->fdir, &fdir_flow))
5537                         break;
5538         }
5539         if (!priv_fdir_flow)
5540                 return 0;
5541         LIST_REMOVE(priv_fdir_flow, next);
5542         flow_idx = priv_fdir_flow->rix_flow;
5543         flow_list_destroy(dev, &priv->flows, flow_idx);
5544         mlx5_free(priv_fdir_flow->fdir);
5545         mlx5_free(priv_fdir_flow);
5546         DRV_LOG(DEBUG, "port %u deleted FDIR flow %u",
5547                 dev->data->port_id, flow_idx);
5548         return 0;
5549 }
5550
5551 /**
5552  * Update queue for specific filter.
5553  *
5554  * @param dev
5555  *   Pointer to Ethernet device.
5556  * @param fdir_filter
5557  *   Filter to be updated.
5558  *
5559  * @return
5560  *   0 on success, a negative errno value otherwise and rte_errno is set.
5561  */
5562 static int
5563 flow_fdir_filter_update(struct rte_eth_dev *dev,
5564                         const struct rte_eth_fdir_filter *fdir_filter)
5565 {
5566         int ret;
5567
5568         ret = flow_fdir_filter_delete(dev, fdir_filter);
5569         if (ret)
5570                 return ret;
5571         return flow_fdir_filter_add(dev, fdir_filter);
5572 }
5573
5574 /**
5575  * Flush all filters.
5576  *
5577  * @param dev
5578  *   Pointer to Ethernet device.
5579  */
5580 static void
5581 flow_fdir_filter_flush(struct rte_eth_dev *dev)
5582 {
5583         struct mlx5_priv *priv = dev->data->dev_private;
5584         struct mlx5_fdir_flow *priv_fdir_flow = NULL;
5585
5586         while (!LIST_EMPTY(&priv->fdir_flows)) {
5587                 priv_fdir_flow = LIST_FIRST(&priv->fdir_flows);
5588                 LIST_REMOVE(priv_fdir_flow, next);
5589                 flow_list_destroy(dev, &priv->flows, priv_fdir_flow->rix_flow);
5590                 mlx5_free(priv_fdir_flow->fdir);
5591                 mlx5_free(priv_fdir_flow);
5592         }
5593 }
5594
5595 /**
5596  * Get flow director information.
5597  *
5598  * @param dev
5599  *   Pointer to Ethernet device.
5600  * @param[out] fdir_info
5601  *   Resulting flow director information.
5602  */
5603 static void
5604 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
5605 {
5606         struct rte_eth_fdir_masks *mask =
5607                 &dev->data->dev_conf.fdir_conf.mask;
5608
5609         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
5610         fdir_info->guarant_spc = 0;
5611         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
5612         fdir_info->max_flexpayload = 0;
5613         fdir_info->flow_types_mask[0] = 0;
5614         fdir_info->flex_payload_unit = 0;
5615         fdir_info->max_flex_payload_segment_num = 0;
5616         fdir_info->flex_payload_limit = 0;
5617         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
5618 }
5619
5620 /**
5621  * Deal with flow director operations.
5622  *
5623  * @param dev
5624  *   Pointer to Ethernet device.
5625  * @param filter_op
5626  *   Operation to perform.
5627  * @param arg
5628  *   Pointer to operation-specific structure.
5629  *
5630  * @return
5631  *   0 on success, a negative errno value otherwise and rte_errno is set.
5632  */
5633 static int
5634 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
5635                     void *arg)
5636 {
5637         enum rte_fdir_mode fdir_mode =
5638                 dev->data->dev_conf.fdir_conf.mode;
5639
5640         if (filter_op == RTE_ETH_FILTER_NOP)
5641                 return 0;
5642         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
5643             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
5644                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
5645                         dev->data->port_id, fdir_mode);
5646                 rte_errno = EINVAL;
5647                 return -rte_errno;
5648         }
5649         switch (filter_op) {
5650         case RTE_ETH_FILTER_ADD:
5651                 return flow_fdir_filter_add(dev, arg);
5652         case RTE_ETH_FILTER_UPDATE:
5653                 return flow_fdir_filter_update(dev, arg);
5654         case RTE_ETH_FILTER_DELETE:
5655                 return flow_fdir_filter_delete(dev, arg);
5656         case RTE_ETH_FILTER_FLUSH:
5657                 flow_fdir_filter_flush(dev);
5658                 break;
5659         case RTE_ETH_FILTER_INFO:
5660                 flow_fdir_info_get(dev, arg);
5661                 break;
5662         default:
5663                 DRV_LOG(DEBUG, "port %u unknown operation %u",
5664                         dev->data->port_id, filter_op);
5665                 rte_errno = EINVAL;
5666                 return -rte_errno;
5667         }
5668         return 0;
5669 }
5670
5671 /**
5672  * Manage filter operations.
5673  *
5674  * @param dev
5675  *   Pointer to Ethernet device structure.
5676  * @param filter_type
5677  *   Filter type.
5678  * @param filter_op
5679  *   Operation to perform.
5680  * @param arg
5681  *   Pointer to operation-specific structure.
5682  *
5683  * @return
5684  *   0 on success, a negative errno value otherwise and rte_errno is set.
5685  */
5686 int
5687 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
5688                      enum rte_filter_type filter_type,
5689                      enum rte_filter_op filter_op,
5690                      void *arg)
5691 {
5692         switch (filter_type) {
5693         case RTE_ETH_FILTER_GENERIC:
5694                 if (filter_op != RTE_ETH_FILTER_GET) {
5695                         rte_errno = EINVAL;
5696                         return -rte_errno;
5697                 }
5698                 *(const void **)arg = &mlx5_flow_ops;
5699                 return 0;
5700         case RTE_ETH_FILTER_FDIR:
5701                 return flow_fdir_ctrl_func(dev, filter_op, arg);
5702         default:
5703                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
5704                         dev->data->port_id, filter_type);
5705                 rte_errno = ENOTSUP;
5706                 return -rte_errno;
5707         }
5708         return 0;
5709 }
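
/*
 * Illustrative usage sketch (editorial addition): the flow director path
 * above is reached through the legacy rte_eth_dev_filter_ctrl() API,
 * assuming fdir_conf.mode was configured as RTE_FDIR_MODE_PERFECT at device
 * configuration time. A hypothetical application adding an IPv4/UDP rule
 * that steers matching packets to queue 3 could look as follows ("port_id"
 * is assumed).
 *
 *	struct rte_eth_fdir_filter filter = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(RTE_IPV4(10, 0, 0, 1)),
 *				.ip.dst_ip = rte_cpu_to_be_32(RTE_IPV4(10, 0, 0, 2)),
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(4321),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *				RTE_ETH_FILTER_ADD, &filter);
 */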
5710
5711 /**
5712  * Create the needed meter and suffix tables.
5713  *
5714  * @param[in] dev
5715  *   Pointer to Ethernet device.
5716  * @param[in] fm
5717  *   Pointer to the flow meter.
5718  *
5719  * @return
5720  *   Pointer to table set on success, NULL otherwise.
5721  */
5722 struct mlx5_meter_domains_infos *
5723 mlx5_flow_create_mtr_tbls(struct rte_eth_dev *dev,
5724                           const struct mlx5_flow_meter *fm)
5725 {
5726         const struct mlx5_flow_driver_ops *fops;
5727
5728         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5729         return fops->create_mtr_tbls(dev, fm);
5730 }
5731
5732 /**
5733  * Destroy the meter table set.
5734  *
5735  * @param[in] dev
5736  *   Pointer to Ethernet device.
5737  * @param[in] tbl
5738  *   Pointer to the meter table set.
5739  *
5740  * @return
5741  *   0 on success.
5742  */
5743 int
5744 mlx5_flow_destroy_mtr_tbls(struct rte_eth_dev *dev,
5745                            struct mlx5_meter_domains_infos *tbls)
5746 {
5747         const struct mlx5_flow_driver_ops *fops;
5748
5749         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5750         return fops->destroy_mtr_tbls(dev, tbls);
5751 }
5752
5753 /**
5754  * Create policer rules.
5755  *
5756  * @param[in] dev
5757  *   Pointer to Ethernet device.
5758  * @param[in] fm
5759  *   Pointer to flow meter structure.
5760  * @param[in] attr
5761  *   Pointer to flow attributes.
5762  *
5763  * @return
5764  *   0 on success, -1 otherwise.
5765  */
5766 int
5767 mlx5_flow_create_policer_rules(struct rte_eth_dev *dev,
5768                                struct mlx5_flow_meter *fm,
5769                                const struct rte_flow_attr *attr)
5770 {
5771         const struct mlx5_flow_driver_ops *fops;
5772
5773         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5774         return fops->create_policer_rules(dev, fm, attr);
5775 }
5776
5777 /**
5778  * Destroy policer rules.
5779  *
5780  * @param[in] fm
5781  *   Pointer to flow meter structure.
5782  * @param[in] attr
5783  *   Pointer to flow attributes.
5784  *
5785  * @return
5786  *   0 on success, -1 otherwise.
5787  */
5788 int
5789 mlx5_flow_destroy_policer_rules(struct rte_eth_dev *dev,
5790                                 struct mlx5_flow_meter *fm,
5791                                 const struct rte_flow_attr *attr)
5792 {
5793         const struct mlx5_flow_driver_ops *fops;
5794
5795         fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5796         return fops->destroy_policer_rules(dev, fm, attr);
5797 }
5798
5799 /**
5800  * Allocate a counter.
5801  *
5802  * @param[in] dev
5803  *   Pointer to Ethernet device structure.
5804  *
5805  * @return
5806  *   Index of the allocated counter on success, 0 otherwise.
5807  */
5808 uint32_t
5809 mlx5_counter_alloc(struct rte_eth_dev *dev)
5810 {
5811         const struct mlx5_flow_driver_ops *fops;
5812         struct rte_flow_attr attr = { .transfer = 0 };
5813
5814         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5815                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5816                 return fops->counter_alloc(dev);
5817         }
5818         DRV_LOG(ERR,
5819                 "port %u counter allocate is not supported.",
5820                  dev->data->port_id);
5821         return 0;
5822 }
5823
5824 /**
5825  * Free a counter.
5826  *
5827  * @param[in] dev
5828  *   Pointer to Ethernet device structure.
5829  * @param[in] cnt
5830  *   Index of the counter to be freed.
5831  */
5832 void
5833 mlx5_counter_free(struct rte_eth_dev *dev, uint32_t cnt)
5834 {
5835         const struct mlx5_flow_driver_ops *fops;
5836         struct rte_flow_attr attr = { .transfer = 0 };
5837
5838         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5839                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5840                 fops->counter_free(dev, cnt);
5841                 return;
5842         }
5843         DRV_LOG(ERR,
5844                 "port %u counter free is not supported.",
5845                  dev->data->port_id);
5846 }
5847
5848 /**
5849  * Query counter statistics.
5850  *
5851  * @param[in] dev
5852  *   Pointer to Ethernet device structure.
5853  * @param[in] cnt
5854  *   Index to counter to query.
5855  * @param[in] clear
5856  *   Set to clear counter statistics.
5857  * @param[out] pkts
5858  *   Where to save the number of packets that hit the counter.
5859  * @param[out] bytes
5860  *   Where to save the number of bytes that hit the counter.
5861  *
5862  * @return
5863  *   0 on success, a negative errno value otherwise.
5864  */
5865 int
5866 mlx5_counter_query(struct rte_eth_dev *dev, uint32_t cnt,
5867                    bool clear, uint64_t *pkts, uint64_t *bytes)
5868 {
5869         const struct mlx5_flow_driver_ops *fops;
5870         struct rte_flow_attr attr = { .transfer = 0 };
5871
5872         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
5873                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
5874                 return fops->counter_query(dev, cnt, clear, pkts, bytes);
5875         }
5876         DRV_LOG(ERR,
5877                 "port %u counter query is not supported.",
5878                  dev->data->port_id);
5879         return -ENOTSUP;
5880 }
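
/*
 * Illustrative internal usage sketch (editorial addition): other parts of
 * the PMD (the flow meter code, for instance) may combine the three counter
 * helpers above roughly as follows; "dev" is assumed to be an mlx5 port
 * using the DV flow engine.
 *
 *	uint64_t pkts, bytes;
 *	uint32_t cnt = mlx5_counter_alloc(dev);
 *
 *	if (cnt) {
 *		if (!mlx5_counter_query(dev, cnt, false, &pkts, &bytes))
 *			DRV_LOG(DEBUG, "counter %u: %" PRIu64 " packets",
 *				cnt, pkts);
 *		mlx5_counter_free(dev, cnt);
 *	}
 */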
5881
5882 #define MLX5_POOL_QUERY_FREQ_US 1000000
5883
5884 /**
5885  * Get the number of all valid pools.
5886  *
5887  * @param[in] sh
5888  *   Pointer to mlx5_dev_ctx_shared object.
5889  *
5890  * @return
5891  *   The number of all valid pools.
5892  */
5893 static uint32_t
5894 mlx5_get_all_valid_pool_count(struct mlx5_dev_ctx_shared *sh)
5895 {
5896         int i;
5897         uint32_t pools_n = 0;
5898
5899         for (i = 0; i < MLX5_CCONT_TYPE_MAX; ++i)
5900                 pools_n += rte_atomic16_read(&sh->cmng.ccont[i].n_valid);
5901         return pools_n;
5902 }
5903
5904 /**
5905  * Set the periodic procedure for triggering asynchronous batch queries for all
5906  * the counter pools.
5907  *
5908  * @param[in] sh
5909  *   Pointer to mlx5_dev_ctx_shared object.
5910  */
5911 void
5912 mlx5_set_query_alarm(struct mlx5_dev_ctx_shared *sh)
5913 {
5914         uint32_t pools_n, us;
5915
5916         pools_n = mlx5_get_all_valid_pool_count(sh);
5917         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
5918         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
5919         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
5920                 sh->cmng.query_thread_on = 0;
5921                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
5922         } else {
5923                 sh->cmng.query_thread_on = 1;
5924         }
5925 }
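
/*
 * Worked example (editorial addition): with MLX5_POOL_QUERY_FREQ_US equal to
 * 1000000 us and, say, 4 valid pools, the alarm fires every 250000 us, so
 * each pool is queried roughly once per second regardless of the pool count.
 * The division assumes at least one valid pool exists when the alarm is
 * re-armed.
 */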
5926
5927 /**
5928  * The periodic procedure for triggering asynchronous batch queries for all the
5929  * counter pools. This function is probably called by the host thread.
5930  *
5931  * @param[in] arg
5932  *   The parameter for the alarm process.
5933  */
5934 void
5935 mlx5_flow_query_alarm(void *arg)
5936 {
5937         struct mlx5_dev_ctx_shared *sh = arg;
5938         struct mlx5_devx_obj *dcs;
5939         uint16_t offset;
5940         int ret;
5941         uint8_t batch = sh->cmng.batch;
5942         uint8_t age = sh->cmng.age;
5943         uint16_t pool_index = sh->cmng.pool_index;
5944         struct mlx5_pools_container *cont;
5945         struct mlx5_flow_counter_pool *pool;
5946         int cont_loop = MLX5_CCONT_TYPE_MAX;
5947
5948         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
5949                 goto set_alarm;
5950 next_container:
5951         cont = MLX5_CNT_CONTAINER(sh, batch, age);
5952         rte_spinlock_lock(&cont->resize_sl);
5953         if (!cont->pools) {
5954                 rte_spinlock_unlock(&cont->resize_sl);
5955                 /* Check if all the containers are empty. */
5956                 if (unlikely(--cont_loop == 0))
5957                         goto set_alarm;
5958                 batch ^= 0x1;
5959                 pool_index = 0;
5960                 if (batch == 0 && pool_index == 0) {
5961                         age ^= 0x1;
5962                         sh->cmng.batch = batch;
5963                         sh->cmng.age = age;
5964                 }
5965                 goto next_container;
5966         }
5967         pool = cont->pools[pool_index];
5968         rte_spinlock_unlock(&cont->resize_sl);
5969         if (pool->raw_hw)
5970                 /* There is a pool query in progress. */
5971                 goto set_alarm;
5972         pool->raw_hw =
5973                 LIST_FIRST(&sh->cmng.free_stat_raws);
5974         if (!pool->raw_hw)
5975                 /* No free counter statistics raw memory. */
5976                 goto set_alarm;
5977         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
5978                                                               (&pool->a64_dcs);
5979         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
5980         /*
5981          * Identify the counters released between the query trigger and the
5982          * query handling more efficiently. A counter released in this gap
5983          * period should wait for a new round of query, as newly arrived
5984          * packets will not be taken into account yet.
5985          */
5986         pool->query_gen++;
5987         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
5988                                                offset, NULL, NULL,
5989                                                pool->raw_hw->mem_mng->dm->id,
5990                                                (void *)(uintptr_t)
5991                                                (pool->raw_hw->data + offset),
5992                                                sh->devx_comp,
5993                                                (uint64_t)(uintptr_t)pool);
5994         if (ret) {
5995                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
5996                         " %d", pool->min_dcs->id);
5997                 pool->raw_hw = NULL;
5998                 goto set_alarm;
5999         }
6000         pool->raw_hw->min_dcs_id = dcs->id;
6001         LIST_REMOVE(pool->raw_hw, next);
6002         sh->cmng.pending_queries++;
6003         pool_index++;
6004         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
6005                 batch ^= 0x1;
6006                 pool_index = 0;
6007                 if (batch == 0 && pool_index == 0)
6008                         age ^= 0x1;
6009         }
6010 set_alarm:
6011         sh->cmng.batch = batch;
6012         sh->cmng.pool_index = pool_index;
6013         sh->cmng.age = age;
6014         mlx5_set_query_alarm(sh);
6015 }
6016
6017 /**
6018  * Check for newly aged flows in the counter pool and raise the aging event.
6019  *
6020  * @param[in] sh
6021  *   Pointer to mlx5_dev_ctx_shared object.
6022  * @param[in] pool
6023  *   Pointer to Current counter pool.
6024  */
6025 static void
6026 mlx5_flow_aging_check(struct mlx5_dev_ctx_shared *sh,
6027                    struct mlx5_flow_counter_pool *pool)
6028 {
6029         struct mlx5_priv *priv;
6030         struct mlx5_flow_counter *cnt;
6031         struct mlx5_age_info *age_info;
6032         struct mlx5_age_param *age_param;
6033         struct mlx5_counter_stats_raw *cur = pool->raw_hw;
6034         struct mlx5_counter_stats_raw *prev = pool->raw;
6035         uint16_t curr = rte_rdtsc() / (rte_get_tsc_hz() / 10);
6036         uint32_t i;
6037
6038         for (i = 0; i < MLX5_COUNTERS_PER_POOL; ++i) {
6039                 cnt = MLX5_POOL_GET_CNT(pool, i);
6040                 age_param = MLX5_CNT_TO_AGE(cnt);
6041                 if (rte_atomic16_read(&age_param->state) != AGE_CANDIDATE)
6042                         continue;
6043                 if (cur->data[i].hits != prev->data[i].hits) {
6044                         age_param->expire = curr + age_param->timeout;
6045                         continue;
6046                 }
6047                 if ((uint16_t)(curr - age_param->expire) >= (UINT16_MAX / 2))
6048                         continue;
6049                 /*
6050                  * Hold the lock first; otherwise, if the counter is
6051                  * released between the AGE_TMOUT state change and the
6052                  * tailq insertion, the release procedure may delete a
6053                  * non-existent tailq node.
6054                  */
6055                 priv = rte_eth_devices[age_param->port_id].data->dev_private;
6056                 age_info = GET_PORT_AGE_INFO(priv);
6057                 rte_spinlock_lock(&age_info->aged_sl);
6058                 /* If the cmpset fails, the counter has been released. */
6059                 if (rte_atomic16_cmpset((volatile uint16_t *)
6060                                         &age_param->state,
6061                                         AGE_CANDIDATE,
6062                                         AGE_TMOUT) ==
6063                                         AGE_CANDIDATE) {
6064                         TAILQ_INSERT_TAIL(&age_info->aged_counters, cnt, next);
6065                         MLX5_AGE_SET(age_info, MLX5_AGE_EVENT_NEW);
6066                 }
6067                 rte_spinlock_unlock(&age_info->aged_sl);
6068         }
6069         for (i = 0; i < sh->max_port; i++) {
6070                 age_info = &sh->port[i].age_info;
6071                 if (!MLX5_AGE_GET(age_info, MLX5_AGE_EVENT_NEW))
6072                         continue;
6073                 if (MLX5_AGE_GET(age_info, MLX5_AGE_TRIGGER))
6074                         _rte_eth_dev_callback_process
6075                                 (&rte_eth_devices[sh->port[i].devx_ih_port_id],
6076                                 RTE_ETH_EVENT_FLOW_AGED, NULL);
6077                 age_info->flags = 0;
6078         }
6079 }
6080
6081 /**
6082  * Handler for the HW response with ready values from an asynchronous batch
6083  * query. This function is probably called by the host thread.
6084  *
6085  * @param[in] sh
6086  *   The pointer to the shared device context.
6087  * @param[in] async_id
6088  *   The Devx async ID.
6089  * @param[in] status
6090  *   The status of the completion.
6091  */
6092 void
6093 mlx5_flow_async_pool_query_handle(struct mlx5_dev_ctx_shared *sh,
6094                                   uint64_t async_id, int status)
6095 {
6096         struct mlx5_flow_counter_pool *pool =
6097                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
6098         struct mlx5_counter_stats_raw *raw_to_free;
6099         uint8_t age = !!IS_AGE_POOL(pool);
6100         uint8_t query_gen = pool->query_gen ^ 1;
6101         struct mlx5_pools_container *cont =
6102                 MLX5_CNT_CONTAINER(sh, !IS_EXT_POOL(pool), age);
6103
6104         if (unlikely(status)) {
6105                 raw_to_free = pool->raw_hw;
6106         } else {
6107                 raw_to_free = pool->raw;
6108                 if (IS_AGE_POOL(pool))
6109                         mlx5_flow_aging_check(sh, pool);
6110                 rte_spinlock_lock(&pool->sl);
6111                 pool->raw = pool->raw_hw;
6112                 rte_spinlock_unlock(&pool->sl);
6113                 /* Be sure the new raw counters data is updated in memory. */
6114                 rte_cio_wmb();
6115                 if (!TAILQ_EMPTY(&pool->counters[query_gen])) {
6116                         rte_spinlock_lock(&cont->csl);
6117                         TAILQ_CONCAT(&cont->counters,
6118                                      &pool->counters[query_gen], next);
6119                         rte_spinlock_unlock(&cont->csl);
6120                 }
6121         }
6122         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
6123         pool->raw_hw = NULL;
6124         sh->cmng.pending_queries--;
6125 }
6126
6127 /**
6128  * Translate the rte_flow group index to HW table value.
6129  *
6130  * @param[in] attributes
6131  *   Pointer to flow attributes
6132  * @param[in] external
6133  *   True if the flow rule was created by a request external to the PMD.
6134  * @param[in] group
6135  *   rte_flow group index value.
6136  * @param[in] fdb_def_rule
6137  *   Whether fdb jump to table 1 is configured.
6138  * @param[out] table
6139  *   HW table value.
6140  * @param[out] error
6141  *   Pointer to error structure.
6142  *
6143  * @return
6144  *   0 on success, a negative errno value otherwise and rte_errno is set.
6145  */
6146 int
6147 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
6148                          uint32_t group, bool fdb_def_rule, uint32_t *table,
6149                          struct rte_flow_error *error)
6150 {
6151         if (attributes->transfer && external && fdb_def_rule) {
6152                 if (group == UINT32_MAX)
6153                         return rte_flow_error_set
6154                                                 (error, EINVAL,
6155                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
6156                                                  NULL,
6157                                                  "group index not supported");
6158                 *table = group + 1;
6159         } else {
6160                 *table = group;
6161         }
6162         return 0;
6163 }
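
/*
 * Worked example (editorial addition): for a transfer rule created by an
 * external request while the FDB default rule is enabled, group 0 maps to
 * HW table 1 and group N to table N + 1 (group UINT32_MAX is rejected since
 * it cannot be shifted). All other rules keep a 1:1 group-to-table mapping.
 */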
6164
6165 /**
6166  * Discover availability of metadata reg_c's.
6167  *
6168  * Iteratively use test flows to check availability.
6169  *
6170  * @param[in] dev
6171  *   Pointer to the Ethernet device structure.
6172  *
6173  * @return
6174  *   0 on success, a negative errno value otherwise and rte_errno is set.
6175  */
6176 int
6177 mlx5_flow_discover_mreg_c(struct rte_eth_dev *dev)
6178 {
6179         struct mlx5_priv *priv = dev->data->dev_private;
6180         struct mlx5_dev_config *config = &priv->config;
6181         enum modify_reg idx;
6182         int n = 0;
6183
6184         /* reg_c[0] and reg_c[1] are reserved. */
6185         config->flow_mreg_c[n++] = REG_C_0;
6186         config->flow_mreg_c[n++] = REG_C_1;
6187         /* Discover availability of other reg_c's. */
6188         for (idx = REG_C_2; idx <= REG_C_7; ++idx) {
6189                 struct rte_flow_attr attr = {
6190                         .group = MLX5_FLOW_MREG_CP_TABLE_GROUP,
6191                         .priority = MLX5_FLOW_PRIO_RSVD,
6192                         .ingress = 1,
6193                 };
6194                 struct rte_flow_item items[] = {
6195                         [0] = {
6196                                 .type = RTE_FLOW_ITEM_TYPE_END,
6197                         },
6198                 };
6199                 struct rte_flow_action actions[] = {
6200                         [0] = {
6201                                 .type = (enum rte_flow_action_type)
6202                                         MLX5_RTE_FLOW_ACTION_TYPE_COPY_MREG,
6203                                 .conf = &(struct mlx5_flow_action_copy_mreg){
6204                                         .src = REG_C_1,
6205                                         .dst = idx,
6206                                 },
6207                         },
6208                         [1] = {
6209                                 .type = RTE_FLOW_ACTION_TYPE_JUMP,
6210                                 .conf = &(struct rte_flow_action_jump){
6211                                         .group = MLX5_FLOW_MREG_ACT_TABLE_GROUP,
6212                                 },
6213                         },
6214                         [2] = {
6215                                 .type = RTE_FLOW_ACTION_TYPE_END,
6216                         },
6217                 };
6218                 uint32_t flow_idx;
6219                 struct rte_flow *flow;
6220                 struct rte_flow_error error;
6221
6222                 if (!config->dv_flow_en)
6223                         break;
6224                 /* Create internal flow, validation skips copy action. */
6225                 flow_idx = flow_list_create(dev, NULL, &attr, items,
6226                                             actions, false, &error);
6227                 flow = mlx5_ipool_get(priv->sh->ipool[MLX5_IPOOL_RTE_FLOW],
6228                                       flow_idx);
6229                 if (!flow)
6230                         continue;
6231                 if (dev->data->dev_started || !flow_drv_apply(dev, flow, NULL))
6232                         config->flow_mreg_c[n++] = idx;
6233                 flow_list_destroy(dev, NULL, flow_idx);
6234         }
6235         for (; n < MLX5_MREG_C_NUM; ++n)
6236                 config->flow_mreg_c[n] = REG_NONE;
6237         return 0;
6238 }
6239
6240 /**
6241  * Dump flow raw HW data to a file.
6242  *
6243  * @param[in] dev
6244  *   The pointer to the Ethernet device.
6245  * @param[in] file
6246  *   A pointer to a file for output.
6247  * @param[out] error
6248  *   Perform verbose error reporting if not NULL. PMDs initialize this
6249  *   structure in case of error only.
6250  * @return
6251  *   0 on success, a negative value otherwise.
6252  */
6253 int
6254 mlx5_flow_dev_dump(struct rte_eth_dev *dev,
6255                    FILE *file,
6256                    struct rte_flow_error *error __rte_unused)
6257 {
6258         struct mlx5_priv *priv = dev->data->dev_private;
6259         struct mlx5_dev_ctx_shared *sh = priv->sh;
6260
6261         return mlx5_devx_cmd_flow_dump(sh->fdb_domain, sh->rx_domain,
6262                                        sh->tx_domain, file);
6263 }
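
/*
 * Illustrative usage sketch (editorial addition): this callback is reached
 * through the experimental rte_flow_dev_dump() API; "port_id" and the output
 * path below are assumed.
 *
 *	FILE *f = fopen("/tmp/mlx5_flow_dump.txt", "w");
 *	struct rte_flow_error err;
 *
 *	if (f != NULL) {
 *		rte_flow_dev_dump(port_id, f, &err);
 *		fclose(f);
 *	}
 */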
6264
6265 /**
6266  * Get aged-out flows.
6267  *
6268  * @param[in] dev
6269  *   Pointer to the Ethernet device structure.
6270  * @param[in, out] contexts
6271  *   The address of an array of pointers to the aged-out flow contexts.
6272  * @param[in] nb_contexts
6273  *   The length of the contexts array.
6274  * @param[out] error
6275  *   Perform verbose error reporting if not NULL. Initialized in case of
6276  *   error only.
6277  *
6278  * @return
6279  *   The number of aged-out flow contexts on success, a negative errno
6280  *   value otherwise. If nb_contexts is 0, the total number of aged-out
6281  *   flows is returned; otherwise, the number of aged-out flows reported
6282  *   in the contexts array is returned.
6283  */
6284 int
6285 mlx5_flow_get_aged_flows(struct rte_eth_dev *dev, void **contexts,
6286                         uint32_t nb_contexts, struct rte_flow_error *error)
6287 {
6288         const struct mlx5_flow_driver_ops *fops;
6289         struct rte_flow_attr attr = { .transfer = 0 };
6290
6291         if (flow_get_drv_type(dev, &attr) == MLX5_FLOW_TYPE_DV) {
6292                 fops = flow_get_drv_ops(MLX5_FLOW_TYPE_DV);
6293                 return fops->get_aged_flows(dev, contexts, nb_contexts,
6294                                                     error);
6295         }
6296         DRV_LOG(ERR,
6297                 "port %u get aged flows is not supported.",
6298                  dev->data->port_id);
6299         return -ENOTSUP;
6300 }
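
/*
 * Illustrative usage sketch (editorial addition): an application typically
 * registers a callback for RTE_ETH_EVENT_FLOW_AGED and then collects the
 * aged flow contexts with rte_flow_get_aged_flows(); "port_id", "MAX_AGED"
 * and "handle_aged_flow" below are assumed application-side names.
 *
 *	void *contexts[MAX_AGED];
 *	struct rte_flow_error err;
 *	int i, n;
 *
 *	n = rte_flow_get_aged_flows(port_id, contexts, MAX_AGED, &err);
 *	for (i = 0; i < n; i++)
 *		handle_aged_flow(contexts[i]);
 */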