drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <netinet/in.h>
7 #include <sys/queue.h>
8 #include <stdalign.h>
9 #include <stdint.h>
10 #include <string.h>
11
12 /* Verbs header. */
13 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
14 #ifdef PEDANTIC
15 #pragma GCC diagnostic ignored "-Wpedantic"
16 #endif
17 #include <infiniband/verbs.h>
18 #ifdef PEDANTIC
19 #pragma GCC diagnostic error "-Wpedantic"
20 #endif
21
22 #include <rte_common.h>
23 #include <rte_ether.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_flow.h"
33 #include "mlx5_glue.h"
34 #include "mlx5_prm.h"
35 #include "mlx5_rxtx.h"
36
37 /* Dev ops structures defined in mlx5.c */
38 extern const struct eth_dev_ops mlx5_dev_ops;
39 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
40
41 /** Device flow drivers. */
42 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
43 extern const struct mlx5_flow_driver_ops mlx5_flow_dv_drv_ops;
44 #endif
45 extern const struct mlx5_flow_driver_ops mlx5_flow_verbs_drv_ops;
46
47 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops;
48
49 const struct mlx5_flow_driver_ops *flow_drv_ops[] = {
50         [MLX5_FLOW_TYPE_MIN] = &mlx5_flow_null_drv_ops,
51 #ifdef HAVE_IBV_FLOW_DV_SUPPORT
52         [MLX5_FLOW_TYPE_DV] = &mlx5_flow_dv_drv_ops,
53 #endif
54         [MLX5_FLOW_TYPE_VERBS] = &mlx5_flow_verbs_drv_ops,
55         [MLX5_FLOW_TYPE_MAX] = &mlx5_flow_null_drv_ops
56 };
57
58 enum mlx5_expansion {
59         MLX5_EXPANSION_ROOT,
60         MLX5_EXPANSION_ROOT_OUTER,
61         MLX5_EXPANSION_ROOT_ETH_VLAN,
62         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
63         MLX5_EXPANSION_OUTER_ETH,
64         MLX5_EXPANSION_OUTER_ETH_VLAN,
65         MLX5_EXPANSION_OUTER_VLAN,
66         MLX5_EXPANSION_OUTER_IPV4,
67         MLX5_EXPANSION_OUTER_IPV4_UDP,
68         MLX5_EXPANSION_OUTER_IPV4_TCP,
69         MLX5_EXPANSION_OUTER_IPV6,
70         MLX5_EXPANSION_OUTER_IPV6_UDP,
71         MLX5_EXPANSION_OUTER_IPV6_TCP,
72         MLX5_EXPANSION_VXLAN,
73         MLX5_EXPANSION_VXLAN_GPE,
74         MLX5_EXPANSION_GRE,
75         MLX5_EXPANSION_MPLS,
76         MLX5_EXPANSION_ETH,
77         MLX5_EXPANSION_ETH_VLAN,
78         MLX5_EXPANSION_VLAN,
79         MLX5_EXPANSION_IPV4,
80         MLX5_EXPANSION_IPV4_UDP,
81         MLX5_EXPANSION_IPV4_TCP,
82         MLX5_EXPANSION_IPV6,
83         MLX5_EXPANSION_IPV6_UDP,
84         MLX5_EXPANSION_IPV6_TCP,
85 };
86
87 /** Supported expansion of items. */
88 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
89         [MLX5_EXPANSION_ROOT] = {
90                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
91                                                  MLX5_EXPANSION_IPV4,
92                                                  MLX5_EXPANSION_IPV6),
93                 .type = RTE_FLOW_ITEM_TYPE_END,
94         },
95         [MLX5_EXPANSION_ROOT_OUTER] = {
96                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
97                                                  MLX5_EXPANSION_OUTER_IPV4,
98                                                  MLX5_EXPANSION_OUTER_IPV6),
99                 .type = RTE_FLOW_ITEM_TYPE_END,
100         },
101         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
102                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
103                 .type = RTE_FLOW_ITEM_TYPE_END,
104         },
105         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
106                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
107                 .type = RTE_FLOW_ITEM_TYPE_END,
108         },
109         [MLX5_EXPANSION_OUTER_ETH] = {
110                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
111                                                  MLX5_EXPANSION_OUTER_IPV6,
112                                                  MLX5_EXPANSION_MPLS),
113                 .type = RTE_FLOW_ITEM_TYPE_ETH,
114                 .rss_types = 0,
115         },
116         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
117                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
118                 .type = RTE_FLOW_ITEM_TYPE_ETH,
119                 .rss_types = 0,
120         },
121         [MLX5_EXPANSION_OUTER_VLAN] = {
122                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
123                                                  MLX5_EXPANSION_OUTER_IPV6),
124                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
125         },
126         [MLX5_EXPANSION_OUTER_IPV4] = {
127                 .next = RTE_FLOW_EXPAND_RSS_NEXT
128                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
129                          MLX5_EXPANSION_OUTER_IPV4_TCP,
130                          MLX5_EXPANSION_GRE,
131                          MLX5_EXPANSION_IPV4,
132                          MLX5_EXPANSION_IPV6),
133                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
134                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
135                         ETH_RSS_NONFRAG_IPV4_OTHER,
136         },
137         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
138                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
139                                                  MLX5_EXPANSION_VXLAN_GPE),
140                 .type = RTE_FLOW_ITEM_TYPE_UDP,
141                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
142         },
143         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
144                 .type = RTE_FLOW_ITEM_TYPE_TCP,
145                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
146         },
147         [MLX5_EXPANSION_OUTER_IPV6] = {
148                 .next = RTE_FLOW_EXPAND_RSS_NEXT
149                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
150                          MLX5_EXPANSION_OUTER_IPV6_TCP,
151                          MLX5_EXPANSION_IPV4,
152                          MLX5_EXPANSION_IPV6),
153                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
154                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
155                         ETH_RSS_NONFRAG_IPV6_OTHER,
156         },
157         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
158                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
159                                                  MLX5_EXPANSION_VXLAN_GPE),
160                 .type = RTE_FLOW_ITEM_TYPE_UDP,
161                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
162         },
163         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
164                 .type = RTE_FLOW_ITEM_TYPE_TCP,
165                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
166         },
167         [MLX5_EXPANSION_VXLAN] = {
168                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
169                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
170         },
171         [MLX5_EXPANSION_VXLAN_GPE] = {
172                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
173                                                  MLX5_EXPANSION_IPV4,
174                                                  MLX5_EXPANSION_IPV6),
175                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
176         },
177         [MLX5_EXPANSION_GRE] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
179                 .type = RTE_FLOW_ITEM_TYPE_GRE,
180         },
181         [MLX5_EXPANSION_MPLS] = {
182                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
183                                                  MLX5_EXPANSION_IPV6),
184                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
185         },
186         [MLX5_EXPANSION_ETH] = {
187                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
188                                                  MLX5_EXPANSION_IPV6),
189                 .type = RTE_FLOW_ITEM_TYPE_ETH,
190         },
191         [MLX5_EXPANSION_ETH_VLAN] = {
192                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
193                 .type = RTE_FLOW_ITEM_TYPE_ETH,
194         },
195         [MLX5_EXPANSION_VLAN] = {
196                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
197                                                  MLX5_EXPANSION_IPV6),
198                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
199         },
200         [MLX5_EXPANSION_IPV4] = {
201                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
202                                                  MLX5_EXPANSION_IPV4_TCP),
203                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
204                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
205                         ETH_RSS_NONFRAG_IPV4_OTHER,
206         },
207         [MLX5_EXPANSION_IPV4_UDP] = {
208                 .type = RTE_FLOW_ITEM_TYPE_UDP,
209                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
210         },
211         [MLX5_EXPANSION_IPV4_TCP] = {
212                 .type = RTE_FLOW_ITEM_TYPE_TCP,
213                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
214         },
215         [MLX5_EXPANSION_IPV6] = {
216                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
217                                                  MLX5_EXPANSION_IPV6_TCP),
218                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
219                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
220                         ETH_RSS_NONFRAG_IPV6_OTHER,
221         },
222         [MLX5_EXPANSION_IPV6_UDP] = {
223                 .type = RTE_FLOW_ITEM_TYPE_UDP,
224                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
225         },
226         [MLX5_EXPANSION_IPV6_TCP] = {
227                 .type = RTE_FLOW_ITEM_TYPE_TCP,
228                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
229         },
230 };
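
/*
 * Example of how this graph is used (illustrative sketch only, not part of the
 * driver logic): expanding the pattern "eth / ipv4 / end" for an RSS action
 * requesting IPv4 UDP and TCP hashing starts at MLX5_EXPANSION_ROOT and,
 * following the .next edges above, additionally produces "eth / ipv4 / udp"
 * and "eth / ipv4 / tcp". The rss_types of each node tell the expansion code
 * which ETH_RSS_* bits justify walking into that node. The call made later in
 * this file looks roughly like:
 *
 *     ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
 *                               items, rss->types,
 *                               mlx5_support_expansion, MLX5_EXPANSION_ROOT);
 */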
231
232 static const struct rte_flow_ops mlx5_flow_ops = {
233         .validate = mlx5_flow_validate,
234         .create = mlx5_flow_create,
235         .destroy = mlx5_flow_destroy,
236         .flush = mlx5_flow_flush,
237         .isolate = mlx5_flow_isolate,
238         .query = mlx5_flow_query,
239 };
240
241 /* Convert FDIR request to Generic flow. */
242 struct mlx5_fdir {
243         struct rte_flow_attr attr;
244         struct rte_flow_item items[4];
245         struct rte_flow_item_eth l2;
246         struct rte_flow_item_eth l2_mask;
247         union {
248                 struct rte_flow_item_ipv4 ipv4;
249                 struct rte_flow_item_ipv6 ipv6;
250         } l3;
251         union {
252                 struct rte_flow_item_ipv4 ipv4;
253                 struct rte_flow_item_ipv6 ipv6;
254         } l3_mask;
255         union {
256                 struct rte_flow_item_udp udp;
257                 struct rte_flow_item_tcp tcp;
258         } l4;
259         union {
260                 struct rte_flow_item_udp udp;
261                 struct rte_flow_item_tcp tcp;
262         } l4_mask;
263         struct rte_flow_action actions[2];
264         struct rte_flow_action_queue queue;
265 };
266
267 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
268 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
269         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
270 };
271
272 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
273 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
274         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
275         { 9, 10, 11 }, { 12, 13, 14 },
276 };
277
278 /* Tunnel information. */
279 struct mlx5_flow_tunnel_info {
280         uint64_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
281         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
282 };
283
284 static struct mlx5_flow_tunnel_info tunnels_info[] = {
285         {
286                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
287                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
288         },
289         {
290                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
291                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
292         },
293         {
294                 .tunnel = MLX5_FLOW_LAYER_GRE,
295                 .ptype = RTE_PTYPE_TUNNEL_GRE,
296         },
297         {
298                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
299                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_UDP | RTE_PTYPE_L4_UDP,
300         },
301         {
302                 .tunnel = MLX5_FLOW_LAYER_MPLS,
303                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
304         },
305         {
306                 .tunnel = MLX5_FLOW_LAYER_NVGRE,
307                 .ptype = RTE_PTYPE_TUNNEL_NVGRE,
308         },
309         {
310                 .tunnel = MLX5_FLOW_LAYER_IPIP,
311                 .ptype = RTE_PTYPE_TUNNEL_IP,
312         },
313         {
314                 .tunnel = MLX5_FLOW_LAYER_IPV6_ENCAP,
315                 .ptype = RTE_PTYPE_TUNNEL_IP,
316         },
317 };
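
/*
 * Note on ordering (descriptive only): flow_drv_rxq_flags_set() below stops at
 * the first entry whose tunnel bits are fully contained in the flow layers, so
 * the combined MPLS-over-UDP entry must precede the plain MPLS entry for
 * MPLSoUDP flows to be reported as RTE_PTYPE_TUNNEL_MPLS_IN_UDP rather than
 * MPLS-in-GRE.
 */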
318
319 enum mlx5_feature_name {
320         MLX5_HAIRPIN_RX,
321         MLX5_HAIRPIN_TX,
322         MLX5_APPLICATION,
323 };
324
325 /**
326  * Translate tag ID to register.
327  *
328  * @param[in] dev
329  *   Pointer to the Ethernet device structure.
330  * @param[in] feature
331  *   The feature that requests the register.
332  * @param[in] id
333  *   The requested register ID.
334  * @param[out] error
335  *   Pointer to error structure.
336  *
337  * @return
338  *   The requested register on success, a negative errno
339  *   value otherwise and rte_errno is set.
340  */
341 __rte_unused
342 static enum modify_reg flow_get_reg_id(struct rte_eth_dev *dev,
343                                        enum mlx5_feature_name feature,
344                                        uint32_t id,
345                                        struct rte_flow_error *error)
346 {
347         static enum modify_reg id2reg[] = {
348                 [0] = REG_A,
349                 [1] = REG_C_2,
350                 [2] = REG_C_3,
351                 [3] = REG_C_4,
352                 [4] = REG_B,};
353
354         dev = (void *)dev;
355         switch (feature) {
356         case MLX5_HAIRPIN_RX:
357                 return REG_B;
358         case MLX5_HAIRPIN_TX:
359                 return REG_A;
360         case MLX5_APPLICATION:
361                 if (id > 4)
362                         return rte_flow_error_set(error, EINVAL,
363                                                   RTE_FLOW_ERROR_TYPE_ITEM,
364                                                   NULL, "invalid tag id");
365                 return id2reg[id];
366         }
367         return rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM,
368                                   NULL, "invalid feature name");
369 }
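
/*
 * Illustrative mapping produced by flow_get_reg_id() (derived from the table
 * above, shown only as an example):
 *
 *     flow_get_reg_id(dev, MLX5_HAIRPIN_RX,  0, &err);  returns REG_B
 *     flow_get_reg_id(dev, MLX5_HAIRPIN_TX,  0, &err);  returns REG_A
 *     flow_get_reg_id(dev, MLX5_APPLICATION, 2, &err);  returns REG_C_3
 *     flow_get_reg_id(dev, MLX5_APPLICATION, 5, &err);  fails with EINVAL
 */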
370
371 /**
372  * Discover the maximum number of priorities available.
373  *
374  * @param[in] dev
375  *   Pointer to the Ethernet device structure.
376  *
377  * @return
378  *   Number of supported flow priorities on success, a negative errno
379  *   value otherwise and rte_errno is set.
380  */
381 int
382 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
383 {
384         struct mlx5_priv *priv = dev->data->dev_private;
385         struct {
386                 struct ibv_flow_attr attr;
387                 struct ibv_flow_spec_eth eth;
388                 struct ibv_flow_spec_action_drop drop;
389         } flow_attr = {
390                 .attr = {
391                         .num_of_specs = 2,
392                         .port = (uint8_t)priv->ibv_port,
393                 },
394                 .eth = {
395                         .type = IBV_FLOW_SPEC_ETH,
396                         .size = sizeof(struct ibv_flow_spec_eth),
397                 },
398                 .drop = {
399                         .size = sizeof(struct ibv_flow_spec_action_drop),
400                         .type = IBV_FLOW_SPEC_ACTION_DROP,
401                 },
402         };
403         struct ibv_flow *flow;
404         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
405         uint16_t vprio[] = { 8, 16 };
406         int i;
407         int priority = 0;
408
409         if (!drop) {
410                 rte_errno = ENOTSUP;
411                 return -rte_errno;
412         }
413         for (i = 0; i != RTE_DIM(vprio); i++) {
414                 flow_attr.attr.priority = vprio[i] - 1;
415                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
416                 if (!flow)
417                         break;
418                 claim_zero(mlx5_glue->destroy_flow(flow));
419                 priority = vprio[i];
420         }
421         mlx5_hrxq_drop_release(dev);
422         switch (priority) {
423         case 8:
424                 priority = RTE_DIM(priority_map_3);
425                 break;
426         case 16:
427                 priority = RTE_DIM(priority_map_5);
428                 break;
429         default:
430                 rte_errno = ENOTSUP;
431                 DRV_LOG(ERR,
432                         "port %u verbs maximum priority: %d expected 8/16",
433                         dev->data->port_id, priority);
434                 return -rte_errno;
435         }
436         DRV_LOG(INFO, "port %u flow maximum priority: %d",
437                 dev->data->port_id, priority);
438         return priority;
439 }
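
/*
 * Probe sketch (illustrative only): the function tries to create a drop flow
 * at Verbs priority 7 and then 15; the highest attempt that succeeds selects
 * between the 8-priority map (3 flow priority levels) and the 16-priority map
 * (5 levels). A caller during device spawn is assumed to look roughly like:
 *
 *     int prio = mlx5_flow_discover_priorities(dev);
 *
 *     if (prio < 0)
 *             return prio;
 *     priv->config.flow_prio = prio;
 *
 * Storing the result in config.flow_prio is what mlx5_flow_adjust_priority()
 * below relies on; the exact call site in mlx5.c may differ.
 */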
440
441 /**
442  * Adjust flow priority based on the highest layer and the requested priority.
443  *
444  * @param[in] dev
445  *   Pointer to the Ethernet device structure.
446  * @param[in] priority
447  *   The rule base priority.
448  * @param[in] subpriority
449  *   The priority based on the items.
450  *
451  * @return
452  *   The new priority.
453  */
454 uint32_t mlx5_flow_adjust_priority(struct rte_eth_dev *dev, int32_t priority,
455                                    uint32_t subpriority)
456 {
457         uint32_t res = 0;
458         struct mlx5_priv *priv = dev->data->dev_private;
459
460         switch (priv->config.flow_prio) {
461         case RTE_DIM(priority_map_3):
462                 res = priority_map_3[priority][subpriority];
463                 break;
464         case RTE_DIM(priority_map_5):
465                 res = priority_map_5[priority][subpriority];
466                 break;
467         }
468         return res;
469 }
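
/*
 * Example (illustrative only): with 16 Verbs priorities discovered,
 * priv->config.flow_prio == RTE_DIM(priority_map_5) == 5, so a rule with base
 * priority 1 and item sub-priority 2 lands at Verbs priority
 * priority_map_5[1][2] == 5:
 *
 *     uint32_t vprio = mlx5_flow_adjust_priority(dev, 1, 2);  yields 5
 */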
470
471 /**
472  * Verify the @p item specifications (spec, last, mask) are compatible with the
473  * NIC capabilities.
474  *
475  * @param[in] item
476  *   Item specification.
477  * @param[in] mask
478  *   @p item->mask or flow default bit-masks.
479  * @param[in] nic_mask
480  *   Bit-masks covering supported fields by the NIC to compare with user mask.
481  * @param[in] size
482  *   Size of the bit-masks in bytes.
483  * @param[out] error
484  *   Pointer to error structure.
485  *
486  * @return
487  *   0 on success, a negative errno value otherwise and rte_errno is set.
488  */
489 int
490 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
491                           const uint8_t *mask,
492                           const uint8_t *nic_mask,
493                           unsigned int size,
494                           struct rte_flow_error *error)
495 {
496         unsigned int i;
497
498         assert(nic_mask);
499         for (i = 0; i < size; ++i)
500                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
501                         return rte_flow_error_set(error, ENOTSUP,
502                                                   RTE_FLOW_ERROR_TYPE_ITEM,
503                                                   item,
504                                                   "mask enables unsupported"
505                                                   " bits");
506         if (!item->spec && (item->mask || item->last))
507                 return rte_flow_error_set(error, EINVAL,
508                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
509                                           "mask/last without a spec is not"
510                                           " supported");
511         if (item->spec && item->last) {
512                 uint8_t spec[size];
513                 uint8_t last[size];
514                 unsigned int i;
515                 int ret;
516
517                 for (i = 0; i < size; ++i) {
518                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
519                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
520                 }
521                 ret = memcmp(spec, last, size);
522                 if (ret != 0)
523                         return rte_flow_error_set(error, EINVAL,
524                                                   RTE_FLOW_ERROR_TYPE_ITEM,
525                                                   item,
526                                                   "range is not valid");
527         }
528         return 0;
529 }
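
/*
 * Usage sketch (mirrors the item validators later in this file): the caller
 * supplies the user mask (or the rte_flow default mask) together with the NIC
 * capability mask, e.g. for an Ethernet item:
 *
 *     ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
 *                                     (const uint8_t *)&nic_mask,
 *                                     sizeof(struct rte_flow_item_eth),
 *                                     error);
 */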
530
531 /**
532  * Adjust the hash fields according to the @p flow information.
533  *
534  * @param[in] dev_flow
535  *   Pointer to the mlx5_flow.
536  * @param[in] tunnel
537  *   1 when the hash field is for a tunnel item.
538  * @param[in] layer_types
539  *   ETH_RSS_* types.
540  * @param[in] hash_fields
541  *   Item hash fields.
542  *
543  * @return
544  *   The hash fields that should be used.
545  */
546 uint64_t
547 mlx5_flow_hashfields_adjust(struct mlx5_flow *dev_flow,
548                             int tunnel __rte_unused, uint64_t layer_types,
549                             uint64_t hash_fields)
550 {
551         struct rte_flow *flow = dev_flow->flow;
552 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
553         int rss_request_inner = flow->rss.level >= 2;
554
555         /* Check RSS hash level for tunnel. */
556         if (tunnel && rss_request_inner)
557                 hash_fields |= IBV_RX_HASH_INNER;
558         else if (tunnel || rss_request_inner)
559                 return 0;
560 #endif
561         /* Check if requested layer matches RSS hash fields. */
562         if (!(flow->rss.types & layer_types))
563                 return 0;
564         return hash_fields;
565 }
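
/*
 * Example (illustrative only): for an inner IPv4/UDP match of a tunneled flow
 * created with rss.level == 2, a Verbs-based caller would do roughly:
 *
 *     hash_fields = mlx5_flow_hashfields_adjust(dev_flow, 1,
 *                                               ETH_RSS_NONFRAG_IPV4_UDP,
 *                                               IBV_RX_HASH_SRC_PORT_UDP |
 *                                               IBV_RX_HASH_DST_PORT_UDP);
 *
 * which adds IBV_RX_HASH_INNER when inner RSS was requested, or returns 0
 * (skip this hash spec) when the layer does not match flow->rss.types.
 */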
566
567 /**
568  * Look up and set the tunnel ptype in the Rx queue data. A single ptype can be
569  * used; if several kinds of tunnel rules are used on this queue, the tunnel
570  * ptype is cleared.
571  *
572  * @param rxq_ctrl
573  *   Rx queue to update.
574  */
575 static void
576 flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
577 {
578         unsigned int i;
579         uint32_t tunnel_ptype = 0;
580
581         /* Look up for the ptype to use. */
582         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
583                 if (!rxq_ctrl->flow_tunnels_n[i])
584                         continue;
585                 if (!tunnel_ptype) {
586                         tunnel_ptype = tunnels_info[i].ptype;
587                 } else {
588                         tunnel_ptype = 0;
589                         break;
590                 }
591         }
592         rxq_ctrl->rxq.tunnel = tunnel_ptype;
593 }
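
/*
 * Example (illustrative only): a queue referenced only by VXLAN flows ends up
 * with rxq.tunnel == RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP; as soon as a
 * GRE flow also uses that queue, two tunnel counters become non-zero and the
 * ptype is reset to 0, since a single value cannot describe both tunnel kinds.
 */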
594
595 /**
596  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the device
597  * flow.
598  *
599  * @param[in] dev
600  *   Pointer to the Ethernet device structure.
601  * @param[in] dev_flow
602  *   Pointer to device flow structure.
603  */
604 static void
605 flow_drv_rxq_flags_set(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
606 {
607         struct mlx5_priv *priv = dev->data->dev_private;
608         struct rte_flow *flow = dev_flow->flow;
609         const int mark = !!(dev_flow->actions &
610                             (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
611         const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
612         unsigned int i;
613
614         for (i = 0; i != flow->rss.queue_num; ++i) {
615                 int idx = (*flow->queue)[i];
616                 struct mlx5_rxq_ctrl *rxq_ctrl =
617                         container_of((*priv->rxqs)[idx],
618                                      struct mlx5_rxq_ctrl, rxq);
619
620                 if (mark) {
621                         rxq_ctrl->rxq.mark = 1;
622                         rxq_ctrl->flow_mark_n++;
623                 }
624                 if (tunnel) {
625                         unsigned int j;
626
627                         /* Increase the counter matching the flow. */
628                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
629                                 if ((tunnels_info[j].tunnel &
630                                      dev_flow->layers) ==
631                                     tunnels_info[j].tunnel) {
632                                         rxq_ctrl->flow_tunnels_n[j]++;
633                                         break;
634                                 }
635                         }
636                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
637                 }
638         }
639 }
640
641 /**
642  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) for a flow
643  *
644  * @param[in] dev
645  *   Pointer to the Ethernet device structure.
646  * @param[in] flow
647  *   Pointer to flow structure.
648  */
649 static void
650 flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
651 {
652         struct mlx5_flow *dev_flow;
653
654         LIST_FOREACH(dev_flow, &flow->dev_flows, next)
655                 flow_drv_rxq_flags_set(dev, dev_flow);
656 }
657
658 /**
659  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
660  * device flow if no other flow uses it with the same kind of request.
661  *
662  * @param dev
663  *   Pointer to Ethernet device.
664  * @param[in] dev_flow
665  *   Pointer to the device flow.
666  */
667 static void
668 flow_drv_rxq_flags_trim(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow)
669 {
670         struct mlx5_priv *priv = dev->data->dev_private;
671         struct rte_flow *flow = dev_flow->flow;
672         const int mark = !!(dev_flow->actions &
673                             (MLX5_FLOW_ACTION_FLAG | MLX5_FLOW_ACTION_MARK));
674         const int tunnel = !!(dev_flow->layers & MLX5_FLOW_LAYER_TUNNEL);
675         unsigned int i;
676
677         assert(dev->data->dev_started);
678         for (i = 0; i != flow->rss.queue_num; ++i) {
679                 int idx = (*flow->queue)[i];
680                 struct mlx5_rxq_ctrl *rxq_ctrl =
681                         container_of((*priv->rxqs)[idx],
682                                      struct mlx5_rxq_ctrl, rxq);
683
684                 if (mark) {
685                         rxq_ctrl->flow_mark_n--;
686                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
687                 }
688                 if (tunnel) {
689                         unsigned int j;
690
691                         /* Decrease the counter matching the flow. */
692                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
693                                 if ((tunnels_info[j].tunnel &
694                                      dev_flow->layers) ==
695                                     tunnels_info[j].tunnel) {
696                                         rxq_ctrl->flow_tunnels_n[j]--;
697                                         break;
698                                 }
699                         }
700                         flow_rxq_tunnel_ptype_update(rxq_ctrl);
701                 }
702         }
703 }
704
705 /**
706  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
707  * @p flow if no other flow uses it with the same kind of request.
708  *
709  * @param dev
710  *   Pointer to Ethernet device.
711  * @param[in] flow
712  *   Pointer to the flow.
713  */
714 static void
715 flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
716 {
717         struct mlx5_flow *dev_flow;
718
719         LIST_FOREACH(dev_flow, &flow->dev_flows, next)
720                 flow_drv_rxq_flags_trim(dev, dev_flow);
721 }
722
723 /**
724  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
725  *
726  * @param dev
727  *   Pointer to Ethernet device.
728  */
729 static void
730 flow_rxq_flags_clear(struct rte_eth_dev *dev)
731 {
732         struct mlx5_priv *priv = dev->data->dev_private;
733         unsigned int i;
734
735         for (i = 0; i != priv->rxqs_n; ++i) {
736                 struct mlx5_rxq_ctrl *rxq_ctrl;
737                 unsigned int j;
738
739                 if (!(*priv->rxqs)[i])
740                         continue;
741                 rxq_ctrl = container_of((*priv->rxqs)[i],
742                                         struct mlx5_rxq_ctrl, rxq);
743                 rxq_ctrl->flow_mark_n = 0;
744                 rxq_ctrl->rxq.mark = 0;
745                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
746                         rxq_ctrl->flow_tunnels_n[j] = 0;
747                 rxq_ctrl->rxq.tunnel = 0;
748         }
749 }
750
751 /*
752  * Return a pointer to the desired action in the list of actions.
753  *
754  * @param[in] actions
755  *   The list of actions to search the action in.
756  * @param[in] action
757  *   The action to find.
758  *
759  * @return
760  *   Pointer to the action in the list, if found. NULL otherwise.
761  */
762 const struct rte_flow_action *
763 mlx5_flow_find_action(const struct rte_flow_action *actions,
764                       enum rte_flow_action_type action)
765 {
766         if (actions == NULL)
767                 return NULL;
768         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++)
769                 if (actions->type == action)
770                         return actions;
771         return NULL;
772 }
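
/*
 * Usage sketch (illustrative only):
 *
 *     const struct rte_flow_action *rss_act =
 *             mlx5_flow_find_action(actions, RTE_FLOW_ACTION_TYPE_RSS);
 *
 *     if (rss_act)
 *             rss_conf = rss_act->conf;  (a struct rte_flow_action_rss)
 */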
773
774 /*
775  * Validate the flag action.
776  *
777  * @param[in] action_flags
778  *   Bit-fields that hold the actions detected until now.
779  * @param[in] attr
780  *   Attributes of flow that includes this action.
781  * @param[out] error
782  *   Pointer to error structure.
783  *
784  * @return
785  *   0 on success, a negative errno value otherwise and rte_errno is set.
786  */
787 int
788 mlx5_flow_validate_action_flag(uint64_t action_flags,
789                                const struct rte_flow_attr *attr,
790                                struct rte_flow_error *error)
791 {
792
793         if (action_flags & MLX5_FLOW_ACTION_DROP)
794                 return rte_flow_error_set(error, EINVAL,
795                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
796                                           "can't drop and flag in same flow");
797         if (action_flags & MLX5_FLOW_ACTION_MARK)
798                 return rte_flow_error_set(error, EINVAL,
799                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
800                                           "can't mark and flag in same flow");
801         if (action_flags & MLX5_FLOW_ACTION_FLAG)
802                 return rte_flow_error_set(error, EINVAL,
803                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
804                                           "can't have 2 flag"
805                                           " actions in same flow");
806         if (attr->egress)
807                 return rte_flow_error_set(error, ENOTSUP,
808                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
809                                           "flag action not supported for "
810                                           "egress");
811         return 0;
812 }
813
814 /*
815  * Validate the mark action.
816  *
817  * @param[in] action
818  *   Pointer to the mark action.
819  * @param[in] action_flags
820  *   Bit-fields that hold the actions detected until now.
821  * @param[in] attr
822  *   Attributes of flow that includes this action.
823  * @param[out] error
824  *   Pointer to error structure.
825  *
826  * @return
827  *   0 on success, a negative errno value otherwise and rte_errno is set.
828  */
829 int
830 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
831                                uint64_t action_flags,
832                                const struct rte_flow_attr *attr,
833                                struct rte_flow_error *error)
834 {
835         const struct rte_flow_action_mark *mark = action->conf;
836
837         if (!mark)
838                 return rte_flow_error_set(error, EINVAL,
839                                           RTE_FLOW_ERROR_TYPE_ACTION,
840                                           action,
841                                           "configuration cannot be null");
842         if (mark->id >= MLX5_FLOW_MARK_MAX)
843                 return rte_flow_error_set(error, EINVAL,
844                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
845                                           &mark->id,
846                                           "mark id must be in 0 <= id < "
847                                           RTE_STR(MLX5_FLOW_MARK_MAX));
848         if (action_flags & MLX5_FLOW_ACTION_DROP)
849                 return rte_flow_error_set(error, EINVAL,
850                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
851                                           "can't drop and mark in same flow");
852         if (action_flags & MLX5_FLOW_ACTION_FLAG)
853                 return rte_flow_error_set(error, EINVAL,
854                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
855                                           "can't flag and mark in same flow");
856         if (action_flags & MLX5_FLOW_ACTION_MARK)
857                 return rte_flow_error_set(error, EINVAL,
858                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
859                                           "can't have 2 mark actions in same"
860                                           " flow");
861         if (attr->egress)
862                 return rte_flow_error_set(error, ENOTSUP,
863                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
864                                           "mark action not supported for "
865                                           "egress");
866         return 0;
867 }
868
869 /*
870  * Validate the drop action.
871  *
872  * @param[in] action_flags
873  *   Bit-fields that hold the actions detected until now.
874  * @param[in] attr
875  *   Attributes of flow that includes this action.
876  * @param[out] error
877  *   Pointer to error structure.
878  *
879  * @return
880  *   0 on success, a negative errno value otherwise and rte_errno is set.
881  */
882 int
883 mlx5_flow_validate_action_drop(uint64_t action_flags,
884                                const struct rte_flow_attr *attr,
885                                struct rte_flow_error *error)
886 {
887         if (action_flags & MLX5_FLOW_ACTION_FLAG)
888                 return rte_flow_error_set(error, EINVAL,
889                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
890                                           "can't drop and flag in same flow");
891         if (action_flags & MLX5_FLOW_ACTION_MARK)
892                 return rte_flow_error_set(error, EINVAL,
893                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
894                                           "can't drop and mark in same flow");
895         if (action_flags & (MLX5_FLOW_FATE_ACTIONS |
896                             MLX5_FLOW_FATE_ESWITCH_ACTIONS))
897                 return rte_flow_error_set(error, EINVAL,
898                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
899                                           "can't have 2 fate actions in"
900                                           " same flow");
901         if (attr->egress)
902                 return rte_flow_error_set(error, ENOTSUP,
903                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
904                                           "drop action not supported for "
905                                           "egress");
906         return 0;
907 }
908
909 /*
910  * Validate the queue action.
911  *
912  * @param[in] action
913  *   Pointer to the queue action.
914  * @param[in] action_flags
915  *   Bit-fields that hold the actions detected until now.
916  * @param[in] dev
917  *   Pointer to the Ethernet device structure.
918  * @param[in] attr
919  *   Attributes of flow that includes this action.
920  * @param[out] error
921  *   Pointer to error structure.
922  *
923  * @return
924  *   0 on success, a negative errno value otherwise and rte_errno is set.
925  */
926 int
927 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
928                                 uint64_t action_flags,
929                                 struct rte_eth_dev *dev,
930                                 const struct rte_flow_attr *attr,
931                                 struct rte_flow_error *error)
932 {
933         struct mlx5_priv *priv = dev->data->dev_private;
934         const struct rte_flow_action_queue *queue = action->conf;
935
936         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
937                 return rte_flow_error_set(error, EINVAL,
938                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
939                                           "can't have 2 fate actions in"
940                                           " same flow");
941         if (!priv->rxqs_n)
942                 return rte_flow_error_set(error, EINVAL,
943                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
944                                           NULL, "No Rx queues configured");
945         if (queue->index >= priv->rxqs_n)
946                 return rte_flow_error_set(error, EINVAL,
947                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
948                                           &queue->index,
949                                           "queue index out of range");
950         if (!(*priv->rxqs)[queue->index])
951                 return rte_flow_error_set(error, EINVAL,
952                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
953                                           &queue->index,
954                                           "queue is not configured");
955         if (attr->egress)
956                 return rte_flow_error_set(error, ENOTSUP,
957                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
958                                           "queue action not supported for "
959                                           "egress");
960         return 0;
961 }
962
963 /*
964  * Validate the rss action.
965  *
966  * @param[in] action
967  *   Pointer to the rss action.
968  * @param[in] action_flags
969  *   Bit-fields that hold the actions detected until now.
970  * @param[in] dev
971  *   Pointer to the Ethernet device structure.
972  * @param[in] attr
973  *   Attributes of flow that includes this action.
974  * @param[in] item_flags
975  *   Items that were detected.
976  * @param[out] error
977  *   Pointer to error structure.
978  *
979  * @return
980  *   0 on success, a negative errno value otherwise and rte_errno is set.
981  */
982 int
983 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
984                               uint64_t action_flags,
985                               struct rte_eth_dev *dev,
986                               const struct rte_flow_attr *attr,
987                               uint64_t item_flags,
988                               struct rte_flow_error *error)
989 {
990         struct mlx5_priv *priv = dev->data->dev_private;
991         const struct rte_flow_action_rss *rss = action->conf;
992         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
993         unsigned int i;
994
995         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
996                 return rte_flow_error_set(error, EINVAL,
997                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
998                                           "can't have 2 fate actions"
999                                           " in same flow");
1000         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1001             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1002                 return rte_flow_error_set(error, ENOTSUP,
1003                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1004                                           &rss->func,
1005                                           "RSS hash function not supported");
1006 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1007         if (rss->level > 2)
1008 #else
1009         if (rss->level > 1)
1010 #endif
1011                 return rte_flow_error_set(error, ENOTSUP,
1012                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1013                                           &rss->level,
1014                                           "tunnel RSS is not supported");
1015         /* allow RSS key_len 0 in case of NULL (default) RSS key. */
1016         if (rss->key_len == 0 && rss->key != NULL)
1017                 return rte_flow_error_set(error, ENOTSUP,
1018                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1019                                           &rss->key_len,
1020                                           "RSS hash key length 0");
1021         if (rss->key_len > 0 && rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1022                 return rte_flow_error_set(error, ENOTSUP,
1023                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1024                                           &rss->key_len,
1025                                           "RSS hash key too small");
1026         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1027                 return rte_flow_error_set(error, ENOTSUP,
1028                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1029                                           &rss->key_len,
1030                                           "RSS hash key too large");
1031         if (rss->queue_num > priv->config.ind_table_max_size)
1032                 return rte_flow_error_set(error, ENOTSUP,
1033                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1034                                           &rss->queue_num,
1035                                           "number of queues too large");
1036         if (rss->types & MLX5_RSS_HF_MASK)
1037                 return rte_flow_error_set(error, ENOTSUP,
1038                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1039                                           &rss->types,
1040                                           "some RSS protocols are not"
1041                                           " supported");
1042         if (!priv->rxqs_n)
1043                 return rte_flow_error_set(error, EINVAL,
1044                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1045                                           NULL, "No Rx queues configured");
1046         if (!rss->queue_num)
1047                 return rte_flow_error_set(error, EINVAL,
1048                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1049                                           NULL, "No queues configured");
1050         for (i = 0; i != rss->queue_num; ++i) {
1051                 if (!(*priv->rxqs)[rss->queue[i]])
1052                         return rte_flow_error_set
1053                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1054                                  &rss->queue[i], "queue is not configured");
1055         }
1056         if (attr->egress)
1057                 return rte_flow_error_set(error, ENOTSUP,
1058                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1059                                           "rss action not supported for "
1060                                           "egress");
1061         if (rss->level > 1 && !tunnel)
1062                 return rte_flow_error_set(error, EINVAL,
1063                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF, NULL,
1064                                           "inner RSS is not supported for "
1065                                           "non-tunnel flows");
1066         return 0;
1067 }
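
/*
 * Sketch of an RSS configuration that passes the checks above (illustrative
 * only; field values are assumptions and queues 0/1 must be configured):
 *
 *     static uint8_t key[MLX5_RSS_HASH_KEY_LEN];
 *     static const uint16_t queues[] = { 0, 1 };
 *     struct rte_flow_action_rss rss = {
 *             .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *             .level = 1,
 *             .types = ETH_RSS_IPV4,
 *             .key_len = MLX5_RSS_HASH_KEY_LEN,
 *             .key = key,
 *             .queue_num = RTE_DIM(queues),
 *             .queue = queues,
 *     };
 */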
1068
1069 /*
1070  * Validate the count action.
1071  *
1072  * @param[in] dev
1073  *   Pointer to the Ethernet device structure.
1074  * @param[in] attr
1075  *   Attributes of flow that includes this action.
1076  * @param[out] error
1077  *   Pointer to error structure.
1078  *
1079  * @return
1080  *   0 on success, a negative errno value otherwise and rte_errno is set.
1081  */
1082 int
1083 mlx5_flow_validate_action_count(struct rte_eth_dev *dev __rte_unused,
1084                                 const struct rte_flow_attr *attr,
1085                                 struct rte_flow_error *error)
1086 {
1087         if (attr->egress)
1088                 return rte_flow_error_set(error, ENOTSUP,
1089                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1090                                           "count action not supported for "
1091                                           "egress");
1092         return 0;
1093 }
1094
1095 /**
1096  * Verify the @p attributes will be correctly understood by the NIC and store
1097  * them in the @p flow if everything is correct.
1098  *
1099  * @param[in] dev
1100  *   Pointer to the Ethernet device structure.
1101  * @param[in] attributes
1102  *   Pointer to flow attributes
1103  * @param[out] error
1104  *   Pointer to error structure.
1105  *
1106  * @return
1107  *   0 on success, a negative errno value otherwise and rte_errno is set.
1108  */
1109 int
1110 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
1111                               const struct rte_flow_attr *attributes,
1112                               struct rte_flow_error *error)
1113 {
1114         struct mlx5_priv *priv = dev->data->dev_private;
1115         uint32_t priority_max = priv->config.flow_prio - 1;
1116
1117         if (attributes->group)
1118                 return rte_flow_error_set(error, ENOTSUP,
1119                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
1120                                           NULL, "groups are not supported");
1121         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
1122             attributes->priority >= priority_max)
1123                 return rte_flow_error_set(error, ENOTSUP,
1124                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
1125                                           NULL, "priority out of range");
1126         if (attributes->egress)
1127                 return rte_flow_error_set(error, ENOTSUP,
1128                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
1129                                           "egress is not supported");
1130         if (attributes->transfer && !priv->config.dv_esw_en)
1131                 return rte_flow_error_set(error, ENOTSUP,
1132                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
1133                                           NULL, "transfer is not supported");
1134         if (!attributes->ingress)
1135                 return rte_flow_error_set(error, EINVAL,
1136                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
1137                                           NULL,
1138                                           "ingress attribute is mandatory");
1139         return 0;
1140 }
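
/*
 * Example of attributes accepted by this function (illustrative only):
 *
 *     const struct rte_flow_attr attr = {
 *             .group = 0,
 *             .priority = MLX5_FLOW_PRIO_RSVD,
 *             .ingress = 1,
 *             .egress = 0,
 *             .transfer = 0,
 *     };
 *
 * MLX5_FLOW_PRIO_RSVD lets the PMD pick the priority; any explicit value must
 * be lower than priv->config.flow_prio - 1.
 */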
1141
1142 /**
1143  * Validate ICMP6 item.
1144  *
1145  * @param[in] item
1146  *   Item specification.
1147  * @param[in] item_flags
1148  *   Bit-fields that hold the items detected until now.
1149  * @param[out] error
1150  *   Pointer to error structure.
1151  *
1152  * @return
1153  *   0 on success, a negative errno value otherwise and rte_errno is set.
1154  */
1155 int
1156 mlx5_flow_validate_item_icmp6(const struct rte_flow_item *item,
1157                                uint64_t item_flags,
1158                                uint8_t target_protocol,
1159                                struct rte_flow_error *error)
1160 {
1161         const struct rte_flow_item_icmp6 *mask = item->mask;
1162         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1163         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1164                                       MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1165         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1166                                       MLX5_FLOW_LAYER_OUTER_L4;
1167         int ret;
1168
1169         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMPV6)
1170                 return rte_flow_error_set(error, EINVAL,
1171                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1172                                           "protocol filtering not compatible"
1173                                           " with ICMP6 layer");
1174         if (!(item_flags & l3m))
1175                 return rte_flow_error_set(error, EINVAL,
1176                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1177                                           "IPv6 is mandatory to filter on"
1178                                           " ICMP6");
1179         if (item_flags & l4m)
1180                 return rte_flow_error_set(error, EINVAL,
1181                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1182                                           "multiple L4 layers not supported");
1183         if (!mask)
1184                 mask = &rte_flow_item_icmp6_mask;
1185         ret = mlx5_flow_item_acceptable
1186                 (item, (const uint8_t *)mask,
1187                  (const uint8_t *)&rte_flow_item_icmp6_mask,
1188                  sizeof(struct rte_flow_item_icmp6), error);
1189         if (ret < 0)
1190                 return ret;
1191         return 0;
1192 }
1193
1194 /**
1195  * Validate ICMP item.
1196  *
1197  * @param[in] item
1198  *   Item specification.
1199  * @param[in] item_flags
1200  *   Bit-fields that hold the items detected until now.
1201  * @param[out] error
1202  *   Pointer to error structure.
1203  *
1204  * @return
1205  *   0 on success, a negative errno value otherwise and rte_errno is set.
1206  */
1207 int
1208 mlx5_flow_validate_item_icmp(const struct rte_flow_item *item,
1209                              uint64_t item_flags,
1210                              uint8_t target_protocol,
1211                              struct rte_flow_error *error)
1212 {
1213         const struct rte_flow_item_icmp *mask = item->mask;
1214         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1215         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
1216                                       MLX5_FLOW_LAYER_OUTER_L3_IPV4;
1217         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1218                                       MLX5_FLOW_LAYER_OUTER_L4;
1219         int ret;
1220
1221         if (target_protocol != 0xFF && target_protocol != IPPROTO_ICMP)
1222                 return rte_flow_error_set(error, EINVAL,
1223                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1224                                           "protocol filtering not compatible"
1225                                           " with ICMP layer");
1226         if (!(item_flags & l3m))
1227                 return rte_flow_error_set(error, EINVAL,
1228                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1229                                           "IPv4 is mandatory to filter"
1230                                           " on ICMP");
1231         if (item_flags & l4m)
1232                 return rte_flow_error_set(error, EINVAL,
1233                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1234                                           "multiple L4 layers not supported");
1235         if (!mask)
1236                 mask = &rte_flow_item_icmp_mask;
1237         ret = mlx5_flow_item_acceptable
1238                 (item, (const uint8_t *)mask,
1239                  (const uint8_t *)&rte_flow_item_icmp_mask,
1240                  sizeof(struct rte_flow_item_icmp), error);
1241         if (ret < 0)
1242                 return ret;
1243         return 0;
1244 }
1245
1246 /**
1247  * Validate Ethernet item.
1248  *
1249  * @param[in] item
1250  *   Item specification.
1251  * @param[in] item_flags
1252  *   Bit-fields that holds the items detected until now.
1253  * @param[out] error
1254  *   Pointer to error structure.
1255  *
1256  * @return
1257  *   0 on success, a negative errno value otherwise and rte_errno is set.
1258  */
1259 int
1260 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
1261                             uint64_t item_flags,
1262                             struct rte_flow_error *error)
1263 {
1264         const struct rte_flow_item_eth *mask = item->mask;
1265         const struct rte_flow_item_eth nic_mask = {
1266                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1267                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1268                 .type = RTE_BE16(0xffff),
1269         };
1270         int ret;
1271         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1272         const uint64_t ethm = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
1273                                        MLX5_FLOW_LAYER_OUTER_L2;
1274
1275         if (item_flags & ethm)
1276                 return rte_flow_error_set(error, ENOTSUP,
1277                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1278                                           "multiple L2 layers not supported");
1279         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_L3)) ||
1280             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_L3)))
1281                 return rte_flow_error_set(error, EINVAL,
1282                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1283                                           "L2 layer should not follow "
1284                                           "L3 layers");
1285         if ((!tunnel && (item_flags & MLX5_FLOW_LAYER_OUTER_VLAN)) ||
1286             (tunnel && (item_flags & MLX5_FLOW_LAYER_INNER_VLAN)))
1287                 return rte_flow_error_set(error, EINVAL,
1288                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1289                                           "L2 layer should not follow VLAN");
1290         if (!mask)
1291                 mask = &rte_flow_item_eth_mask;
1292         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1293                                         (const uint8_t *)&nic_mask,
1294                                         sizeof(struct rte_flow_item_eth),
1295                                         error);
1296         return ret;
1297 }
1298
1299 /**
1300  * Validate VLAN item.
1301  *
1302  * @param[in] item
1303  *   Item specification.
1304  * @param[in] item_flags
1305  *   Bit-fields that hold the items detected until now.
1306  * @param[in] dev
1307  *   Ethernet device flow is being created on.
1308  * @param[out] error
1309  *   Pointer to error structure.
1310  *
1311  * @return
1312  *   0 on success, a negative errno value otherwise and rte_errno is set.
1313  */
1314 int
1315 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
1316                              uint64_t item_flags,
1317                              struct rte_eth_dev *dev,
1318                              struct rte_flow_error *error)
1319 {
1320         const struct rte_flow_item_vlan *spec = item->spec;
1321         const struct rte_flow_item_vlan *mask = item->mask;
1322         const struct rte_flow_item_vlan nic_mask = {
1323                 .tci = RTE_BE16(UINT16_MAX),
1324                 .inner_type = RTE_BE16(UINT16_MAX),
1325         };
1326         uint16_t vlan_tag = 0;
1327         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1328         int ret;
1329         const uint64_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
1330                                         MLX5_FLOW_LAYER_INNER_L4) :
1331                                        (MLX5_FLOW_LAYER_OUTER_L3 |
1332                                         MLX5_FLOW_LAYER_OUTER_L4);
1333         const uint64_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
1334                                         MLX5_FLOW_LAYER_OUTER_VLAN;
1335
1336         if (item_flags & vlanm)
1337                 return rte_flow_error_set(error, EINVAL,
1338                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1339                                           "multiple VLAN layers not supported");
1340         else if ((item_flags & l34m) != 0)
1341                 return rte_flow_error_set(error, EINVAL,
1342                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1343                                           "VLAN cannot follow L3/L4 layer");
1344         if (!mask)
1345                 mask = &rte_flow_item_vlan_mask;
1346         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1347                                         (const uint8_t *)&nic_mask,
1348                                         sizeof(struct rte_flow_item_vlan),
1349                                         error);
1350         if (ret)
1351                 return ret;
1352         if (!tunnel && mask->tci != RTE_BE16(0x0fff)) {
1353                 struct mlx5_priv *priv = dev->data->dev_private;
1354
1355                 if (priv->vmwa_context) {
1356                         /*
1357                          * A non-NULL context means we are in a virtual machine
1358                          * with SR-IOV enabled, and a VLAN interface has to be
1359                          * created so the hypervisor sets up the E-Switch vport
1360                          * context correctly. Creating multiple VLAN interfaces
1361                          * is avoided, hence a VLAN tag mask is not supported.
1362                          */
1363                         return rte_flow_error_set(error, EINVAL,
1364                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1365                                                   item,
1366                                                   "VLAN tag mask is not"
1367                                                   " supported in virtual"
1368                                                   " environment");
1369                 }
1370         }
1371         if (spec) {
1372                 vlan_tag = spec->tci;
1373                 vlan_tag &= mask->tci;
1374         }
1375         /*
1376          * From the Verbs perspective an empty VLAN is equivalent
1377          * to a packet without a VLAN layer.
1378          */
1379         if (!vlan_tag)
1380                 return rte_flow_error_set(error, EINVAL,
1381                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
1382                                           item->spec,
1383                                           "VLAN cannot be empty");
1384         return 0;
1385 }
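
/*
 * Illustrative sketch (not part of the driver logic): a VLAN item passing
 * the check above must yield a non-zero TCI once spec and mask are
 * combined, for instance:
 *
 *     struct rte_flow_item_vlan vlan_spec = { .tci = RTE_BE16(100) };
 *     struct rte_flow_item_vlan vlan_mask = { .tci = RTE_BE16(0x0fff) };
 *
 * An empty (all-zero) VLAN is refused, as is any TCI mask other than the
 * VID-only 0x0fff mask when the VM workaround context is active.
 */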
1386
1387 /**
1388  * Validate IPV4 item.
1389  *
1390  * @param[in] item
1391  *   Item specification.
1392  * @param[in] item_flags
1393  *   Bit-fields that hold the items detected until now.
      * @param[in] last_item
      *   Previous validated item in the pattern items.
      * @param[in] ether_type
      *   Type in the ethernet layer header (including dot1q).
1394  * @param[in] acc_mask
1395  *   Acceptable mask, if NULL the default internal mask
1396  *   will be used to check whether item fields are supported.
1397  * @param[out] error
1398  *   Pointer to error structure.
1399  *
1400  * @return
1401  *   0 on success, a negative errno value otherwise and rte_errno is set.
1402  */
1403 int
1404 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
1405                              uint64_t item_flags,
1406                              uint64_t last_item,
1407                              uint16_t ether_type,
1408                              const struct rte_flow_item_ipv4 *acc_mask,
1409                              struct rte_flow_error *error)
1410 {
1411         const struct rte_flow_item_ipv4 *mask = item->mask;
1412         const struct rte_flow_item_ipv4 *spec = item->spec;
1413         const struct rte_flow_item_ipv4 nic_mask = {
1414                 .hdr = {
1415                         .src_addr = RTE_BE32(0xffffffff),
1416                         .dst_addr = RTE_BE32(0xffffffff),
1417                         .type_of_service = 0xff,
1418                         .next_proto_id = 0xff,
1419                 },
1420         };
1421         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1422         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1423                                       MLX5_FLOW_LAYER_OUTER_L3;
1424         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1425                                       MLX5_FLOW_LAYER_OUTER_L4;
1426         int ret;
1427         uint8_t next_proto = 0xFF;
1428         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1429                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1430                                   MLX5_FLOW_LAYER_INNER_VLAN);
1431
1432         if ((last_item & l2_vlan) && ether_type &&
1433             ether_type != RTE_ETHER_TYPE_IPV4)
1434                 return rte_flow_error_set(error, EINVAL,
1435                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1436                                           "IPv4 cannot follow L2/VLAN layer "
1437                                           "which ether type is not IPv4");
1438         if (item_flags & MLX5_FLOW_LAYER_IPIP) {
1439                 if (mask && spec)
1440                         next_proto = mask->hdr.next_proto_id &
1441                                      spec->hdr.next_proto_id;
1442                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1443                         return rte_flow_error_set(error, EINVAL,
1444                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1445                                                   item,
1446                                                   "multiple tunnel "
1447                                                   "not supported");
1448         }
1449         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP)
1450                 return rte_flow_error_set(error, EINVAL,
1451                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1452                                           "wrong tunnel type - IPv6 specified "
1453                                           "but IPv4 item provided");
1454         if (item_flags & l3m)
1455                 return rte_flow_error_set(error, ENOTSUP,
1456                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1457                                           "multiple L3 layers not supported");
1458         else if (item_flags & l4m)
1459                 return rte_flow_error_set(error, EINVAL,
1460                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1461                                           "L3 cannot follow an L4 layer.");
1462         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1463                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1464                 return rte_flow_error_set(error, EINVAL,
1465                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1466                                           "L3 cannot follow an NVGRE layer.");
1467         if (!mask)
1468                 mask = &rte_flow_item_ipv4_mask;
1469         else if (mask->hdr.next_proto_id != 0 &&
1470                  mask->hdr.next_proto_id != 0xff)
1471                 return rte_flow_error_set(error, EINVAL,
1472                                           RTE_FLOW_ERROR_TYPE_ITEM_MASK, mask,
1473                                           "partial mask is not supported"
1474                                           " for protocol");
1475         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1476                                         acc_mask ? (const uint8_t *)acc_mask
1477                                                  : (const uint8_t *)&nic_mask,
1478                                         sizeof(struct rte_flow_item_ipv4),
1479                                         error);
1480         if (ret < 0)
1481                 return ret;
1482         return 0;
1483 }
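
/*
 * Illustrative note (not exercised by the driver): typical accepted stacks
 * are "eth / ipv4 / udp" or "eth / vlan / ipv4"; a second L3 item in the
 * same encapsulation level is rejected, and only an empty or full (0xff)
 * next_proto_id mask is accepted.
 */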
1484
1485 /**
1486  * Validate IPV6 item.
1487  *
1488  * @param[in] item
1489  *   Item specification.
1490  * @param[in] item_flags
1491  *   Bit-fields that hold the items detected until now.
      * @param[in] last_item
      *   Previous validated item in the pattern items.
      * @param[in] ether_type
      *   Type in the ethernet layer header (including dot1q).
1492  * @param[in] acc_mask
1493  *   Acceptable mask, if NULL the default internal mask
1494  *   will be used to check whether item fields are supported.
1495  * @param[out] error
1496  *   Pointer to error structure.
1497  *
1498  * @return
1499  *   0 on success, a negative errno value otherwise and rte_errno is set.
1500  */
1501 int
1502 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
1503                              uint64_t item_flags,
1504                              uint64_t last_item,
1505                              uint16_t ether_type,
1506                              const struct rte_flow_item_ipv6 *acc_mask,
1507                              struct rte_flow_error *error)
1508 {
1509         const struct rte_flow_item_ipv6 *mask = item->mask;
1510         const struct rte_flow_item_ipv6 *spec = item->spec;
1511         const struct rte_flow_item_ipv6 nic_mask = {
1512                 .hdr = {
1513                         .src_addr =
1514                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1515                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1516                         .dst_addr =
1517                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
1518                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
1519                         .vtc_flow = RTE_BE32(0xffffffff),
1520                         .proto = 0xff,
1521                         .hop_limits = 0xff,
1522                 },
1523         };
1524         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1525         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1526                                       MLX5_FLOW_LAYER_OUTER_L3;
1527         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1528                                       MLX5_FLOW_LAYER_OUTER_L4;
1529         int ret;
1530         uint8_t next_proto = 0xFF;
1531         const uint64_t l2_vlan = (MLX5_FLOW_LAYER_L2 |
1532                                   MLX5_FLOW_LAYER_OUTER_VLAN |
1533                                   MLX5_FLOW_LAYER_INNER_VLAN);
1534
1535         if ((last_item & l2_vlan) && ether_type &&
1536             ether_type != RTE_ETHER_TYPE_IPV6)
1537                 return rte_flow_error_set(error, EINVAL,
1538                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1539                                           "IPv6 cannot follow L2/VLAN layer "
1540                                           "which ether type is not IPv6");
1541         if (item_flags & MLX5_FLOW_LAYER_IPV6_ENCAP) {
1542                 if (mask && spec)
1543                         next_proto = mask->hdr.proto & spec->hdr.proto;
1544                 if (next_proto == IPPROTO_IPIP || next_proto == IPPROTO_IPV6)
1545                         return rte_flow_error_set(error, EINVAL,
1546                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1547                                                   item,
1548                                                   "multiple tunnel "
1549                                                   "not supported");
1550         }
1551         if (item_flags & MLX5_FLOW_LAYER_IPIP)
1552                 return rte_flow_error_set(error, EINVAL,
1553                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1554                                           "wrong tunnel type - IPv4 specified "
1555                                           "but IPv6 item provided");
1556         if (item_flags & l3m)
1557                 return rte_flow_error_set(error, ENOTSUP,
1558                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1559                                           "multiple L3 layers not supported");
1560         else if (item_flags & l4m)
1561                 return rte_flow_error_set(error, EINVAL,
1562                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1563                                           "L3 cannot follow an L4 layer.");
1564         else if ((item_flags & MLX5_FLOW_LAYER_NVGRE) &&
1565                   !(item_flags & MLX5_FLOW_LAYER_INNER_L2))
1566                 return rte_flow_error_set(error, EINVAL,
1567                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1568                                           "L3 cannot follow an NVGRE layer.");
1569         if (!mask)
1570                 mask = &rte_flow_item_ipv6_mask;
1571         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
1572                                         acc_mask ? (const uint8_t *)acc_mask
1573                                                  : (const uint8_t *)&nic_mask,
1574                                         sizeof(struct rte_flow_item_ipv6),
1575                                         error);
1576         if (ret < 0)
1577                 return ret;
1578         return 0;
1579 }
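
/*
 * Illustrative note: the IPv6 checks mirror the IPv4 ones above, e.g.
 * "eth / ipv6 / tcp" is accepted, while an IPv6 item inside an IPv4
 * (IPIP) tunnel is rejected as a wrong tunnel type.
 */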
1580
1581 /**
1582  * Validate UDP item.
1583  *
1584  * @param[in] item
1585  *   Item specification.
1586  * @param[in] item_flags
1587  *   Bit-fields that hold the items detected until now.
1588  * @param[in] target_protocol
1589  *   The next protocol in the previous item.
1592  * @param[out] error
1593  *   Pointer to error structure.
1594  *
1595  * @return
1596  *   0 on success, a negative errno value otherwise and rte_errno is set.
1597  */
1598 int
1599 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
1600                             uint64_t item_flags,
1601                             uint8_t target_protocol,
1602                             struct rte_flow_error *error)
1603 {
1604         const struct rte_flow_item_udp *mask = item->mask;
1605         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1606         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1607                                       MLX5_FLOW_LAYER_OUTER_L3;
1608         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1609                                       MLX5_FLOW_LAYER_OUTER_L4;
1610         int ret;
1611
1612         if (target_protocol != 0xff && target_protocol != IPPROTO_UDP)
1613                 return rte_flow_error_set(error, EINVAL,
1614                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1615                                           "protocol filtering not compatible"
1616                                           " with UDP layer");
1617         if (!(item_flags & l3m))
1618                 return rte_flow_error_set(error, EINVAL,
1619                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1620                                           "L3 is mandatory to filter on L4");
1621         if (item_flags & l4m)
1622                 return rte_flow_error_set(error, EINVAL,
1623                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1624                                           "multiple L4 layers not supported");
1625         if (!mask)
1626                 mask = &rte_flow_item_udp_mask;
1627         ret = mlx5_flow_item_acceptable
1628                 (item, (const uint8_t *)mask,
1629                  (const uint8_t *)&rte_flow_item_udp_mask,
1630                  sizeof(struct rte_flow_item_udp), error);
1631         if (ret < 0)
1632                 return ret;
1633         return 0;
1634 }
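
/*
 * Illustrative note: a UDP item is only valid on top of an L3 item, e.g.
 * "eth / ipv4 / udp" is accepted while "eth / udp" fails because L3 is
 * mandatory to filter on L4, and a second L4 item is rejected.
 */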
1635
1636 /**
1637  * Validate TCP item.
1638  *
1639  * @param[in] item
1640  *   Item specification.
1641  * @param[in] item_flags
1642  *   Bit-fields that hold the items detected until now.
1643  * @param[in] target_protocol
1644  *   The next protocol in the previous item.
      * @param[in] flow_mask
      *   mlx5 flow-specific (DV, verbs, etc.) supported header fields mask.
1645  * @param[out] error
1646  *   Pointer to error structure.
1647  *
1648  * @return
1649  *   0 on success, a negative errno value otherwise and rte_errno is set.
1650  */
1651 int
1652 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
1653                             uint64_t item_flags,
1654                             uint8_t target_protocol,
1655                             const struct rte_flow_item_tcp *flow_mask,
1656                             struct rte_flow_error *error)
1657 {
1658         const struct rte_flow_item_tcp *mask = item->mask;
1659         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
1660         const uint64_t l3m = tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1661                                       MLX5_FLOW_LAYER_OUTER_L3;
1662         const uint64_t l4m = tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1663                                       MLX5_FLOW_LAYER_OUTER_L4;
1664         int ret;
1665
1666         assert(flow_mask);
1667         if (target_protocol != 0xff && target_protocol != IPPROTO_TCP)
1668                 return rte_flow_error_set(error, EINVAL,
1669                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1670                                           "protocol filtering not compatible"
1671                                           " with TCP layer");
1672         if (!(item_flags & l3m))
1673                 return rte_flow_error_set(error, EINVAL,
1674                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1675                                           "L3 is mandatory to filter on L4");
1676         if (item_flags & l4m)
1677                 return rte_flow_error_set(error, EINVAL,
1678                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1679                                           "multiple L4 layers not supported");
1680         if (!mask)
1681                 mask = &rte_flow_item_tcp_mask;
1682         ret = mlx5_flow_item_acceptable
1683                 (item, (const uint8_t *)mask,
1684                  (const uint8_t *)flow_mask,
1685                  sizeof(struct rte_flow_item_tcp), error);
1686         if (ret < 0)
1687                 return ret;
1688         return 0;
1689 }
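
/*
 * Illustrative note: the TCP item follows the same layering rules as UDP
 * above; in addition the caller passes flow_mask, the backend-specific
 * (DV or Verbs) set of TCP header fields the item mask is checked against.
 */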
1690
1691 /**
1692  * Validate VXLAN item.
1693  *
1694  * @param[in] item
1695  *   Item specification.
1696  * @param[in] item_flags
1697  *   Bit-fields that hold the items detected until now.
1700  * @param[out] error
1701  *   Pointer to error structure.
1702  *
1703  * @return
1704  *   0 on success, a negative errno value otherwise and rte_errno is set.
1705  */
1706 int
1707 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
1708                               uint64_t item_flags,
1709                               struct rte_flow_error *error)
1710 {
1711         const struct rte_flow_item_vxlan *spec = item->spec;
1712         const struct rte_flow_item_vxlan *mask = item->mask;
1713         int ret;
1714         union vni {
1715                 uint32_t vlan_id;
1716                 uint8_t vni[4];
1717         } id = { .vlan_id = 0, };
1718         uint32_t vlan_id = 0;
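        /*
         * Note: despite their names, "id.vlan_id" and "vlan_id" carry the
         * 24-bit VXLAN VNI packed into a 32-bit scalar (byte 0 left zero),
         * so spec and mask can be ANDed and tested against zero below.
         */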
1719
1720
1721         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1722                 return rte_flow_error_set(error, ENOTSUP,
1723                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1724                                           "multiple tunnel layers not"
1725                                           " supported");
1726         /*
1727          * Verify only UDPv4 is present as defined in
1728          * https://tools.ietf.org/html/rfc7348
1729          */
1730         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1731                 return rte_flow_error_set(error, EINVAL,
1732                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1733                                           "no outer UDP layer found");
1734         if (!mask)
1735                 mask = &rte_flow_item_vxlan_mask;
1736         ret = mlx5_flow_item_acceptable
1737                 (item, (const uint8_t *)mask,
1738                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1739                  sizeof(struct rte_flow_item_vxlan),
1740                  error);
1741         if (ret < 0)
1742                 return ret;
1743         if (spec) {
1744                 memcpy(&id.vni[1], spec->vni, 3);
1745                 vlan_id = id.vlan_id;
1746                 memcpy(&id.vni[1], mask->vni, 3);
1747                 vlan_id &= id.vlan_id;
1748         }
1749         /*
1750          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
1751          * only this layer is defined in the Verbs specification it is
1752          * interpreted as a wildcard and all packets will match this
1753          * rule; if it follows a full stack layer (ex: eth / ipv4 /
1754          * udp), all packets matching the preceding layers will also
1755          * match this rule. To avoid such a situation, VNI 0 is
1756          * currently refused.
1757          */
1758         if (!vlan_id)
1759                 return rte_flow_error_set(error, ENOTSUP,
1760                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1761                                           "VXLAN vni cannot be 0");
1762         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1763                 return rte_flow_error_set(error, ENOTSUP,
1764                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1765                                           "VXLAN tunnel must be fully defined");
1766         return 0;
1767 }
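
/*
 * Illustrative sketch (not part of the driver logic): a VXLAN item
 * accepted above carries a non-zero VNI and sits on a fully specified
 * outer stack, e.g. "eth / ipv4 / udp / vxlan" with:
 *
 *     struct rte_flow_item_vxlan vxlan_spec = { .vni = "\x00\x00\x2a" };
 *
 * VNI 0 and patterns lacking the outer UDP layer are refused.
 */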
1768
1769 /**
1770  * Validate VXLAN_GPE item.
1771  *
1772  * @param[in] item
1773  *   Item specification.
1774  * @param[in] item_flags
1775  *   Bit-fields that hold the items detected until now.
1776  * @param[in] dev
1777  *   Pointer to the Ethernet device structure.
1780  * @param[out] error
1781  *   Pointer to error structure.
1782  *
1783  * @return
1784  *   0 on success, a negative errno value otherwise and rte_errno is set.
1785  */
1786 int
1787 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
1788                                   uint64_t item_flags,
1789                                   struct rte_eth_dev *dev,
1790                                   struct rte_flow_error *error)
1791 {
1792         struct mlx5_priv *priv = dev->data->dev_private;
1793         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1794         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1795         int ret;
1796         union vni {
1797                 uint32_t vlan_id;
1798                 uint8_t vni[4];
1799         } id = { .vlan_id = 0, };
1800         uint32_t vlan_id = 0;
1801
1802         if (!priv->config.l3_vxlan_en)
1803                 return rte_flow_error_set(error, ENOTSUP,
1804                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1805                                           "L3 VXLAN is not enabled by device"
1806                                           " parameter and/or not configured in"
1807                                           " firmware");
1808         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1809                 return rte_flow_error_set(error, ENOTSUP,
1810                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1811                                           "multiple tunnel layers not"
1812                                           " supported");
1813         /*
1814          * Verify an outer UDP layer is present, as required for the
1815          * UDP-based VXLAN-GPE encapsulation.
1816          */
1817         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1818                 return rte_flow_error_set(error, EINVAL,
1819                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1820                                           "no outer UDP layer found");
1821         if (!mask)
1822                 mask = &rte_flow_item_vxlan_gpe_mask;
1823         ret = mlx5_flow_item_acceptable
1824                 (item, (const uint8_t *)mask,
1825                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1826                  sizeof(struct rte_flow_item_vxlan_gpe),
1827                  error);
1828         if (ret < 0)
1829                 return ret;
1830         if (spec) {
1831                 if (spec->protocol)
1832                         return rte_flow_error_set(error, ENOTSUP,
1833                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1834                                                   item,
1835                                                   "VxLAN-GPE protocol"
1836                                                   " not supported");
1837                 memcpy(&id.vni[1], spec->vni, 3);
1838                 vlan_id = id.vlan_id;
1839                 memcpy(&id.vni[1], mask->vni, 3);
1840                 vlan_id &= id.vlan_id;
1841         }
1842         /*
1843          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1844          * layer is defined in the Verbs specification it is interpreted as a
1845          * wildcard and all packets will match this rule; if it follows a full
1846          * stack layer (ex: eth / ipv4 / udp), all packets matching the
1847          * preceding layers will also match this rule. To avoid such a
1848          * situation, VNI 0 is currently refused.
1849          */
1850         if (!vlan_id)
1851                 return rte_flow_error_set(error, ENOTSUP,
1852                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1853                                           "VXLAN-GPE vni cannot be 0");
1854         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
1855                 return rte_flow_error_set(error, ENOTSUP,
1856                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1857                                           "VXLAN-GPE tunnel must be fully"
1858                                           " defined");
1859         return 0;
1860 }
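
/*
 * Note: in addition to the VXLAN rules above, VXLAN-GPE matching requires
 * the l3_vxlan_en device argument to be set (with matching firmware
 * configuration), and the GPE protocol field cannot be matched.
 */
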
1861 /**
1862  * Validate GRE Key item.
1863  *
1864  * @param[in] item
1865  *   Item specification.
1866  * @param[in] item_flags
1867  *   Bit flags to mark detected items.
1868  * @param[in] gre_item
1869  *   Pointer to the GRE item the key belongs to.
1870  * @param[out] error
1871  *   Pointer to error structure.
1872  *
1873  * @return
1874  *   0 on success, a negative errno value otherwise and rte_errno is set.
1875  */
1876 int
1877 mlx5_flow_validate_item_gre_key(const struct rte_flow_item *item,
1878                                 uint64_t item_flags,
1879                                 const struct rte_flow_item *gre_item,
1880                                 struct rte_flow_error *error)
1881 {
1882         const rte_be32_t *mask = item->mask;
1883         int ret = 0;
1884         rte_be32_t gre_key_default_mask = RTE_BE32(UINT32_MAX);
1885         const struct rte_flow_item_gre *gre_spec = gre_item->spec;
1886         const struct rte_flow_item_gre *gre_mask = gre_item->mask;
1887
1888         if (item_flags & MLX5_FLOW_LAYER_GRE_KEY)
1889                 return rte_flow_error_set(error, ENOTSUP,
1890                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1891                                           "multiple GRE keys not supported");
1892         if (!(item_flags & MLX5_FLOW_LAYER_GRE))
1893                 return rte_flow_error_set(error, ENOTSUP,
1894                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1895                                           "No preceding GRE header");
1896         if (item_flags & MLX5_FLOW_LAYER_INNER)
1897                 return rte_flow_error_set(error, ENOTSUP,
1898                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1899                                           "GRE key following a wrong item");
1900         if (!gre_mask)
1901                 gre_mask = &rte_flow_item_gre_mask;
1902         if (gre_spec && (gre_mask->c_rsvd0_ver & RTE_BE16(0x2000)) &&
1903                          !(gre_spec->c_rsvd0_ver & RTE_BE16(0x2000)))
1904                 return rte_flow_error_set(error, EINVAL,
1905                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1906                                           "Key bit must be on");
1907
1908         if (!mask)
1909                 mask = &gre_key_default_mask;
1910         ret = mlx5_flow_item_acceptable
1911                 (item, (const uint8_t *)mask,
1912                  (const uint8_t *)&gre_key_default_mask,
1913                  sizeof(rte_be32_t), error);
1914         return ret;
1915 }
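
/*
 * Illustrative sketch (not part of the driver logic): a GRE_KEY item must
 * follow a GRE item whose spec/mask keeps the key-present bit (0x2000)
 * set, for instance:
 *
 *     struct rte_flow_item_gre gre_spec = {
 *             .c_rsvd0_ver = RTE_BE16(0x2000),
 *     };
 *     rte_be32_t gre_key_spec = RTE_BE32(0x1234);
 *
 * where gre_key_spec is used as the GRE_KEY item spec.
 */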
1916
1917 /**
1918  * Validate GRE item.
1919  *
1920  * @param[in] item
1921  *   Item specification.
1922  * @param[in] item_flags
1923  *   Bit flags to mark detected items.
1924  * @param[in] target_protocol
1925  *   The next protocol in the previous item.
1926  * @param[out] error
1927  *   Pointer to error structure.
1928  *
1929  * @return
1930  *   0 on success, a negative errno value otherwise and rte_errno is set.
1931  */
1932 int
1933 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
1934                             uint64_t item_flags,
1935                             uint8_t target_protocol,
1936                             struct rte_flow_error *error)
1937 {
1938         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
1939         const struct rte_flow_item_gre *mask = item->mask;
1940         int ret;
1941         const struct rte_flow_item_gre nic_mask = {
1942                 .c_rsvd0_ver = RTE_BE16(0xB000),
1943                 .protocol = RTE_BE16(UINT16_MAX),
1944         };
1945
1946         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
1947                 return rte_flow_error_set(error, EINVAL,
1948                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1949                                           "protocol filtering not compatible"
1950                                           " with this GRE layer");
1951         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
1952                 return rte_flow_error_set(error, ENOTSUP,
1953                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1954                                           "multiple tunnel layers not"
1955                                           " supported");
1956         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
1957                 return rte_flow_error_set(error, ENOTSUP,
1958                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1959                                           "L3 Layer is missing");
1960         if (!mask)
1961                 mask = &rte_flow_item_gre_mask;
1962         ret = mlx5_flow_item_acceptable
1963                 (item, (const uint8_t *)mask,
1964                  (const uint8_t *)&nic_mask,
1965                  sizeof(struct rte_flow_item_gre), error);
1966         if (ret < 0)
1967                 return ret;
1968 #ifndef HAVE_MLX5DV_DR
1969 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
1970         if (spec && (spec->protocol & mask->protocol))
1971                 return rte_flow_error_set(error, ENOTSUP,
1972                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
1973                                           "without MPLS support the"
1974                                           " specification cannot be used for"
1975                                           " filtering");
1976 #endif
1977 #endif
1978         return 0;
1979 }
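
/*
 * Illustrative note: a GRE item needs an outer L3 layer, e.g.
 * "eth / ipv4 / gre" is accepted; without MPLS support in the Verbs
 * library the GRE protocol field can only be wildcarded, not matched.
 */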
1980
1981 /**
1982  * Validate Geneve item.
1983  *
1984  * @param[in] item
1985  *   Item specification.
1986  * @param[in] item_flags
1987  *   Bit-fields that hold the items detected until now.
1988  * @param[in] dev
1989  *   Pointer to the Ethernet device structure.
1990  * @param[out] error
1991  *   Pointer to error structure.
1992  *
1993  * @return
1994  *   0 on success, a negative errno value otherwise and rte_errno is set.
1995  */
1997 int
1998 mlx5_flow_validate_item_geneve(const struct rte_flow_item *item,
1999                                uint64_t item_flags,
2000                                struct rte_eth_dev *dev,
2001                                struct rte_flow_error *error)
2002 {
2003         struct mlx5_priv *priv = dev->data->dev_private;
2004         const struct rte_flow_item_geneve *spec = item->spec;
2005         const struct rte_flow_item_geneve *mask = item->mask;
2006         int ret;
2007         uint16_t gbhdr;
2008         uint8_t opt_len = priv->config.hca_attr.geneve_max_opt_len ?
2009                           MLX5_GENEVE_OPT_LEN_1 : MLX5_GENEVE_OPT_LEN_0;
2010         const struct rte_flow_item_geneve nic_mask = {
2011                 .ver_opt_len_o_c_rsvd0 = RTE_BE16(0x3f80),
2012                 .vni = "\xff\xff\xff",
2013                 .protocol = RTE_BE16(UINT16_MAX),
2014         };
2015
2016         if (!(priv->config.hca_attr.flex_parser_protocols &
2017               MLX5_HCA_FLEX_GENEVE_ENABLED) ||
2018             !priv->config.hca_attr.tunnel_stateless_geneve_rx)
2019                 return rte_flow_error_set(error, ENOTSUP,
2020                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2021                                           "L3 Geneve is not enabled by device"
2022                                           " parameter and/or not configured in"
2023                                           " firmware");
2024         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2025                 return rte_flow_error_set(error, ENOTSUP,
2026                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2027                                           "multiple tunnel layers not"
2028                                           " supported");
2029         /*
2030          * Verify an outer UDP layer is present; Geneve is a
2031          * UDP-based encapsulation.
2032          */
2033         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2034                 return rte_flow_error_set(error, EINVAL,
2035                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2036                                           "no outer UDP layer found");
2037         if (!mask)
2038                 mask = &rte_flow_item_geneve_mask;
2039         ret = mlx5_flow_item_acceptable
2040                                   (item, (const uint8_t *)mask,
2041                                    (const uint8_t *)&nic_mask,
2042                                    sizeof(struct rte_flow_item_geneve), error);
2043         if (ret)
2044                 return ret;
2045         if (spec) {
2046                 gbhdr = rte_be_to_cpu_16(spec->ver_opt_len_o_c_rsvd0);
2047                 if (MLX5_GENEVE_VER_VAL(gbhdr) ||
2048                      MLX5_GENEVE_CRITO_VAL(gbhdr) ||
2049                      MLX5_GENEVE_RSVD_VAL(gbhdr) || spec->rsvd1)
2050                         return rte_flow_error_set(error, ENOTSUP,
2051                                                   RTE_FLOW_ERROR_TYPE_ITEM,
2052                                                   item,
2053                                                   "Geneve protocol unsupported"
2054                                                   " fields are being used");
2055                 if (MLX5_GENEVE_OPTLEN_VAL(gbhdr) > opt_len)
2056                         return rte_flow_error_set
2057                                         (error, ENOTSUP,
2058                                          RTE_FLOW_ERROR_TYPE_ITEM,
2059                                          item,
2060                                          "Unsupported Geneve options length");
2061         }
2062         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
2063                 return rte_flow_error_set
2064                                     (error, ENOTSUP,
2065                                      RTE_FLOW_ERROR_TYPE_ITEM, item,
2066                                      "Geneve tunnel must be fully defined");
2067         return 0;
2068 }
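
/*
 * Illustrative note (not exercised by the driver): Geneve matching needs
 * flex-parser and stateless Geneve Rx support reported by the HCA, an
 * outer UDP layer ("eth / ipv4 / udp / geneve"), and an options length
 * within the limit advertised by hca_attr.geneve_max_opt_len.
 */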
2069
2070 /**
2071  * Validate MPLS item.
2072  *
2073  * @param[in] dev
2074  *   Pointer to the rte_eth_dev structure.
2075  * @param[in] item
2076  *   Item specification.
2077  * @param[in] item_flags
2078  *   Bit-fields that hold the items detected until now.
2079  * @param[in] prev_layer
2080  *   The protocol layer indicated in previous item.
2081  * @param[out] error
2082  *   Pointer to error structure.
2083  *
2084  * @return
2085  *   0 on success, a negative errno value otherwise and rte_errno is set.
2086  */
2087 int
2088 mlx5_flow_validate_item_mpls(struct rte_eth_dev *dev __rte_unused,
2089                              const struct rte_flow_item *item __rte_unused,
2090                              uint64_t item_flags __rte_unused,
2091                              uint64_t prev_layer __rte_unused,
2092                              struct rte_flow_error *error)
2093 {
2094 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
2095         const struct rte_flow_item_mpls *mask = item->mask;
2096         struct mlx5_priv *priv = dev->data->dev_private;
2097         int ret;
2098
2099         if (!priv->config.mpls_en)
2100                 return rte_flow_error_set(error, ENOTSUP,
2101                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2102                                           "MPLS not supported or"
2103                                           " disabled in firmware"
2104                                           " configuration.");
2105         /* MPLS over IP, UDP, or GRE is allowed. */
2106         if (!(prev_layer & (MLX5_FLOW_LAYER_OUTER_L3 |
2107                             MLX5_FLOW_LAYER_OUTER_L4_UDP |
2108                             MLX5_FLOW_LAYER_GRE)))
2109                 return rte_flow_error_set(error, EINVAL,
2110                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2111                                           "protocol filtering not compatible"
2112                                           " with MPLS layer");
2113         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
2114         if ((item_flags & MLX5_FLOW_LAYER_TUNNEL) &&
2115             !(item_flags & MLX5_FLOW_LAYER_GRE))
2116                 return rte_flow_error_set(error, ENOTSUP,
2117                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2118                                           "multiple tunnel layers not"
2119                                           " supported");
2120         if (!mask)
2121                 mask = &rte_flow_item_mpls_mask;
2122         ret = mlx5_flow_item_acceptable
2123                 (item, (const uint8_t *)mask,
2124                  (const uint8_t *)&rte_flow_item_mpls_mask,
2125                  sizeof(struct rte_flow_item_mpls), error);
2126         if (ret < 0)
2127                 return ret;
2128         return 0;
2129 #endif
2130         return rte_flow_error_set(error, ENOTSUP,
2131                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
2132                                   "MPLS is not supported by Verbs, please"
2133                                   " update.");
2134 }
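
/*
 * Illustrative note: MPLS is accepted over an outer L3, UDP or GRE layer,
 * e.g. "eth / ipv4 / gre / mpls" or "eth / ipv4 / udp / mpls", provided
 * MPLS is enabled on the device (config.mpls_en) and the build has
 * HAVE_IBV_DEVICE_MPLS_SUPPORT.
 */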
2135
2136 /**
2137  * Validate NVGRE item.
2138  *
2139  * @param[in] item
2140  *   Item specification.
2141  * @param[in] item_flags
2142  *   Bit flags to mark detected items.
2143  * @param[in] target_protocol
2144  *   The next protocol in the previous item.
2145  * @param[out] error
2146  *   Pointer to error structure.
2147  *
2148  * @return
2149  *   0 on success, a negative errno value otherwise and rte_errno is set.
2150  */
2151 int
2152 mlx5_flow_validate_item_nvgre(const struct rte_flow_item *item,
2153                               uint64_t item_flags,
2154                               uint8_t target_protocol,
2155                               struct rte_flow_error *error)
2156 {
2157         const struct rte_flow_item_nvgre *mask = item->mask;
2158         int ret;
2159
2160         if (target_protocol != 0xff && target_protocol != IPPROTO_GRE)
2161                 return rte_flow_error_set(error, EINVAL,
2162                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2163                                           "protocol filtering not compatible"
2164                                           " with this GRE layer");
2165         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2166                 return rte_flow_error_set(error, ENOTSUP,
2167                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2168                                           "multiple tunnel layers not"
2169                                           " supported");
2170         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
2171                 return rte_flow_error_set(error, ENOTSUP,
2172                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2173                                           "L3 Layer is missing");
2174         if (!mask)
2175                 mask = &rte_flow_item_nvgre_mask;
2176         ret = mlx5_flow_item_acceptable
2177                 (item, (const uint8_t *)mask,
2178                  (const uint8_t *)&rte_flow_item_nvgre_mask,
2179                  sizeof(struct rte_flow_item_nvgre), error);
2180         if (ret < 0)
2181                 return ret;
2182         return 0;
2183 }
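
/*
 * Illustrative note: NVGRE follows the same layering rules as GRE above,
 * e.g. "eth / ipv4 / nvgre" is accepted, while stacking it over another
 * tunnel is rejected as multiple tunnel layers.
 */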
2184
2185 static int
2186 flow_null_validate(struct rte_eth_dev *dev __rte_unused,
2187                    const struct rte_flow_attr *attr __rte_unused,
2188                    const struct rte_flow_item items[] __rte_unused,
2189                    const struct rte_flow_action actions[] __rte_unused,
2190                    bool external __rte_unused,
2191                    struct rte_flow_error *error)
2192 {
2193         return rte_flow_error_set(error, ENOTSUP,
2194                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2195 }
2196
2197 static struct mlx5_flow *
2198 flow_null_prepare(const struct rte_flow_attr *attr __rte_unused,
2199                   const struct rte_flow_item items[] __rte_unused,
2200                   const struct rte_flow_action actions[] __rte_unused,
2201                   struct rte_flow_error *error)
2202 {
2203         rte_flow_error_set(error, ENOTSUP,
2204                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2205         return NULL;
2206 }
2207
2208 static int
2209 flow_null_translate(struct rte_eth_dev *dev __rte_unused,
2210                     struct mlx5_flow *dev_flow __rte_unused,
2211                     const struct rte_flow_attr *attr __rte_unused,
2212                     const struct rte_flow_item items[] __rte_unused,
2213                     const struct rte_flow_action actions[] __rte_unused,
2214                     struct rte_flow_error *error)
2215 {
2216         return rte_flow_error_set(error, ENOTSUP,
2217                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2218 }
2219
2220 static int
2221 flow_null_apply(struct rte_eth_dev *dev __rte_unused,
2222                 struct rte_flow *flow __rte_unused,
2223                 struct rte_flow_error *error)
2224 {
2225         return rte_flow_error_set(error, ENOTSUP,
2226                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2227 }
2228
2229 static void
2230 flow_null_remove(struct rte_eth_dev *dev __rte_unused,
2231                  struct rte_flow *flow __rte_unused)
2232 {
2233 }
2234
2235 static void
2236 flow_null_destroy(struct rte_eth_dev *dev __rte_unused,
2237                   struct rte_flow *flow __rte_unused)
2238 {
2239 }
2240
2241 static int
2242 flow_null_query(struct rte_eth_dev *dev __rte_unused,
2243                 struct rte_flow *flow __rte_unused,
2244                 const struct rte_flow_action *actions __rte_unused,
2245                 void *data __rte_unused,
2246                 struct rte_flow_error *error)
2247 {
2248         return rte_flow_error_set(error, ENOTSUP,
2249                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL, NULL);
2250 }
2251
2252 /* Void driver to protect from null pointer reference. */
2253 const struct mlx5_flow_driver_ops mlx5_flow_null_drv_ops = {
2254         .validate = flow_null_validate,
2255         .prepare = flow_null_prepare,
2256         .translate = flow_null_translate,
2257         .apply = flow_null_apply,
2258         .remove = flow_null_remove,
2259         .destroy = flow_null_destroy,
2260         .query = flow_null_query,
2261 };
2262
2263 /**
2264  * Select flow driver type according to flow attributes and device
2265  * configuration.
2266  *
2267  * @param[in] dev
2268  *   Pointer to the dev structure.
2269  * @param[in] attr
2270  *   Pointer to the flow attributes.
2271  *
2272  * @return
2273  *   The selected flow driver type, MLX5_FLOW_TYPE_MAX if none applies.
2274  */
2275 static enum mlx5_flow_drv_type
2276 flow_get_drv_type(struct rte_eth_dev *dev, const struct rte_flow_attr *attr)
2277 {
2278         struct mlx5_priv *priv = dev->data->dev_private;
2279         enum mlx5_flow_drv_type type = MLX5_FLOW_TYPE_MAX;
2280
2281         if (attr->transfer && priv->config.dv_esw_en)
2282                 type = MLX5_FLOW_TYPE_DV;
2283         if (!attr->transfer)
2284                 type = priv->config.dv_flow_en ? MLX5_FLOW_TYPE_DV :
2285                                                  MLX5_FLOW_TYPE_VERBS;
2286         return type;
2287 }
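
/*
 * Summary of the selection above (informational only):
 *   - attr->transfer with dv_esw_en enabled  -> MLX5_FLOW_TYPE_DV
 *   - non-transfer with dv_flow_en enabled   -> MLX5_FLOW_TYPE_DV
 *   - non-transfer with dv_flow_en disabled  -> MLX5_FLOW_TYPE_VERBS
 *   - attr->transfer with dv_esw_en disabled -> MLX5_FLOW_TYPE_MAX
 *     (handled by the null driver ops)
 */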
2288
2289 #define flow_get_drv_ops(type) flow_drv_ops[type]
2290
2291 /**
2292  * Flow driver validation API. This abstracts calling driver specific functions.
2293  * The type of flow driver is determined according to flow attributes.
2294  *
2295  * @param[in] dev
2296  *   Pointer to the dev structure.
2297  * @param[in] attr
2298  *   Pointer to the flow attributes.
2299  * @param[in] items
2300  *   Pointer to the list of items.
2301  * @param[in] actions
2302  *   Pointer to the list of actions.
2303  * @param[in] external
2304  *   This flow rule is created by a request external to the PMD.
2305  * @param[out] error
2306  *   Pointer to the error structure.
2307  *
2308  * @return
2309  *   0 on success, a negative errno value otherwise and rte_errno is set.
2310  */
2311 static inline int
2312 flow_drv_validate(struct rte_eth_dev *dev,
2313                   const struct rte_flow_attr *attr,
2314                   const struct rte_flow_item items[],
2315                   const struct rte_flow_action actions[],
2316                   bool external, struct rte_flow_error *error)
2317 {
2318         const struct mlx5_flow_driver_ops *fops;
2319         enum mlx5_flow_drv_type type = flow_get_drv_type(dev, attr);
2320
2321         fops = flow_get_drv_ops(type);
2322         return fops->validate(dev, attr, items, actions, external, error);
2323 }
2324
2325 /**
2326  * Flow driver preparation API. This abstracts calling driver specific
2327  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2328  * calculates the size of memory required for device flow, allocates the memory,
2329  * initializes the device flow and returns the pointer.
2330  *
2331  * @note
2332  *   This function initializes the device flow structure, such as dv or verbs
2333  *   in struct mlx5_flow. However, it is the caller's responsibility to
2334  *   initialize the rest: for example, adding the returned device flow to the
2335  *   flow->dev_flow list and setting the backward reference to the flow must
2336  *   be done outside of this function. The layers field is not filled either.
2337  *
      * @param[in] flow
      *   Pointer to the parent flow structure.
2338  * @param[in] attr
2339  *   Pointer to the flow attributes.
2340  * @param[in] items
2341  *   Pointer to the list of items.
2342  * @param[in] actions
2343  *   Pointer to the list of actions.
2344  * @param[out] error
2345  *   Pointer to the error structure.
2346  *
2347  * @return
2348  *   Pointer to device flow on success, otherwise NULL and rte_errno is set.
2349  */
2350 static inline struct mlx5_flow *
2351 flow_drv_prepare(const struct rte_flow *flow,
2352                  const struct rte_flow_attr *attr,
2353                  const struct rte_flow_item items[],
2354                  const struct rte_flow_action actions[],
2355                  struct rte_flow_error *error)
2356 {
2357         const struct mlx5_flow_driver_ops *fops;
2358         enum mlx5_flow_drv_type type = flow->drv_type;
2359
2360         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2361         fops = flow_get_drv_ops(type);
2362         return fops->prepare(attr, items, actions, error);
2363 }
2364
2365 /**
2366  * Flow driver translation API. This abstracts calling driver specific
2367  * functions. Parent flow (rte_flow) should have driver type (drv_type). It
2368  * translates a generic flow into a driver flow. flow_drv_prepare() must
2369  * precede.
2370  *
2371  * @note
2372  *   dev_flow->layers could be filled as a result of parsing during translation
2373  *   if needed by flow_drv_apply(). dev_flow->flow->actions can also be filled
2374  *   if necessary. As a flow can have multiple dev_flows by RSS flow expansion,
2375  *   flow->actions could be overwritten even though all the expanded dev_flows
2376  *   have the same actions.
2377  *
2378  * @param[in] dev
2379  *   Pointer to the rte dev structure.
2380  * @param[in, out] dev_flow
2381  *   Pointer to the mlx5 flow.
2382  * @param[in] attr
2383  *   Pointer to the flow attributes.
2384  * @param[in] items
2385  *   Pointer to the list of items.
2386  * @param[in] actions
2387  *   Pointer to the list of actions.
2388  * @param[out] error
2389  *   Pointer to the error structure.
2390  *
2391  * @return
2392  *   0 on success, a negative errno value otherwise and rte_errno is set.
2393  */
2394 static inline int
2395 flow_drv_translate(struct rte_eth_dev *dev, struct mlx5_flow *dev_flow,
2396                    const struct rte_flow_attr *attr,
2397                    const struct rte_flow_item items[],
2398                    const struct rte_flow_action actions[],
2399                    struct rte_flow_error *error)
2400 {
2401         const struct mlx5_flow_driver_ops *fops;
2402         enum mlx5_flow_drv_type type = dev_flow->flow->drv_type;
2403
2404         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2405         fops = flow_get_drv_ops(type);
2406         return fops->translate(dev, dev_flow, attr, items, actions, error);
2407 }
2408
2409 /**
2410  * Flow driver apply API. This abstracts calling driver specific functions.
2411  * Parent flow (rte_flow) should have driver type (drv_type). It applies
2412  * translated driver flows on to device. flow_drv_translate() must precede.
2413  *
2414  * @param[in] dev
2415  *   Pointer to Ethernet device structure.
2416  * @param[in, out] flow
2417  *   Pointer to flow structure.
2418  * @param[out] error
2419  *   Pointer to error structure.
2420  *
2421  * @return
2422  *   0 on success, a negative errno value otherwise and rte_errno is set.
2423  */
2424 static inline int
2425 flow_drv_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2426                struct rte_flow_error *error)
2427 {
2428         const struct mlx5_flow_driver_ops *fops;
2429         enum mlx5_flow_drv_type type = flow->drv_type;
2430
2431         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2432         fops = flow_get_drv_ops(type);
2433         return fops->apply(dev, flow, error);
2434 }
2435
2436 /**
2437  * Flow driver remove API. This abstracts calling driver specific functions.
2438  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2439  * on device. All the resources of the flow should be freed by calling
2440  * flow_drv_destroy().
2441  *
2442  * @param[in] dev
2443  *   Pointer to Ethernet device.
2444  * @param[in, out] flow
2445  *   Pointer to flow structure.
2446  */
2447 static inline void
2448 flow_drv_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2449 {
2450         const struct mlx5_flow_driver_ops *fops;
2451         enum mlx5_flow_drv_type type = flow->drv_type;
2452
2453         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2454         fops = flow_get_drv_ops(type);
2455         fops->remove(dev, flow);
2456 }
2457
2458 /**
2459  * Flow driver destroy API. This abstracts calling driver specific functions.
2460  * Parent flow (rte_flow) should have driver type (drv_type). It removes a flow
2461  * on device and releases resources of the flow.
2462  *
2463  * @param[in] dev
2464  *   Pointer to Ethernet device.
2465  * @param[in, out] flow
2466  *   Pointer to flow structure.
2467  */
2468 static inline void
2469 flow_drv_destroy(struct rte_eth_dev *dev, struct rte_flow *flow)
2470 {
2471         const struct mlx5_flow_driver_ops *fops;
2472         enum mlx5_flow_drv_type type = flow->drv_type;
2473
2474         assert(type > MLX5_FLOW_TYPE_MIN && type < MLX5_FLOW_TYPE_MAX);
2475         fops = flow_get_drv_ops(type);
2476         fops->destroy(dev, flow);
2477 }
2478
2479 /**
2480  * Validate a flow supported by the NIC.
2481  *
2482  * @see rte_flow_validate()
2483  * @see rte_flow_ops
2484  */
2485 int
2486 mlx5_flow_validate(struct rte_eth_dev *dev,
2487                    const struct rte_flow_attr *attr,
2488                    const struct rte_flow_item items[],
2489                    const struct rte_flow_action actions[],
2490                    struct rte_flow_error *error)
2491 {
2492         int ret;
2493
2494         ret = flow_drv_validate(dev, attr, items, actions, true, error);
2495         if (ret < 0)
2496                 return ret;
2497         return 0;
2498 }
2499
2500 /**
2501  * Get RSS action from the action list.
2502  *
2503  * @param[in] actions
2504  *   Pointer to the list of actions.
2505  *
2506  * @return
2507  *   Pointer to the RSS action if it exists, NULL otherwise.
2508  */
2509 static const struct rte_flow_action_rss*
2510 flow_get_rss_action(const struct rte_flow_action actions[])
2511 {
2512         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2513                 switch (actions->type) {
2514                 case RTE_FLOW_ACTION_TYPE_RSS:
2515                         return (const struct rte_flow_action_rss *)
2516                                actions->conf;
2517                 default:
2518                         break;
2519                 }
2520         }
2521         return NULL;
2522 }
2523
2524 static unsigned int
2525 find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2526 {
2527         const struct rte_flow_item *item;
2528         unsigned int has_vlan = 0;
2529
2530         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2531                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2532                         has_vlan = 1;
2533                         break;
2534                 }
2535         }
2536         if (has_vlan)
2537                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2538                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2539         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2540                                MLX5_EXPANSION_ROOT_OUTER;
2541 }
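
/*
 * Informational note: an RSS level below 2 requests outer RSS, so
 * expansion starts from MLX5_EXPANSION_ROOT (or ROOT_ETH_VLAN when the
 * pattern holds a VLAN item); level 2 and above requests inner RSS and
 * selects the *_OUTER roots, which expand through the outer headers and
 * tunnel items.
 */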
2542
2543 /**
2544  * Check if the flow should be split due to hairpin.
2545  * The reason for the split is that in current HW we can't
2546  * support encap on Rx, so if a flow contains encap we move it
2547  * to Tx.
2548  *
2549  * @param dev
2550  *   Pointer to Ethernet device.
2551  * @param[in] attr
2552  *   Flow rule attributes.
2553  * @param[in] actions
2554  *   Associated actions (list terminated by the END action).
2555  *
2556  * @return
2557  *   > 0 the number of actions if the flow should be split,
2558  *   0 when no split is required.
2559  */
2560 static int
2561 flow_check_hairpin_split(struct rte_eth_dev *dev,
2562                          const struct rte_flow_attr *attr,
2563                          const struct rte_flow_action actions[])
2564 {
2565         int queue_action = 0;
2566         int action_n = 0;
2567         int encap = 0;
2568         const struct rte_flow_action_queue *queue;
2569         const struct rte_flow_action_rss *rss;
2570         const struct rte_flow_action_raw_encap *raw_encap;
2571
2572         if (!attr->ingress)
2573                 return 0;
2574         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2575                 switch (actions->type) {
2576                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2577                         queue = actions->conf;
2578                         if (mlx5_rxq_get_type(dev, queue->index) !=
2579                             MLX5_RXQ_TYPE_HAIRPIN)
2580                                 return 0;
2581                         queue_action = 1;
2582                         action_n++;
2583                         break;
2584                 case RTE_FLOW_ACTION_TYPE_RSS:
2585                         rss = actions->conf;
2586                         if (mlx5_rxq_get_type(dev, rss->queue[0]) !=
2587                             MLX5_RXQ_TYPE_HAIRPIN)
2588                                 return 0;
2589                         queue_action = 1;
2590                         action_n++;
2591                         break;
2592                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2593                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2594                         encap = 1;
2595                         action_n++;
2596                         break;
2597                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2598                         raw_encap = actions->conf;
2599                         if (raw_encap->size >
2600                             (sizeof(struct rte_flow_item_eth) +
2601                              sizeof(struct rte_flow_item_ipv4)))
2602                                 encap = 1;
2603                         action_n++;
2604                         break;
2605                 default:
2606                         action_n++;
2607                         break;
2608                 }
2609         }
2610         if (encap == 1 && queue_action)
2611                 return action_n;
2612         return 0;
2613 }
2614
2615 #define MLX5_MAX_SPLIT_ACTIONS 24
2616 #define MLX5_MAX_SPLIT_ITEMS 24
2617
2618 /**
2619  * Split the hairpin flow.
2620  * Since the HW can't support encap on Rx we move the encap to Tx.
2621  * If the count action is after the encap then we also
2622  * move the count action. In this case the count will also measure
2623  * the outer bytes.
2624  *
2625  * @param dev
2626  *   Pointer to Ethernet device.
2627  * @param[in] actions
2628  *   Associated actions (list terminated by the END action).
2629  * @param[out] actions_rx
2630  *   Rx flow actions.
2631  * @param[out] actions_tx
2632  *   Tx flow actions.
2633  * @param[out] pattern_tx
2634  *   The pattern items for the Tx flow.
2635  * @param[out] flow_id
2636  *   The flow ID connected to this flow.
2637  *
2638  * @return
2639  *   0 on success.
2640  */
2641 static int
2642 flow_hairpin_split(struct rte_eth_dev *dev,
2643                    const struct rte_flow_action actions[],
2644                    struct rte_flow_action actions_rx[],
2645                    struct rte_flow_action actions_tx[],
2646                    struct rte_flow_item pattern_tx[],
2647                    uint32_t *flow_id)
2648 {
2649         struct mlx5_priv *priv = dev->data->dev_private;
2650         const struct rte_flow_action_raw_encap *raw_encap;
2651         const struct rte_flow_action_raw_decap *raw_decap;
2652         struct mlx5_rte_flow_action_set_tag *set_tag;
2653         struct rte_flow_action *tag_action;
2654         struct mlx5_rte_flow_item_tag *tag_item;
2655         struct rte_flow_item *item;
2656         char *addr;
2657         struct rte_flow_error error;
2658         int encap = 0;
2659
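        /* Allocate a flow ID used to pair the split Rx and Tx hairpin flows. */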
2660         mlx5_flow_id_get(priv->sh->flow_id_pool, flow_id);
2661         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2662                 switch (actions->type) {
2663                 case RTE_FLOW_ACTION_TYPE_VXLAN_ENCAP:
2664                 case RTE_FLOW_ACTION_TYPE_NVGRE_ENCAP:
2665                         rte_memcpy(actions_tx, actions,
2666                                sizeof(struct rte_flow_action));
2667                         actions_tx++;
2668                         break;
2669                 case RTE_FLOW_ACTION_TYPE_COUNT:
2670                         if (encap) {
2671                                 rte_memcpy(actions_tx, actions,
2672                                            sizeof(struct rte_flow_action));
2673                                 actions_tx++;
2674                         } else {
2675                                 rte_memcpy(actions_rx, actions,
2676                                            sizeof(struct rte_flow_action));
2677                                 actions_rx++;
2678                         }
2679                         break;
2680                 case RTE_FLOW_ACTION_TYPE_RAW_ENCAP:
2681                         raw_encap = actions->conf;
2682                         if (raw_encap->size >
2683                             (sizeof(struct rte_flow_item_eth) +
2684                              sizeof(struct rte_flow_item_ipv4))) {
2685                                 memcpy(actions_tx, actions,
2686                                        sizeof(struct rte_flow_action));
2687                                 actions_tx++;
2688                                 encap = 1;
2689                         } else {
2690                                 rte_memcpy(actions_rx, actions,
2691                                            sizeof(struct rte_flow_action));
2692                                 actions_rx++;
2693                         }
2694                         break;
2695                 case RTE_FLOW_ACTION_TYPE_RAW_DECAP:
2696                         raw_decap = actions->conf;
2697                         if (raw_decap->size <
2698                             (sizeof(struct rte_flow_item_eth) +
2699                              sizeof(struct rte_flow_item_ipv4))) {
2700                                 memcpy(actions_tx, actions,
2701                                        sizeof(struct rte_flow_action));
2702                                 actions_tx++;
2703                         } else {
2704                                 rte_memcpy(actions_rx, actions,
2705                                            sizeof(struct rte_flow_action));
2706                                 actions_rx++;
2707                         }
2708                         break;
2709                 default:
2710                         rte_memcpy(actions_rx, actions,
2711                                    sizeof(struct rte_flow_action));
2712                         actions_rx++;
2713                         break;
2714                 }
2715         }
2716         /* Add set meta action and end action for the Rx flow. */
2717         tag_action = actions_rx;
2718         tag_action->type = MLX5_RTE_FLOW_ACTION_TYPE_TAG;
2719         actions_rx++;
2720         rte_memcpy(actions_rx, actions, sizeof(struct rte_flow_action));
2721         actions_rx++;
2722         set_tag = (void *)actions_rx;
2723         set_tag->id = flow_get_reg_id(dev, MLX5_HAIRPIN_RX, 0, &error);
2724         set_tag->data = rte_cpu_to_be_32(*flow_id);
2725         tag_action->conf = set_tag;
2726         /* Create Tx item list. */
2727         rte_memcpy(actions_tx, actions, sizeof(struct rte_flow_action));
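        /*
         * The Tx pattern holds two items (tag + END); the tag spec and mask
         * structures are carved from the same pattern_tx buffer, right after
         * those two items.
         */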
2728         addr = (void *)&pattern_tx[2];
2729         item = pattern_tx;
2730         item->type = MLX5_RTE_FLOW_ITEM_TYPE_TAG;
2731         tag_item = (void *)addr;
2732         tag_item->data = rte_cpu_to_be_32(*flow_id);
2733         tag_item->id = flow_get_reg_id(dev, MLX5_HAIRPIN_TX, 0, &error);
2734         item->spec = tag_item;
2735         addr += sizeof(struct mlx5_rte_flow_item_tag);
2736         tag_item = (void *)addr;
2737         tag_item->data = UINT32_MAX;
2738         tag_item->id = UINT16_MAX;
2739         item->mask = tag_item;
2740         addr += sizeof(struct mlx5_rte_flow_item_tag);
2741         item->last = NULL;
2742         item++;
2743         item->type = RTE_FLOW_ITEM_TYPE_END;
2744         return 0;
2745 }
2746
2747 /**
2748  * Create a flow and add it to @p list.
2749  *
2750  * @param dev
2751  *   Pointer to Ethernet device.
2752  * @param list
2753  *   Pointer to a TAILQ flow list.
2754  * @param[in] attr
2755  *   Flow rule attributes.
2756  * @param[in] items
2757  *   Pattern specification (list terminated by the END pattern item).
2758  * @param[in] actions
2759  *   Associated actions (list terminated by the END action).
2760  * @param[in] external
2761  *   This flow rule is created by a request external to the PMD.
2762  * @param[out] error
2763  *   Perform verbose error reporting if not NULL.
2764  *
2765  * @return
2766  *   A flow on success, NULL otherwise and rte_errno is set.
2767  */
2768 static struct rte_flow *
2769 flow_list_create(struct rte_eth_dev *dev, struct mlx5_flows *list,
2770                  const struct rte_flow_attr *attr,
2771                  const struct rte_flow_item items[],
2772                  const struct rte_flow_action actions[],
2773                  bool external, struct rte_flow_error *error)
2774 {
2775         struct mlx5_priv *priv = dev->data->dev_private;
2776         struct rte_flow *flow = NULL;
2777         struct mlx5_flow *dev_flow;
2778         const struct rte_flow_action_rss *rss;
2779         union {
2780                 struct rte_flow_expand_rss buf;
2781                 uint8_t buffer[2048];
2782         } expand_buffer;
2783         union {
2784                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
2785                 uint8_t buffer[2048];
2786         } actions_rx;
2787         union {
2788                 struct rte_flow_action actions[MLX5_MAX_SPLIT_ACTIONS];
2789                 uint8_t buffer[2048];
2790         } actions_hairpin_tx;
2791         union {
2792                 struct rte_flow_item items[MLX5_MAX_SPLIT_ITEMS];
2793                 uint8_t buffer[2048];
2794         } items_tx;
2795         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2796         const struct rte_flow_action *p_actions_rx = actions;
2797         int ret;
2798         uint32_t i;
2799         uint32_t flow_size;
2800         int hairpin_flow = 0;
2801         uint32_t hairpin_id = 0;
2802         struct rte_flow_attr attr_tx = { .priority = 0 };
2803
2804         hairpin_flow = flow_check_hairpin_split(dev, attr, actions);
2805         if (hairpin_flow > 0) {
2806                 if (hairpin_flow > MLX5_MAX_SPLIT_ACTIONS) {
2807                         rte_errno = EINVAL;
2808                         return NULL;
2809                 }
2810                 flow_hairpin_split(dev, actions, actions_rx.actions,
2811                                    actions_hairpin_tx.actions, items_tx.items,
2812                                    &hairpin_id);
2813                 p_actions_rx = actions_rx.actions;
2814         }
2815         ret = flow_drv_validate(dev, attr, items, p_actions_rx, external,
2816                                 error);
2817         if (ret < 0)
2818                 goto error_before_flow;
2819         flow_size = sizeof(struct rte_flow);
2820         rss = flow_get_rss_action(p_actions_rx);
2821         if (rss)
2822                 flow_size += RTE_ALIGN_CEIL(rss->queue_num * sizeof(uint16_t),
2823                                             sizeof(void *));
2824         else
2825                 flow_size += RTE_ALIGN_CEIL(sizeof(uint16_t), sizeof(void *));
2826         flow = rte_calloc(__func__, 1, flow_size, 0);
2827         if (!flow) {
2828                 rte_errno = ENOMEM;
2829                 goto error_before_flow;
2830         }
2831         flow->drv_type = flow_get_drv_type(dev, attr);
2832         flow->ingress = attr->ingress;
2833         flow->transfer = attr->transfer;
2834         if (hairpin_id != 0)
2835                 flow->hairpin_flow_id = hairpin_id;
2836         assert(flow->drv_type > MLX5_FLOW_TYPE_MIN &&
2837                flow->drv_type < MLX5_FLOW_TYPE_MAX);
2838         flow->queue = (void *)(flow + 1);
2839         LIST_INIT(&flow->dev_flows);
2840         if (rss && rss->types) {
2841                 unsigned int graph_root;
2842
2843                 graph_root = find_graph_root(items, rss->level);
2844                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2845                                           items, rss->types,
2846                                           mlx5_support_expansion,
2847                                           graph_root);
2848                 assert(ret > 0 &&
2849                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2850         } else {
2851                 buf->entries = 1;
2852                 buf->entry[0].pattern = (void *)(uintptr_t)items;
2853         }
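        /* Create and translate a device flow for each expanded pattern. */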
2854         for (i = 0; i < buf->entries; ++i) {
2855                 dev_flow = flow_drv_prepare(flow, attr, buf->entry[i].pattern,
2856                                             p_actions_rx, error);
2857                 if (!dev_flow)
2858                         goto error;
2859                 dev_flow->flow = flow;
2860                 dev_flow->external = external;
2861                 LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
2862                 ret = flow_drv_translate(dev, dev_flow, attr,
2863                                          buf->entry[i].pattern,
2864                                          p_actions_rx, error);
2865                 if (ret < 0)
2866                         goto error;
2867         }
2868         /* Create the tx flow. */
2869         if (hairpin_flow) {
2870                 attr_tx.group = MLX5_HAIRPIN_TX_TABLE;
2871                 attr_tx.ingress = 0;
2872                 attr_tx.egress = 1;
2873                 dev_flow = flow_drv_prepare(flow, &attr_tx, items_tx.items,
2874                                             actions_hairpin_tx.actions, error);
2875                 if (!dev_flow)
2876                         goto error;
2877                 dev_flow->flow = flow;
2878                 LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
2879                 ret = flow_drv_translate(dev, dev_flow, &attr_tx,
2880                                          items_tx.items,
2881                                          actions_hairpin_tx.actions, error);
2882                 if (ret < 0)
2883                         goto error;
2884         }
2885         if (dev->data->dev_started) {
2886                 ret = flow_drv_apply(dev, flow, error);
2887                 if (ret < 0)
2888                         goto error;
2889         }
2890         TAILQ_INSERT_TAIL(list, flow, next);
2891         flow_rxq_flags_set(dev, flow);
2892         return flow;
2893 error_before_flow:
2894         if (hairpin_id)
2895                 mlx5_flow_id_release(priv->sh->flow_id_pool,
2896                                      hairpin_id);
2897         return NULL;
2898 error:
2899         ret = rte_errno; /* Save rte_errno before cleanup. */
2900         assert(flow);
2901         if (flow->hairpin_flow_id)
2902                 mlx5_flow_id_release(priv->sh->flow_id_pool,
2903                                      flow->hairpin_flow_id);
2904         flow_drv_destroy(dev, flow);
2905         rte_free(flow);
2906         rte_errno = ret; /* Restore rte_errno. */
2907         return NULL;
2908 }
2909
2910 /**
2911  * Create a dedicated flow rule on e-switch table 0 (root table), to direct all
2912  * incoming packets to table 1.
2913  *
2914  * Other flow rules, requested for group n, will be created in
2915  * e-switch table n+1.
2916  * Jump actions to e-switch group n will be mapped to table n+1.
2917  *
2918  * Used when working in switchdev mode, to utilize the advantages of table 1
2919  * and above.
2920  *
2921  * @param dev
2922  *   Pointer to Ethernet device.
2923  *
2924  * @return
2925  *   Pointer to flow on success, NULL otherwise and rte_errno is set.
2926  */
2927 struct rte_flow *
2928 mlx5_flow_create_esw_table_zero_flow(struct rte_eth_dev *dev)
2929 {
2930         const struct rte_flow_attr attr = {
2931                 .group = 0,
2932                 .priority = 0,
2933                 .ingress = 1,
2934                 .egress = 0,
2935                 .transfer = 1,
2936         };
2937         const struct rte_flow_item pattern = {
2938                 .type = RTE_FLOW_ITEM_TYPE_END,
2939         };
2940         struct rte_flow_action_jump jump = {
2941                 .group = 1,
2942         };
2943         const struct rte_flow_action actions[] = {
2944                 {
2945                         .type = RTE_FLOW_ACTION_TYPE_JUMP,
2946                         .conf = &jump,
2947                 },
2948                 {
2949                         .type = RTE_FLOW_ACTION_TYPE_END,
2950                 },
2951         };
2952         struct mlx5_priv *priv = dev->data->dev_private;
2953         struct rte_flow_error error;
2954
2955         return flow_list_create(dev, &priv->ctrl_flows, &attr, &pattern,
2956                                 actions, false, &error);
2957 }
2958
2959 /**
2960  * Create a flow.
2961  *
2962  * @see rte_flow_create()
2963  * @see rte_flow_ops
2964  */
2965 struct rte_flow *
2966 mlx5_flow_create(struct rte_eth_dev *dev,
2967                  const struct rte_flow_attr *attr,
2968                  const struct rte_flow_item items[],
2969                  const struct rte_flow_action actions[],
2970                  struct rte_flow_error *error)
2971 {
2972         struct mlx5_priv *priv = dev->data->dev_private;
2973
2974         return flow_list_create(dev, &priv->flows,
2975                                 attr, items, actions, true, error);
2976 }
2977
2978 /**
2979  * Destroy a flow in a list.
2980  *
2981  * @param dev
2982  *   Pointer to Ethernet device.
2983  * @param list
2984  *   Pointer to a TAILQ flow list.
2985  * @param[in] flow
2986  *   Flow to destroy.
2987  */
2988 static void
2989 flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2990                   struct rte_flow *flow)
2991 {
2992         struct mlx5_priv *priv = dev->data->dev_private;
2993
2994         /*
2995          * Update RX queue flags only if port is started, otherwise it is
2996          * already clean.
2997          */
2998         if (dev->data->dev_started)
2999                 flow_rxq_flags_trim(dev, flow);
3000         if (flow->hairpin_flow_id)
3001                 mlx5_flow_id_release(priv->sh->flow_id_pool,
3002                                      flow->hairpin_flow_id);
3003         flow_drv_destroy(dev, flow);
3004         TAILQ_REMOVE(list, flow, next);
3005         rte_free(flow->fdir);
3006         rte_free(flow);
3007 }
3008
3009 /**
3010  * Destroy all flows.
3011  *
3012  * @param dev
3013  *   Pointer to Ethernet device.
3014  * @param list
3015  *   Pointer to a TAILQ flow list.
3016  */
3017 void
3018 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
3019 {
3020         while (!TAILQ_EMPTY(list)) {
3021                 struct rte_flow *flow;
3022
3023                 flow = TAILQ_FIRST(list);
3024                 flow_list_destroy(dev, list, flow);
3025         }
3026 }
3027
3028 /**
3029  * Remove all flows.
3030  *
3031  * @param dev
3032  *   Pointer to Ethernet device.
3033  * @param list
3034  *   Pointer to a TAILQ flow list.
3035  */
3036 void
3037 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
3038 {
3039         struct rte_flow *flow;
3040
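        /*
         * Remove the hardware rules only; the rte_flow objects are kept so
         * that mlx5_flow_start() can re-apply them.
         */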
3041         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
3042                 flow_drv_remove(dev, flow);
3043         flow_rxq_flags_clear(dev);
3044 }
3045
3046 /**
3047  * Add all flows.
3048  *
3049  * @param dev
3050  *   Pointer to Ethernet device.
3051  * @param list
3052  *   Pointer to a TAILQ flow list.
3053  *
3054  * @return
3055  *   0 on success, a negative errno value otherwise and rte_errno is set.
3056  */
3057 int
3058 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
3059 {
3060         struct rte_flow *flow;
3061         struct rte_flow_error error;
3062         int ret = 0;
3063
3064         TAILQ_FOREACH(flow, list, next) {
3065                 ret = flow_drv_apply(dev, flow, &error);
3066                 if (ret < 0)
3067                         goto error;
3068                 flow_rxq_flags_set(dev, flow);
3069         }
3070         return 0;
3071 error:
3072         ret = rte_errno; /* Save rte_errno before cleanup. */
3073         mlx5_flow_stop(dev, list);
3074         rte_errno = ret; /* Restore rte_errno. */
3075         return -rte_errno;
3076 }
3077
3078 /**
3079  * Verify that the flow list is empty.
3080  *
3081  * @param dev
3082  *   Pointer to Ethernet device.
3083  *
3084  * @return the number of flows not released.
3085  */
3086 int
3087 mlx5_flow_verify(struct rte_eth_dev *dev)
3088 {
3089         struct mlx5_priv *priv = dev->data->dev_private;
3090         struct rte_flow *flow;
3091         int ret = 0;
3092
3093         TAILQ_FOREACH(flow, &priv->flows, next) {
3094                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
3095                         dev->data->port_id, (void *)flow);
3096                 ++ret;
3097         }
3098         return ret;
3099 }
3100
3101 /**
3102  * Enable default hairpin egress flow.
3103  *
3104  * @param dev
3105  *   Pointer to Ethernet device.
3106  * @param queue
3107  *   The queue index.
3108  *
3109  * @return
3110  *   0 on success, a negative errno value otherwise and rte_errno is set.
3111  */
3112 int
3113 mlx5_ctrl_flow_source_queue(struct rte_eth_dev *dev,
3114                             uint32_t queue)
3115 {
3116         struct mlx5_priv *priv = dev->data->dev_private;
3117         const struct rte_flow_attr attr = {
3118                 .egress = 1,
3119                 .priority = 0,
3120         };
3121         struct mlx5_rte_flow_item_tx_queue queue_spec = {
3122                 .queue = queue,
3123         };
3124         struct mlx5_rte_flow_item_tx_queue queue_mask = {
3125                 .queue = UINT32_MAX,
3126         };
3127         struct rte_flow_item items[] = {
3128                 {
3129                         .type = MLX5_RTE_FLOW_ITEM_TYPE_TX_QUEUE,
3130                         .spec = &queue_spec,
3131                         .last = NULL,
3132                         .mask = &queue_mask,
3133                 },
3134                 {
3135                         .type = RTE_FLOW_ITEM_TYPE_END,
3136                 },
3137         };
3138         struct rte_flow_action_jump jump = {
3139                 .group = MLX5_HAIRPIN_TX_TABLE,
3140         };
3141         struct rte_flow_action actions[2];
3142         struct rte_flow *flow;
3143         struct rte_flow_error error;
3144
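        /*
         * Match packets sent from the given Tx queue and jump to the
         * hairpin Tx table.
         */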
3145         actions[0].type = RTE_FLOW_ACTION_TYPE_JUMP;
3146         actions[0].conf = &jump;
3147         actions[1].type = RTE_FLOW_ACTION_TYPE_END;
3148         flow = flow_list_create(dev, &priv->ctrl_flows,
3149                                 &attr, items, actions, false, &error);
3150         if (!flow) {
3151                 DRV_LOG(DEBUG,
3152                         "Failed to create ctrl flow: rte_errno(%d),"
3153                         " type(%d), message(%s)",
3154                         rte_errno, error.type,
3155                         error.message ? error.message : " (no stated reason)");
3156                 return -rte_errno;
3157         }
3158         return 0;
3159 }
3160
3161 /**
3162  * Enable a control flow configured from the control plane.
3163  *
3164  * @param dev
3165  *   Pointer to Ethernet device.
3166  * @param eth_spec
3167  *   An Ethernet flow spec to apply.
3168  * @param eth_mask
3169  *   An Ethernet flow mask to apply.
3170  * @param vlan_spec
3171  *   A VLAN flow spec to apply.
3172  * @param vlan_mask
3173  *   A VLAN flow mask to apply.
3174  *
3175  * @return
3176  *   0 on success, a negative errno value otherwise and rte_errno is set.
3177  */
3178 int
3179 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3180                     struct rte_flow_item_eth *eth_spec,
3181                     struct rte_flow_item_eth *eth_mask,
3182                     struct rte_flow_item_vlan *vlan_spec,
3183                     struct rte_flow_item_vlan *vlan_mask)
3184 {
3185         struct mlx5_priv *priv = dev->data->dev_private;
3186         const struct rte_flow_attr attr = {
3187                 .ingress = 1,
3188                 .priority = MLX5_FLOW_PRIO_RSVD,
3189         };
3190         struct rte_flow_item items[] = {
3191                 {
3192                         .type = RTE_FLOW_ITEM_TYPE_ETH,
3193                         .spec = eth_spec,
3194                         .last = NULL,
3195                         .mask = eth_mask,
3196                 },
3197                 {
3198                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3199                                               RTE_FLOW_ITEM_TYPE_END,
3200                         .spec = vlan_spec,
3201                         .last = NULL,
3202                         .mask = vlan_mask,
3203                 },
3204                 {
3205                         .type = RTE_FLOW_ITEM_TYPE_END,
3206                 },
3207         };
3208         uint16_t queue[priv->reta_idx_n];
3209         struct rte_flow_action_rss action_rss = {
3210                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3211                 .level = 0,
3212                 .types = priv->rss_conf.rss_hf,
3213                 .key_len = priv->rss_conf.rss_key_len,
3214                 .queue_num = priv->reta_idx_n,
3215                 .key = priv->rss_conf.rss_key,
3216                 .queue = queue,
3217         };
3218         struct rte_flow_action actions[] = {
3219                 {
3220                         .type = RTE_FLOW_ACTION_TYPE_RSS,
3221                         .conf = &action_rss,
3222                 },
3223                 {
3224                         .type = RTE_FLOW_ACTION_TYPE_END,
3225                 },
3226         };
3227         struct rte_flow *flow;
3228         struct rte_flow_error error;
3229         unsigned int i;
3230
3231         if (!priv->reta_idx_n || !priv->rxqs_n)
3232                 return 0;
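        /* Copy the Rx queues from the RSS indirection table. */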
3234         for (i = 0; i != priv->reta_idx_n; ++i)
3235                 queue[i] = (*priv->reta_idx)[i];
3236         flow = flow_list_create(dev, &priv->ctrl_flows,
3237                                 &attr, items, actions, false, &error);
3238         if (!flow)
3239                 return -rte_errno;
3240         return 0;
3241 }
3242
3243 /**
3244  * Enable a control flow configured from the control plane.
3245  *
3246  * @param dev
3247  *   Pointer to Ethernet device.
3248  * @param eth_spec
3249  *   An Ethernet flow spec to apply.
3250  * @param eth_mask
3251  *   An Ethernet flow mask to apply.
3252  *
3253  * @return
3254  *   0 on success, a negative errno value otherwise and rte_errno is set.
3255  */
3256 int
3257 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3258                struct rte_flow_item_eth *eth_spec,
3259                struct rte_flow_item_eth *eth_mask)
3260 {
3261         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3262 }
3263
3264 /**
3265  * Destroy a flow.
3266  *
3267  * @see rte_flow_destroy()
3268  * @see rte_flow_ops
3269  */
3270 int
3271 mlx5_flow_destroy(struct rte_eth_dev *dev,
3272                   struct rte_flow *flow,
3273                   struct rte_flow_error *error __rte_unused)
3274 {
3275         struct mlx5_priv *priv = dev->data->dev_private;
3276
3277         flow_list_destroy(dev, &priv->flows, flow);
3278         return 0;
3279 }
3280
3281 /**
3282  * Destroy all flows.
3283  *
3284  * @see rte_flow_flush()
3285  * @see rte_flow_ops
3286  */
3287 int
3288 mlx5_flow_flush(struct rte_eth_dev *dev,
3289                 struct rte_flow_error *error __rte_unused)
3290 {
3291         struct mlx5_priv *priv = dev->data->dev_private;
3292
3293         mlx5_flow_list_flush(dev, &priv->flows);
3294         return 0;
3295 }
3296
3297 /**
3298  * Isolated mode.
3299  *
3300  * @see rte_flow_isolate()
3301  * @see rte_flow_ops
3302  */
3303 int
3304 mlx5_flow_isolate(struct rte_eth_dev *dev,
3305                   int enable,
3306                   struct rte_flow_error *error)
3307 {
3308         struct mlx5_priv *priv = dev->data->dev_private;
3309
3310         if (dev->data->dev_started) {
3311                 rte_flow_error_set(error, EBUSY,
3312                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3313                                    NULL,
3314                                    "port must be stopped first");
3315                 return -rte_errno;
3316         }
3317         priv->isolated = !!enable;
3318         if (enable)
3319                 dev->dev_ops = &mlx5_dev_ops_isolate;
3320         else
3321                 dev->dev_ops = &mlx5_dev_ops;
3322         return 0;
3323 }
3324
3325 /**
3326  * Query a flow.
3327  *
3328  * @see rte_flow_query()
3329  * @see rte_flow_ops
3330  */
3331 static int
3332 flow_drv_query(struct rte_eth_dev *dev,
3333                struct rte_flow *flow,
3334                const struct rte_flow_action *actions,
3335                void *data,
3336                struct rte_flow_error *error)
3337 {
3338         const struct mlx5_flow_driver_ops *fops;
3339         enum mlx5_flow_drv_type ftype = flow->drv_type;
3340
3341         assert(ftype > MLX5_FLOW_TYPE_MIN && ftype < MLX5_FLOW_TYPE_MAX);
3342         fops = flow_get_drv_ops(ftype);
3343
3344         return fops->query(dev, flow, actions, data, error);
3345 }
3346
3347 /**
3348  * Query a flow.
3349  *
3350  * @see rte_flow_query()
3351  * @see rte_flow_ops
3352  */
3353 int
3354 mlx5_flow_query(struct rte_eth_dev *dev,
3355                 struct rte_flow *flow,
3356                 const struct rte_flow_action *actions,
3357                 void *data,
3358                 struct rte_flow_error *error)
3359 {
3360         int ret;
3361
3362         ret = flow_drv_query(dev, flow, actions, data, error);
3363         if (ret < 0)
3364                 return ret;
3365         return 0;
3366 }
3367
3368 /**
3369  * Convert a flow director filter to a generic flow.
3370  *
3371  * @param dev
3372  *   Pointer to Ethernet device.
3373  * @param fdir_filter
3374  *   Flow director filter to add.
3375  * @param attributes
3376  *   Generic flow parameters structure.
3377  *
3378  * @return
3379  *   0 on success, a negative errno value otherwise and rte_errno is set.
3380  */
3381 static int
3382 flow_fdir_filter_convert(struct rte_eth_dev *dev,
3383                          const struct rte_eth_fdir_filter *fdir_filter,
3384                          struct mlx5_fdir *attributes)
3385 {
3386         struct mlx5_priv *priv = dev->data->dev_private;
3387         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3388         const struct rte_eth_fdir_masks *mask =
3389                 &dev->data->dev_conf.fdir_conf.mask;
3390
3391         /* Validate queue number. */
3392         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3393                 DRV_LOG(ERR, "port %u invalid queue number %d",
3394                         dev->data->port_id, fdir_filter->action.rx_queue);
3395                 rte_errno = EINVAL;
3396                 return -rte_errno;
3397         }
3398         attributes->attr.ingress = 1;
3399         attributes->items[0] = (struct rte_flow_item) {
3400                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3401                 .spec = &attributes->l2,
3402                 .mask = &attributes->l2_mask,
3403         };
3404         switch (fdir_filter->action.behavior) {
3405         case RTE_ETH_FDIR_ACCEPT:
3406                 attributes->actions[0] = (struct rte_flow_action){
3407                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3408                         .conf = &attributes->queue,
3409                 };
3410                 break;
3411         case RTE_ETH_FDIR_REJECT:
3412                 attributes->actions[0] = (struct rte_flow_action){
3413                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3414                 };
3415                 break;
3416         default:
3417                 DRV_LOG(ERR, "port %u invalid behavior %d",
3418                         dev->data->port_id,
3419                         fdir_filter->action.behavior);
3420                 rte_errno = ENOTSUP;
3421                 return -rte_errno;
3422         }
3423         attributes->queue.index = fdir_filter->action.rx_queue;
3424         /* Handle L3. */
3425         switch (fdir_filter->input.flow_type) {
3426         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3427         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3428         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3429                 attributes->l3.ipv4.hdr = (struct rte_ipv4_hdr){
3430                         .src_addr = input->flow.ip4_flow.src_ip,
3431                         .dst_addr = input->flow.ip4_flow.dst_ip,
3432                         .time_to_live = input->flow.ip4_flow.ttl,
3433                         .type_of_service = input->flow.ip4_flow.tos,
3434                 };
3435                 attributes->l3_mask.ipv4.hdr = (struct rte_ipv4_hdr){
3436                         .src_addr = mask->ipv4_mask.src_ip,
3437                         .dst_addr = mask->ipv4_mask.dst_ip,
3438                         .time_to_live = mask->ipv4_mask.ttl,
3439                         .type_of_service = mask->ipv4_mask.tos,
3440                         .next_proto_id = mask->ipv4_mask.proto,
3441                 };
3442                 attributes->items[1] = (struct rte_flow_item){
3443                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3444                         .spec = &attributes->l3,
3445                         .mask = &attributes->l3_mask,
3446                 };
3447                 break;
3448         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3449         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3450         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3451                 attributes->l3.ipv6.hdr = (struct rte_ipv6_hdr){
3452                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3453                         .proto = input->flow.ipv6_flow.proto,
3454                 };
3455
3456                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3457                        input->flow.ipv6_flow.src_ip,
3458                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3459                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3460                        input->flow.ipv6_flow.dst_ip,
3461                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3462                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3463                        mask->ipv6_mask.src_ip,
3464                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3465                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3466                        mask->ipv6_mask.dst_ip,
3467                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3468                 attributes->items[1] = (struct rte_flow_item){
3469                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3470                         .spec = &attributes->l3,
3471                         .mask = &attributes->l3_mask,
3472                 };
3473                 break;
3474         default:
3475                 DRV_LOG(ERR, "port %u invalid flow type %d",
3476                         dev->data->port_id, fdir_filter->input.flow_type);
3477                 rte_errno = ENOTSUP;
3478                 return -rte_errno;
3479         }
3480         /* Handle L4. */
3481         switch (fdir_filter->input.flow_type) {
3482         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3483                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
3484                         .src_port = input->flow.udp4_flow.src_port,
3485                         .dst_port = input->flow.udp4_flow.dst_port,
3486                 };
3487                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
3488                         .src_port = mask->src_port_mask,
3489                         .dst_port = mask->dst_port_mask,
3490                 };
3491                 attributes->items[2] = (struct rte_flow_item){
3492                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3493                         .spec = &attributes->l4,
3494                         .mask = &attributes->l4_mask,
3495                 };
3496                 break;
3497         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3498                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
3499                         .src_port = input->flow.tcp4_flow.src_port,
3500                         .dst_port = input->flow.tcp4_flow.dst_port,
3501                 };
3502                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
3503                         .src_port = mask->src_port_mask,
3504                         .dst_port = mask->dst_port_mask,
3505                 };
3506                 attributes->items[2] = (struct rte_flow_item){
3507                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3508                         .spec = &attributes->l4,
3509                         .mask = &attributes->l4_mask,
3510                 };
3511                 break;
3512         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3513                 attributes->l4.udp.hdr = (struct rte_udp_hdr){
3514                         .src_port = input->flow.udp6_flow.src_port,
3515                         .dst_port = input->flow.udp6_flow.dst_port,
3516                 };
3517                 attributes->l4_mask.udp.hdr = (struct rte_udp_hdr){
3518                         .src_port = mask->src_port_mask,
3519                         .dst_port = mask->dst_port_mask,
3520                 };
3521                 attributes->items[2] = (struct rte_flow_item){
3522                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3523                         .spec = &attributes->l4,
3524                         .mask = &attributes->l4_mask,
3525                 };
3526                 break;
3527         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3528                 attributes->l4.tcp.hdr = (struct rte_tcp_hdr){
3529                         .src_port = input->flow.tcp6_flow.src_port,
3530                         .dst_port = input->flow.tcp6_flow.dst_port,
3531                 };
3532                 attributes->l4_mask.tcp.hdr = (struct rte_tcp_hdr){
3533                         .src_port = mask->src_port_mask,
3534                         .dst_port = mask->dst_port_mask,
3535                 };
3536                 attributes->items[2] = (struct rte_flow_item){
3537                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3538                         .spec = &attributes->l4,
3539                         .mask = &attributes->l4_mask,
3540                 };
3541                 break;
3542         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3543         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3544                 break;
3545         default:
3546                 DRV_LOG(ERR, "port %u invalid flow type %d",
3547                         dev->data->port_id, fdir_filter->input.flow_type);
3548                 rte_errno = ENOTSUP;
3549                 return -rte_errno;
3550         }
3551         return 0;
3552 }
3553
3554 #define FLOW_FDIR_CMP(f1, f2, fld) \
3555         memcmp(&(f1)->fld, &(f2)->fld, sizeof(f1->fld))
3556
3557 /**
3558  * Compare two FDIR flows. If items and actions are identical, the two flows are
3559  * regarded as the same.
3560  *
3563  * @param f1
3564  *   FDIR flow to compare.
3565  * @param f2
3566  *   FDIR flow to compare.
3567  *
3568  * @return
3569  *   Zero on match, 1 otherwise.
3570  */
3571 static int
3572 flow_fdir_cmp(const struct mlx5_fdir *f1, const struct mlx5_fdir *f2)
3573 {
3574         if (FLOW_FDIR_CMP(f1, f2, attr) ||
3575             FLOW_FDIR_CMP(f1, f2, l2) ||
3576             FLOW_FDIR_CMP(f1, f2, l2_mask) ||
3577             FLOW_FDIR_CMP(f1, f2, l3) ||
3578             FLOW_FDIR_CMP(f1, f2, l3_mask) ||
3579             FLOW_FDIR_CMP(f1, f2, l4) ||
3580             FLOW_FDIR_CMP(f1, f2, l4_mask) ||
3581             FLOW_FDIR_CMP(f1, f2, actions[0].type))
3582                 return 1;
3583         if (f1->actions[0].type == RTE_FLOW_ACTION_TYPE_QUEUE &&
3584             FLOW_FDIR_CMP(f1, f2, queue))
3585                 return 1;
3586         return 0;
3587 }
3588
3589 /**
3590  * Search device flow list to find out a matched FDIR flow.
3591  *
3592  * @param dev
3593  *   Pointer to Ethernet device.
3594  * @param fdir_flow
3595  *   FDIR flow to lookup.
3596  *
3597  * @return
3598  *   Pointer of flow if found, NULL otherwise.
3599  */
3600 static struct rte_flow *
3601 flow_fdir_filter_lookup(struct rte_eth_dev *dev, struct mlx5_fdir *fdir_flow)
3602 {
3603         struct mlx5_priv *priv = dev->data->dev_private;
3604         struct rte_flow *flow = NULL;
3605
3606         assert(fdir_flow);
3607         TAILQ_FOREACH(flow, &priv->flows, next) {
3608                 if (flow->fdir && !flow_fdir_cmp(flow->fdir, fdir_flow)) {
3609                         DRV_LOG(DEBUG, "port %u found FDIR flow %p",
3610                                 dev->data->port_id, (void *)flow);
3611                         break;
3612                 }
3613         }
3614         return flow;
3615 }
3616
3617 /**
3618  * Add new flow director filter and store it in list.
3619  *
3620  * @param dev
3621  *   Pointer to Ethernet device.
3622  * @param fdir_filter
3623  *   Flow director filter to add.
3624  *
3625  * @return
3626  *   0 on success, a negative errno value otherwise and rte_errno is set.
3627  */
3628 static int
3629 flow_fdir_filter_add(struct rte_eth_dev *dev,
3630                      const struct rte_eth_fdir_filter *fdir_filter)
3631 {
3632         struct mlx5_priv *priv = dev->data->dev_private;
3633         struct mlx5_fdir *fdir_flow;
3634         struct rte_flow *flow;
3635         int ret;
3636
3637         fdir_flow = rte_zmalloc(__func__, sizeof(*fdir_flow), 0);
3638         if (!fdir_flow) {
3639                 rte_errno = ENOMEM;
3640                 return -rte_errno;
3641         }
3642         ret = flow_fdir_filter_convert(dev, fdir_filter, fdir_flow);
3643         if (ret)
3644                 goto error;
3645         flow = flow_fdir_filter_lookup(dev, fdir_flow);
3646         if (flow) {
3647                 rte_errno = EEXIST;
3648                 goto error;
3649         }
3650         flow = flow_list_create(dev, &priv->flows, &fdir_flow->attr,
3651                                 fdir_flow->items, fdir_flow->actions, true,
3652                                 NULL);
3653         if (!flow)
3654                 goto error;
3655         assert(!flow->fdir);
3656         flow->fdir = fdir_flow;
3657         DRV_LOG(DEBUG, "port %u created FDIR flow %p",
3658                 dev->data->port_id, (void *)flow);
3659         return 0;
3660 error:
3661         rte_free(fdir_flow);
3662         return -rte_errno;
3663 }
3664
3665 /**
3666  * Delete specific filter.
3667  *
3668  * @param dev
3669  *   Pointer to Ethernet device.
3670  * @param fdir_filter
3671  *   Filter to be deleted.
3672  *
3673  * @return
3674  *   0 on success, a negative errno value otherwise and rte_errno is set.
3675  */
3676 static int
3677 flow_fdir_filter_delete(struct rte_eth_dev *dev,
3678                         const struct rte_eth_fdir_filter *fdir_filter)
3679 {
3680         struct mlx5_priv *priv = dev->data->dev_private;
3681         struct rte_flow *flow;
3682         struct mlx5_fdir fdir_flow = {
3683                 .attr.group = 0,
3684         };
3685         int ret;
3686
3687         ret = flow_fdir_filter_convert(dev, fdir_filter, &fdir_flow);
3688         if (ret)
3689                 return -rte_errno;
3690         flow = flow_fdir_filter_lookup(dev, &fdir_flow);
3691         if (!flow) {
3692                 rte_errno = ENOENT;
3693                 return -rte_errno;
3694         }
3695         flow_list_destroy(dev, &priv->flows, flow);
3696         DRV_LOG(DEBUG, "port %u deleted FDIR flow %p",
3697                 dev->data->port_id, (void *)flow);
3698         return 0;
3699 }
3700
3701 /**
3702  * Update queue for specific filter.
3703  *
3704  * @param dev
3705  *   Pointer to Ethernet device.
3706  * @param fdir_filter
3707  *   Filter to be updated.
3708  *
3709  * @return
3710  *   0 on success, a negative errno value otherwise and rte_errno is set.
3711  */
3712 static int
3713 flow_fdir_filter_update(struct rte_eth_dev *dev,
3714                         const struct rte_eth_fdir_filter *fdir_filter)
3715 {
3716         int ret;
3717
3718         ret = flow_fdir_filter_delete(dev, fdir_filter);
3719         if (ret)
3720                 return ret;
3721         return flow_fdir_filter_add(dev, fdir_filter);
3722 }
3723
3724 /**
3725  * Flush all filters.
3726  *
3727  * @param dev
3728  *   Pointer to Ethernet device.
3729  */
3730 static void
3731 flow_fdir_filter_flush(struct rte_eth_dev *dev)
3732 {
3733         struct mlx5_priv *priv = dev->data->dev_private;
3734
3735         mlx5_flow_list_flush(dev, &priv->flows);
3736 }
3737
3738 /**
3739  * Get flow director information.
3740  *
3741  * @param dev
3742  *   Pointer to Ethernet device.
3743  * @param[out] fdir_info
3744  *   Resulting flow director information.
3745  */
3746 static void
3747 flow_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3748 {
3749         struct rte_eth_fdir_masks *mask =
3750                 &dev->data->dev_conf.fdir_conf.mask;
3751
3752         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3753         fdir_info->guarant_spc = 0;
3754         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3755         fdir_info->max_flexpayload = 0;
3756         fdir_info->flow_types_mask[0] = 0;
3757         fdir_info->flex_payload_unit = 0;
3758         fdir_info->max_flex_payload_segment_num = 0;
3759         fdir_info->flex_payload_limit = 0;
3760         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3761 }
3762
3763 /**
3764  * Deal with flow director operations.
3765  *
3766  * @param dev
3767  *   Pointer to Ethernet device.
3768  * @param filter_op
3769  *   Operation to perform.
3770  * @param arg
3771  *   Pointer to operation-specific structure.
3772  *
3773  * @return
3774  *   0 on success, a negative errno value otherwise and rte_errno is set.
3775  */
3776 static int
3777 flow_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3778                     void *arg)
3779 {
3780         enum rte_fdir_mode fdir_mode =
3781                 dev->data->dev_conf.fdir_conf.mode;
3782
3783         if (filter_op == RTE_ETH_FILTER_NOP)
3784                 return 0;
3785         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3786             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3787                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3788                         dev->data->port_id, fdir_mode);
3789                 rte_errno = EINVAL;
3790                 return -rte_errno;
3791         }
3792         switch (filter_op) {
3793         case RTE_ETH_FILTER_ADD:
3794                 return flow_fdir_filter_add(dev, arg);
3795         case RTE_ETH_FILTER_UPDATE:
3796                 return flow_fdir_filter_update(dev, arg);
3797         case RTE_ETH_FILTER_DELETE:
3798                 return flow_fdir_filter_delete(dev, arg);
3799         case RTE_ETH_FILTER_FLUSH:
3800                 flow_fdir_filter_flush(dev);
3801                 break;
3802         case RTE_ETH_FILTER_INFO:
3803                 flow_fdir_info_get(dev, arg);
3804                 break;
3805         default:
3806                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3807                         dev->data->port_id, filter_op);
3808                 rte_errno = EINVAL;
3809                 return -rte_errno;
3810         }
3811         return 0;
3812 }
3813
3814 /**
3815  * Manage filter operations.
3816  *
3817  * @param dev
3818  *   Pointer to Ethernet device structure.
3819  * @param filter_type
3820  *   Filter type.
3821  * @param filter_op
3822  *   Operation to perform.
3823  * @param arg
3824  *   Pointer to operation-specific structure.
3825  *
3826  * @return
3827  *   0 on success, a negative errno value otherwise and rte_errno is set.
3828  */
3829 int
3830 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3831                      enum rte_filter_type filter_type,
3832                      enum rte_filter_op filter_op,
3833                      void *arg)
3834 {
3835         switch (filter_type) {
3836         case RTE_ETH_FILTER_GENERIC:
3837                 if (filter_op != RTE_ETH_FILTER_GET) {
3838                         rte_errno = EINVAL;
3839                         return -rte_errno;
3840                 }
3841                 *(const void **)arg = &mlx5_flow_ops;
3842                 return 0;
3843         case RTE_ETH_FILTER_FDIR:
3844                 return flow_fdir_ctrl_func(dev, filter_op, arg);
3845         default:
3846                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3847                         dev->data->port_id, filter_type);
3848                 rte_errno = ENOTSUP;
3849                 return -rte_errno;
3850         }
3851         return 0;
3852 }
3853
3854 #define MLX5_POOL_QUERY_FREQ_US 1000000
3855
3856 /**
3857  * Set the periodic procedure for triggering asynchronous batch queries for all
3858  * the counter pools.
3859  *
3860  * @param[in] sh
3861  *   Pointer to mlx5_ibv_shared object.
3862  */
3863 void
3864 mlx5_set_query_alarm(struct mlx5_ibv_shared *sh)
3865 {
3866         struct mlx5_pools_container *cont = MLX5_CNT_CONTAINER(sh, 0, 0);
3867         uint32_t pools_n = rte_atomic16_read(&cont->n_valid);
3868         uint32_t us;
3869
3870         cont = MLX5_CNT_CONTAINER(sh, 1, 0);
3871         pools_n += rte_atomic16_read(&cont->n_valid);
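        /* Spread the query period evenly over all valid pools. */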
3872         us = MLX5_POOL_QUERY_FREQ_US / pools_n;
3873         DRV_LOG(DEBUG, "Set alarm for %u pools each %u us", pools_n, us);
3874         if (rte_eal_alarm_set(us, mlx5_flow_query_alarm, sh)) {
3875                 sh->cmng.query_thread_on = 0;
3876                 DRV_LOG(ERR, "Cannot reinitialize query alarm");
3877         } else {
3878                 sh->cmng.query_thread_on = 1;
3879         }
3880 }
3881
3882 /**
3883  * The periodic procedure for triggering asynchronous batch queries for all the
3884  * counter pools. This function is expected to be called from the host thread.
3885  *
3886  * @param[in] arg
3887  *   The parameter for the alarm process.
3888  */
3889 void
3890 mlx5_flow_query_alarm(void *arg)
3891 {
3892         struct mlx5_ibv_shared *sh = arg;
3893         struct mlx5_devx_obj *dcs;
3894         uint16_t offset;
3895         int ret;
3896         uint8_t batch = sh->cmng.batch;
3897         uint16_t pool_index = sh->cmng.pool_index;
3898         struct mlx5_pools_container *cont;
3899         struct mlx5_pools_container *mcont;
3900         struct mlx5_flow_counter_pool *pool;
3901
3902         if (sh->cmng.pending_queries >= MLX5_MAX_PENDING_QUERIES)
3903                 goto set_alarm;
3904 next_container:
3905         cont = MLX5_CNT_CONTAINER(sh, batch, 1);
3906         mcont = MLX5_CNT_CONTAINER(sh, batch, 0);
3907         /* Check if a resize was done and the container needs flipping. */
3908         if (cont != mcont) {
3909                 if (cont->pools) {
3910                         /* Clean the old container. */
3911                         rte_free(cont->pools);
3912                         memset(cont, 0, sizeof(*cont));
3913                 }
3914                 rte_cio_wmb();
3915                 /* Flip the host container. */
3916                 sh->cmng.mhi[batch] ^= (uint8_t)2;
3917                 cont = mcont;
3918         }
3919         if (!cont->pools) {
3920                 /* Two empty containers are unexpected. */
3921                 if (unlikely(batch != sh->cmng.batch))
3922                         goto set_alarm;
3923                 batch ^= 0x1;
3924                 pool_index = 0;
3925                 goto next_container;
3926         }
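        /* Trigger an asynchronous query for the next pool in line. */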
3927         pool = cont->pools[pool_index];
3928         if (pool->raw_hw)
3929                 /* There is a pool query in progress. */
3930                 goto set_alarm;
3931         pool->raw_hw =
3932                 LIST_FIRST(&sh->cmng.free_stat_raws);
3933         if (!pool->raw_hw)
3934                 /* No free counter statistics raw memory. */
3935                 goto set_alarm;
3936         dcs = (struct mlx5_devx_obj *)(uintptr_t)rte_atomic64_read
3937                                                               (&pool->a64_dcs);
3938         offset = batch ? 0 : dcs->id % MLX5_COUNTERS_PER_POOL;
3939         ret = mlx5_devx_cmd_flow_counter_query(dcs, 0, MLX5_COUNTERS_PER_POOL -
3940                                                offset, NULL, NULL,
3941                                                pool->raw_hw->mem_mng->dm->id,
3942                                                (void *)(uintptr_t)
3943                                                (pool->raw_hw->data + offset),
3944                                                sh->devx_comp,
3945                                                (uint64_t)(uintptr_t)pool);
3946         if (ret) {
3947                 DRV_LOG(ERR, "Failed to trigger asynchronous query for dcs ID"
3948                         " %d", pool->min_dcs->id);
3949                 pool->raw_hw = NULL;
3950                 goto set_alarm;
3951         }
3952         pool->raw_hw->min_dcs_id = dcs->id;
3953         LIST_REMOVE(pool->raw_hw, next);
3954         sh->cmng.pending_queries++;
3955         pool_index++;
3956         if (pool_index >= rte_atomic16_read(&cont->n_valid)) {
3957                 batch ^= 0x1;
3958                 pool_index = 0;
3959         }
3960 set_alarm:
3961         sh->cmng.batch = batch;
3962         sh->cmng.pool_index = pool_index;
3963         mlx5_set_query_alarm(sh);
3964 }
3965
3966 /**
3967  * Handler for the HW response about ready values from an asynchronous batch
3968  * query. This function is expected to be called from the host thread.
3969  *
3970  * @param[in] sh
3971  *   The pointer to the shared IB device context.
3972  * @param[in] async_id
3973  *   The Devx async ID.
3974  * @param[in] status
3975  *   The status of the completion.
3976  */
3977 void
3978 mlx5_flow_async_pool_query_handle(struct mlx5_ibv_shared *sh,
3979                                   uint64_t async_id, int status)
3980 {
3981         struct mlx5_flow_counter_pool *pool =
3982                 (struct mlx5_flow_counter_pool *)(uintptr_t)async_id;
3983         struct mlx5_counter_stats_raw *raw_to_free;
3984
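        /*
         * On error the raw buffer is simply recycled; on success it becomes
         * the pool's current snapshot and the previous one is recycled.
         */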
3985         if (unlikely(status)) {
3986                 raw_to_free = pool->raw_hw;
3987         } else {
3988                 raw_to_free = pool->raw;
3989                 rte_spinlock_lock(&pool->sl);
3990                 pool->raw = pool->raw_hw;
3991                 rte_spinlock_unlock(&pool->sl);
3992                 rte_atomic64_add(&pool->query_gen, 1);
3993                 /* Be sure the new raw counters data is updated in memory. */
3994                 rte_cio_wmb();
3995         }
3996         LIST_INSERT_HEAD(&sh->cmng.free_stat_raws, raw_to_free, next);
3997         pool->raw_hw = NULL;
3998         sh->cmng.pending_queries--;
3999 }
4000
4001 /**
4002  * Translate the rte_flow group index to HW table value.
4003  *
4004  * @param[in] attributes
4005  *   Pointer to flow attributes
4006  * @param[in] external
4007  *   Whether the flow rule is created by a request external to the PMD.
4008  * @param[in] group
4009  *   rte_flow group index value.
4010  * @param[out] table
4011  *   HW table value.
4012  * @param[out] error
4013  *   Pointer to error structure.
4014  *
4015  * @return
4016  *   0 on success, a negative errno value otherwise and rte_errno is set.
4017  */
4018 int
4019 mlx5_flow_group_to_table(const struct rte_flow_attr *attributes, bool external,
4020                          uint32_t group, uint32_t *table,
4021                          struct rte_flow_error *error)
4022 {
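        /*
         * For external rules on the transfer domain, table 0 is reserved
         * for the jump rule created by
         * mlx5_flow_create_esw_table_zero_flow(), hence the shift by one.
         */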
4023         if (attributes->transfer && external) {
4024                 if (group == UINT32_MAX)
4025                         return rte_flow_error_set
4026                                                 (error, EINVAL,
4027                                                  RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
4028                                                  NULL,
4029                                                  "group index not supported");
4030                 *table = group + 1;
4031         } else {
4032                 *table = group;
4033         }
4034         return 0;
4035 }