net/mlx5: split flow validation to dedicated function
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34
35 /* Dev ops structure defined in mlx5.c */
36 extern const struct eth_dev_ops mlx5_dev_ops;
37 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
38
39 /* Pattern outer Layer bits. */
40 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
42 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
43 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
44 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
45 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
46
47 /* Pattern inner Layer bits. */
48 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
50 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
51 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
52 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
53 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
54
55 /* Pattern tunnel Layer bits. */
56 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
57 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
58 #define MLX5_FLOW_LAYER_GRE (1u << 14)
59 #define MLX5_FLOW_LAYER_MPLS (1u << 15)
60
61 /* Outer Masks. */
62 #define MLX5_FLOW_LAYER_OUTER_L3 \
63         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
64 #define MLX5_FLOW_LAYER_OUTER_L4 \
65         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
66 #define MLX5_FLOW_LAYER_OUTER \
67         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
68          MLX5_FLOW_LAYER_OUTER_L4)
69
70 /* Tunnel Masks. */
71 #define MLX5_FLOW_LAYER_TUNNEL \
72         (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
73          MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
74
75 /* Inner Masks. */
76 #define MLX5_FLOW_LAYER_INNER_L3 \
77         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
78 #define MLX5_FLOW_LAYER_INNER_L4 \
79         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
80 #define MLX5_FLOW_LAYER_INNER \
81         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
82          MLX5_FLOW_LAYER_INNER_L4)
83
84 /* Actions that modify the fate of matching traffic. */
85 #define MLX5_FLOW_FATE_DROP (1u << 0)
86 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
87 #define MLX5_FLOW_FATE_RSS (1u << 2)
88
89 /* Modify a packet. */
90 #define MLX5_FLOW_MOD_FLAG (1u << 0)
91 #define MLX5_FLOW_MOD_MARK (1u << 1)
92 #define MLX5_FLOW_MOD_COUNT (1u << 2)
93
94 /* Actions */
95 #define MLX5_FLOW_ACTION_DROP (1u << 0)
96 #define MLX5_FLOW_ACTION_QUEUE (1u << 1)
97 #define MLX5_FLOW_ACTION_RSS (1u << 2)
98 #define MLX5_FLOW_ACTION_FLAG (1u << 3)
99 #define MLX5_FLOW_ACTION_MARK (1u << 4)
100 #define MLX5_FLOW_ACTION_COUNT (1u << 5)
101
102 #define MLX5_FLOW_FATE_ACTIONS \
103         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
104
105 /* Possible IP protocol values for L3 protocol filtering. */
106 #define MLX5_IP_PROTOCOL_TCP 6
107 #define MLX5_IP_PROTOCOL_UDP 17
108 #define MLX5_IP_PROTOCOL_GRE 47
109 #define MLX5_IP_PROTOCOL_MPLS 147
110
111 /* Priority reserved for default flows. */
112 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
113
114 enum mlx5_expansion {
115         MLX5_EXPANSION_ROOT,
116         MLX5_EXPANSION_ROOT_OUTER,
117         MLX5_EXPANSION_ROOT_ETH_VLAN,
118         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
119         MLX5_EXPANSION_OUTER_ETH,
120         MLX5_EXPANSION_OUTER_ETH_VLAN,
121         MLX5_EXPANSION_OUTER_VLAN,
122         MLX5_EXPANSION_OUTER_IPV4,
123         MLX5_EXPANSION_OUTER_IPV4_UDP,
124         MLX5_EXPANSION_OUTER_IPV4_TCP,
125         MLX5_EXPANSION_OUTER_IPV6,
126         MLX5_EXPANSION_OUTER_IPV6_UDP,
127         MLX5_EXPANSION_OUTER_IPV6_TCP,
128         MLX5_EXPANSION_VXLAN,
129         MLX5_EXPANSION_VXLAN_GPE,
130         MLX5_EXPANSION_GRE,
131         MLX5_EXPANSION_MPLS,
132         MLX5_EXPANSION_ETH,
133         MLX5_EXPANSION_ETH_VLAN,
134         MLX5_EXPANSION_VLAN,
135         MLX5_EXPANSION_IPV4,
136         MLX5_EXPANSION_IPV4_UDP,
137         MLX5_EXPANSION_IPV4_TCP,
138         MLX5_EXPANSION_IPV6,
139         MLX5_EXPANSION_IPV6_UDP,
140         MLX5_EXPANSION_IPV6_TCP,
141 };
142
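/*
 * Expansion graph consumed by the RSS expansion helper
 * (rte_flow_expand_rss()): each node lists the item types that may follow it
 * and the ETH_RSS_* types that make the expanded pattern relevant, e.g. a
 * pattern ending with IPv4 can be expanded toward IPv4/UDP when UDP RSS
 * types are requested.
 */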
143 /** Supported expansion of items. */
144 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
145         [MLX5_EXPANSION_ROOT] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
147                                                  MLX5_EXPANSION_IPV4,
148                                                  MLX5_EXPANSION_IPV6),
149                 .type = RTE_FLOW_ITEM_TYPE_END,
150         },
151         [MLX5_EXPANSION_ROOT_OUTER] = {
152                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
153                                                  MLX5_EXPANSION_OUTER_IPV4,
154                                                  MLX5_EXPANSION_OUTER_IPV6),
155                 .type = RTE_FLOW_ITEM_TYPE_END,
156         },
157         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
158                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
159                 .type = RTE_FLOW_ITEM_TYPE_END,
160         },
161         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
162                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
163                 .type = RTE_FLOW_ITEM_TYPE_END,
164         },
165         [MLX5_EXPANSION_OUTER_ETH] = {
166                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
167                                                  MLX5_EXPANSION_OUTER_IPV6,
168                                                  MLX5_EXPANSION_MPLS),
169                 .type = RTE_FLOW_ITEM_TYPE_ETH,
170                 .rss_types = 0,
171         },
172         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
173                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
174                 .type = RTE_FLOW_ITEM_TYPE_ETH,
175                 .rss_types = 0,
176         },
177         [MLX5_EXPANSION_OUTER_VLAN] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
179                                                  MLX5_EXPANSION_OUTER_IPV6),
180                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
181         },
182         [MLX5_EXPANSION_OUTER_IPV4] = {
183                 .next = RTE_FLOW_EXPAND_RSS_NEXT
184                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
185                          MLX5_EXPANSION_OUTER_IPV4_TCP,
186                          MLX5_EXPANSION_GRE),
187                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
188                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
189                         ETH_RSS_NONFRAG_IPV4_OTHER,
190         },
191         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
192                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
193                                                  MLX5_EXPANSION_VXLAN_GPE),
194                 .type = RTE_FLOW_ITEM_TYPE_UDP,
195                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
196         },
197         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
198                 .type = RTE_FLOW_ITEM_TYPE_TCP,
199                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
200         },
201         [MLX5_EXPANSION_OUTER_IPV6] = {
202                 .next = RTE_FLOW_EXPAND_RSS_NEXT
203                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
204                          MLX5_EXPANSION_OUTER_IPV6_TCP),
205                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
206                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
207                         ETH_RSS_NONFRAG_IPV6_OTHER,
208         },
209         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
210                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
211                                                  MLX5_EXPANSION_VXLAN_GPE),
212                 .type = RTE_FLOW_ITEM_TYPE_UDP,
213                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
214         },
215         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
216                 .type = RTE_FLOW_ITEM_TYPE_TCP,
217                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
218         },
219         [MLX5_EXPANSION_VXLAN] = {
220                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
221                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
222         },
223         [MLX5_EXPANSION_VXLAN_GPE] = {
224                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
225                                                  MLX5_EXPANSION_IPV4,
226                                                  MLX5_EXPANSION_IPV6),
227                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
228         },
229         [MLX5_EXPANSION_GRE] = {
230                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
231                 .type = RTE_FLOW_ITEM_TYPE_GRE,
232         },
233         [MLX5_EXPANSION_MPLS] = {
234                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
235                                                  MLX5_EXPANSION_IPV6),
236                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
237         },
238         [MLX5_EXPANSION_ETH] = {
239                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
240                                                  MLX5_EXPANSION_IPV6),
241                 .type = RTE_FLOW_ITEM_TYPE_ETH,
242         },
243         [MLX5_EXPANSION_ETH_VLAN] = {
244                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
245                 .type = RTE_FLOW_ITEM_TYPE_ETH,
246         },
247         [MLX5_EXPANSION_VLAN] = {
248                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
249                                                  MLX5_EXPANSION_IPV6),
250                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
251         },
252         [MLX5_EXPANSION_IPV4] = {
253                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
254                                                  MLX5_EXPANSION_IPV4_TCP),
255                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
256                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
257                         ETH_RSS_NONFRAG_IPV4_OTHER,
258         },
259         [MLX5_EXPANSION_IPV4_UDP] = {
260                 .type = RTE_FLOW_ITEM_TYPE_UDP,
261                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
262         },
263         [MLX5_EXPANSION_IPV4_TCP] = {
264                 .type = RTE_FLOW_ITEM_TYPE_TCP,
265                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
266         },
267         [MLX5_EXPANSION_IPV6] = {
268                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
269                                                  MLX5_EXPANSION_IPV6_TCP),
270                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
271                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
272                         ETH_RSS_NONFRAG_IPV6_OTHER,
273         },
274         [MLX5_EXPANSION_IPV6_UDP] = {
275                 .type = RTE_FLOW_ITEM_TYPE_UDP,
276                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
277         },
278         [MLX5_EXPANSION_IPV6_TCP] = {
279                 .type = RTE_FLOW_ITEM_TYPE_TCP,
280                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
281         },
282 };
283
284 /** Verbs specification and resources attached to a flow. */
285 struct mlx5_flow_verbs {
286         LIST_ENTRY(mlx5_flow_verbs) next;
287         unsigned int size; /**< Size of the attribute. */
288         struct {
289                 struct ibv_flow_attr *attr;
290                 /**< Pointer to the Specification buffer. */
291                 uint8_t *specs; /**< Pointer to the specifications. */
292         };
293         struct ibv_flow *flow; /**< Verbs flow pointer. */
294         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
295         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
296 };
297
298 /* Counters information. */
299 struct mlx5_flow_counter {
300         LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
301         uint32_t shared:1; /**< Share counter ID with other flow rules. */
302         uint32_t ref_cnt:31; /**< Reference counter. */
303         uint32_t id; /**< Counter ID. */
304         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
305         uint64_t hits; /**< Number of packets matched by the rule. */
306         uint64_t bytes; /**< Number of bytes matched by the rule. */
307 };
308
309 /* Flow structure. */
310 struct rte_flow {
311         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
312         struct rte_flow_attr attributes; /**< User flow attribute. */
313         uint32_t layers;
314         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
315         uint32_t modifier;
316         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
317         uint32_t fate;
318         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
319         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
320         struct mlx5_flow_verbs *cur_verbs;
321         /**< Current Verbs flow structure being filled. */
322         struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
323         struct rte_flow_action_rss rss;/**< RSS context. */
324         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
325         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
326         void *nl_flow; /**< Netlink flow buffer if relevant. */
327 };
328
329 static const struct rte_flow_ops mlx5_flow_ops = {
330         .validate = mlx5_flow_validate,
331         .create = mlx5_flow_create,
332         .destroy = mlx5_flow_destroy,
333         .flush = mlx5_flow_flush,
334         .isolate = mlx5_flow_isolate,
335         .query = mlx5_flow_query,
336 };
337
338 /* Convert FDIR request to Generic flow. */
339 struct mlx5_fdir {
340         struct rte_flow_attr attr;
341         struct rte_flow_action actions[2];
342         struct rte_flow_item items[4];
343         struct rte_flow_item_eth l2;
344         struct rte_flow_item_eth l2_mask;
345         union {
346                 struct rte_flow_item_ipv4 ipv4;
347                 struct rte_flow_item_ipv6 ipv6;
348         } l3;
349         union {
350                 struct rte_flow_item_ipv4 ipv4;
351                 struct rte_flow_item_ipv6 ipv6;
352         } l3_mask;
353         union {
354                 struct rte_flow_item_udp udp;
355                 struct rte_flow_item_tcp tcp;
356         } l4;
357         union {
358                 struct rte_flow_item_udp udp;
359                 struct rte_flow_item_tcp tcp;
360         } l4_mask;
361         struct rte_flow_action_queue queue;
362 };
363
364 /* Verbs specification header. */
365 struct ibv_spec_header {
366         enum ibv_flow_spec_type type;
367         uint16_t size;
368 };
369
370 /*
371  * Number of sub priorities.
372  * For each kind of pattern matching, i.e. L2, L3, L4, to get correct
373  * matching on the NIC (firmware dependent), L4 must have the highest
374  * priority, followed by L3 and finally L2.
375  */
376 #define MLX5_PRIORITY_MAP_L2 2
377 #define MLX5_PRIORITY_MAP_L3 1
378 #define MLX5_PRIORITY_MAP_L4 0
379 #define MLX5_PRIORITY_MAP_MAX 3
380
381 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
382 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
383         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
384 };
385
386 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
387 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
388         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
389         { 9, 10, 11 }, { 12, 13, 14 },
390 };
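/*
 * Example: with 16 Verbs priorities, a flow created at rte_flow priority 1
 * and matching up to L3 ends up at Verbs priority
 * priority_map_5[1][MLX5_PRIORITY_MAP_L3] = 4.
 */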
391
392 /* Tunnel information. */
393 struct mlx5_flow_tunnel_info {
394         uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
395         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
396 };
397
398 static struct mlx5_flow_tunnel_info tunnels_info[] = {
399         {
400                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
401                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
402         },
403         {
404                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
405                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
406         },
407         {
408                 .tunnel = MLX5_FLOW_LAYER_GRE,
409                 .ptype = RTE_PTYPE_TUNNEL_GRE,
410         },
411         {
412                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
413                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
414         },
415         {
416                 .tunnel = MLX5_FLOW_LAYER_MPLS,
417                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
418         },
419 };
420
421 /**
422  * Discover the maximum number of priorities available.
423  *
424  * @param[in] dev
425  *   Pointer to Ethernet device.
426  *
427  * @return
428  *   number of supported flow priorities on success, a negative errno
429  *   value otherwise and rte_errno is set.
430  */
431 int
432 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
433 {
434         struct {
435                 struct ibv_flow_attr attr;
436                 struct ibv_flow_spec_eth eth;
437                 struct ibv_flow_spec_action_drop drop;
438         } flow_attr = {
439                 .attr = {
440                         .num_of_specs = 2,
441                 },
442                 .eth = {
443                         .type = IBV_FLOW_SPEC_ETH,
444                         .size = sizeof(struct ibv_flow_spec_eth),
445                 },
446                 .drop = {
447                         .size = sizeof(struct ibv_flow_spec_action_drop),
448                         .type = IBV_FLOW_SPEC_ACTION_DROP,
449                 },
450         };
451         struct ibv_flow *flow;
452         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
453         uint16_t vprio[] = { 8, 16 };
454         int i;
455         int priority = 0;
456
457         if (!drop) {
458                 rte_errno = ENOTSUP;
459                 return -rte_errno;
460         }
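        /*
         * Probe the number of Verbs priorities by creating a drop flow at the
         * last priority of each candidate range (8, then 16); the last
         * successful attempt gives the supported count.
         */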
461         for (i = 0; i != RTE_DIM(vprio); i++) {
462                 flow_attr.attr.priority = vprio[i] - 1;
463                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
464                 if (!flow)
465                         break;
466                 claim_zero(mlx5_glue->destroy_flow(flow));
467                 priority = vprio[i];
468         }
469         switch (priority) {
470         case 8:
471                 priority = RTE_DIM(priority_map_3);
472                 break;
473         case 16:
474                 priority = RTE_DIM(priority_map_5);
475                 break;
476         default:
477                 rte_errno = ENOTSUP;
478                 DRV_LOG(ERR,
479                         "port %u verbs maximum priority: %d expected 8/16",
480                         dev->data->port_id, vprio[i]);
481                 return -rte_errno;
482         }
483         mlx5_hrxq_drop_release(dev);
484         DRV_LOG(INFO, "port %u flow maximum priority: %d",
485                 dev->data->port_id, priority);
486         return priority;
487 }
488
489 /**
490  * Adjust flow priority.
491  *
492  * @param dev
493  *   Pointer to Ethernet device.
494  * @param flow
495  *   Pointer to an rte flow.
496  */
497 static void
498 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
499 {
500         struct priv *priv = dev->data->dev_private;
501         uint32_t priority = flow->attributes.priority;
502         uint32_t subpriority = flow->cur_verbs->attr->priority;
503
504         switch (priv->config.flow_prio) {
505         case RTE_DIM(priority_map_3):
506                 priority = priority_map_3[priority][subpriority];
507                 break;
508         case RTE_DIM(priority_map_5):
509                 priority = priority_map_5[priority][subpriority];
510                 break;
511         }
512         flow->cur_verbs->attr->priority = priority;
513 }
514
515 /**
516  * Get a flow counter.
517  *
518  * @param[in] dev
519  *   Pointer to Ethernet device.
520  * @param[in] shared
521  *   Indicate if this counter is shared with other flows.
522  * @param[in] id
523  *   Counter identifier.
524  *
525  * @return
526  *   A pointer to the counter on success, NULL otherwise and rte_errno is set.
527  */
528 static struct mlx5_flow_counter *
529 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
530 {
531         struct priv *priv = dev->data->dev_private;
532         struct mlx5_flow_counter *cnt;
533
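        /*
         * Reuse an existing counter only when both it and the request are
         * shared and the counter IDs match; otherwise allocate a new one.
         */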
534         LIST_FOREACH(cnt, &priv->flow_counters, next) {
535                 if (!cnt->shared || cnt->shared != shared)
536                         continue;
537                 if (cnt->id != id)
538                         continue;
539                 cnt->ref_cnt++;
540                 return cnt;
541         }
542 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
543
544         struct mlx5_flow_counter tmpl = {
545                 .shared = shared,
546                 .id = id,
547                 .cs = mlx5_glue->create_counter_set
548                         (priv->ctx,
549                          &(struct ibv_counter_set_init_attr){
550                                  .counter_set_id = id,
551                          }),
552                 .hits = 0,
553                 .bytes = 0,
554         };
555
556         if (!tmpl.cs) {
557                 rte_errno = errno;
558                 return NULL;
559         }
560         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
561         if (!cnt) {
562                 rte_errno = ENOMEM;
563                 return NULL;
564         }
565         *cnt = tmpl;
566         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
567         return cnt;
568 #endif
569         rte_errno = ENOTSUP;
570         return NULL;
571 }
572
573 /**
574  * Release a flow counter.
575  *
576  * @param[in] counter
577  *   Pointer to the counter handle.
578  */
579 static void
580 mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
581 {
582         if (--counter->ref_cnt == 0) {
583                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
584                 LIST_REMOVE(counter, next);
585                 rte_free(counter);
586         }
587 }
588
589 /**
590  * Copy the @p attributes into the @p flow, substituting the maximum
591  * supported priority when the reserved value MLX5_FLOW_PRIO_RSVD is used.
592  *
593  * @param[in] dev
594  *   Pointer to Ethernet device structure.
595  * @param[in] attributes
596  *   Pointer to flow attributes
597  * @param[in, out] flow
598  *   Pointer to the rte_flow structure.
599  *
600  * @return
601  *   0 on success.
602  */
603 static int
604 mlx5_flow_attributes(struct rte_eth_dev *dev,
605                      const struct rte_flow_attr *attributes,
606                      struct rte_flow *flow)
607 {
608         struct priv *priv = dev->data->dev_private;
609         uint32_t priority_max = priv->config.flow_prio - 1;
610
611         flow->attributes = *attributes;
612         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
613                 flow->attributes.priority = priority_max;
614         return 0;
615 }
616
617 /**
618  * Verify the @p item specifications (spec, last, mask) are compatible with the
619  * NIC capabilities.
620  *
621  * @param[in] item
622  *   Item specification.
623  * @param[in] mask
624  *   @p item->mask or flow default bit-masks.
625  * @param[in] nic_mask
626  *   Bit-masks covering supported fields by the NIC to compare with user mask.
627  * @param[in] size
628  *   Bit-masks size in bytes.
629  * @param[out] error
630  *   Pointer to error structure.
631  *
632  * @return
633  *   0 on success, a negative errno value otherwise and rte_errno is set.
634  */
635 static int
636 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
637                           const uint8_t *mask,
638                           const uint8_t *nic_mask,
639                           unsigned int size,
640                           struct rte_flow_error *error)
641 {
642         unsigned int i;
643
644         assert(nic_mask);
645         for (i = 0; i < size; ++i)
646                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
647                         return rte_flow_error_set(error, ENOTSUP,
648                                                   RTE_FLOW_ERROR_TYPE_ITEM,
649                                                   item,
650                                                   "mask enables non supported"
651                                                   " bits");
652         if (!item->spec && (item->mask || item->last))
653                 return rte_flow_error_set(error, EINVAL,
654                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
655                                           "mask/last without a spec is not"
656                                           " supported");
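        /*
         * A spec/last pair is accepted only when it does not describe an
         * actual range, i.e. spec and last are identical once masked.
         */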
657         if (item->spec && item->last) {
658                 uint8_t spec[size];
659                 uint8_t last[size];
660                 unsigned int i;
661                 int ret;
662
663                 for (i = 0; i < size; ++i) {
664                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
665                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
666                 }
667                 ret = memcmp(spec, last, size);
668                 if (ret != 0)
669                         return rte_flow_error_set(error, ENOTSUP,
670                                                   RTE_FLOW_ERROR_TYPE_ITEM,
671                                                   item,
672                                                   "range is not supported");
673         }
674         return 0;
675 }
676
677 /**
678  * Add a verbs item specification into @p flow.
679  *
680  * @param[in, out] flow
681  *   Pointer to flow structure.
682  * @param[in] src
683  *   Verbs specification to copy.
684  * @param[in] size
685  *   Size in bytes of the specification to copy.
686  */
687 static void
688 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
689 {
690         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
691
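        /*
         * Always account for the specification size so callers can compute
         * the required buffer length; copy the bytes only when a destination
         * buffer is available.
         */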
692         if (verbs->specs) {
693                 void *dst;
694
695                 dst = (void *)(verbs->specs + verbs->size);
696                 memcpy(dst, src, size);
697                 ++verbs->attr->num_of_specs;
698         }
699         verbs->size += size;
700 }
701
702 /**
703  * Adjust verbs hash fields according to the @p flow information.
704  *
705  * @param[in, out] flow
706  *   Pointer to flow structure.
707  * @param[in] tunnel
708  *   1 when the hash field is for a tunnel item.
709  * @param[in] layer_types
710  *   ETH_RSS_* types.
711  * @param[in] hash_fields
712  *   Item hash fields.
713  */
714 static void
715 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
716                                   int tunnel __rte_unused,
717                                   uint32_t layer_types, uint64_t hash_fields)
718 {
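        /*
         * When tunnel offloads are supported and inner RSS is requested
         * (level 2), only layers inside a tunnel contribute hash fields; for
         * outer RSS, inner layers contribute nothing. A layer is also skipped
         * when none of its ETH_RSS_* types was requested.
         */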
719 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
720         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
721         if (flow->rss.level == 2 && !tunnel)
722                 hash_fields = 0;
723         else if (flow->rss.level < 2 && tunnel)
724                 hash_fields = 0;
725 #endif
726         if (!(flow->rss.types & layer_types))
727                 hash_fields = 0;
728         flow->cur_verbs->hash_fields |= hash_fields;
729 }
730
731 /**
732  * Convert the @p item into a Verbs specification after ensuring the NIC
733  * will understand and process it correctly.
734  * If the necessary size for the conversion is greater than the @p flow_size,
735  * nothing is written in @p flow; the validation is still performed.
736  *
737  * @param[in] item
738  *   Item specification.
739  * @param[in, out] flow
740  *   Pointer to flow structure.
741  * @param[in] flow_size
742  *   Size in bytes of the available space in @p flow, if too small, nothing is
743  *   written.
744  *
745  * @return
746  *   On success, the number of bytes consumed/necessary. If the returned
747  *   value is less than or equal to @p flow_size, the @p item has been
748  *   fully converted; otherwise another call with this size should be done.
749  *   On error, a negative errno value is returned and rte_errno is set.
750  */
751 static int
752 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
753                    const size_t flow_size)
754 {
755         const struct rte_flow_item_eth *spec = item->spec;
756         const struct rte_flow_item_eth *mask = item->mask;
757         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
758         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
759         struct ibv_flow_spec_eth eth = {
760                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
761                 .size = size,
762         };
763
764         if (!mask)
765                 mask = &rte_flow_item_eth_mask;
766         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
767                 MLX5_FLOW_LAYER_OUTER_L2;
768         if (size > flow_size)
769                 return size;
770         if (spec) {
771                 unsigned int i;
772
773                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
774                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
775                 eth.val.ether_type = spec->type;
776                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
777                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
778                 eth.mask.ether_type = mask->type;
779                 /* Remove unwanted bits from values. */
780                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
781                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
782                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
783                 }
784                 eth.val.ether_type &= eth.mask.ether_type;
785         }
786         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
787         mlx5_flow_spec_verbs_add(flow, &eth, size);
788         return size;
789 }
790
791 /**
792  * Update the VLAN tag in the Verbs Ethernet specification.
793  *
794  * @param[in, out] attr
795  *   Pointer to Verbs attributes structure.
796  * @param[in] eth
797  *   Verbs structure containing the VLAN information to copy.
798  */
799 static void
800 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
801                            struct ibv_flow_spec_eth *eth)
802 {
803         unsigned int i;
804         const enum ibv_flow_spec_type search = eth->type;
805         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
806                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
807
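        /* Find the Ethernet specification and merge the VLAN fields into it. */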
808         for (i = 0; i != attr->num_of_specs; ++i) {
809                 if (hdr->type == search) {
810                         struct ibv_flow_spec_eth *e =
811                                 (struct ibv_flow_spec_eth *)hdr;
812
813                         e->val.vlan_tag = eth->val.vlan_tag;
814                         e->mask.vlan_tag = eth->mask.vlan_tag;
815                         e->val.ether_type = eth->val.ether_type;
816                         e->mask.ether_type = eth->mask.ether_type;
817                         break;
818                 }
819                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
820         }
821 }
822
823 /**
824  * Convert the @p item into @p flow (or by updating the already present
825  * Ethernet Verbs) specification after ensuring the NIC will understand and
826  * process it correctly.
827  * If the necessary size for the conversion is greater than the @p flow_size,
828  * nothing is written in @p flow; the validation is still performed.
829  *
830  * @param[in] item
831  *   Item specification.
832  * @param[in, out] flow
833  *   Pointer to flow structure.
834  * @param[in] flow_size
835  *   Size in bytes of the available space in @p flow, if too small, nothing is
836  *   written.
837  *
838  * @return
839  *   On success, the number of bytes consumed/necessary. If the returned
840  *   value is less than or equal to @p flow_size, the @p item has been
841  *   fully converted; otherwise another call with this size should be done.
842  *   On error, a negative errno value is returned and rte_errno is set.
843  */
844 static int
845 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
846                     const size_t flow_size)
847 {
848         const struct rte_flow_item_vlan *spec = item->spec;
849         const struct rte_flow_item_vlan *mask = item->mask;
850         unsigned int size = sizeof(struct ibv_flow_spec_eth);
851         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
852         struct ibv_flow_spec_eth eth = {
853                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
854                 .size = size,
855         };
856         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
857                 MLX5_FLOW_LAYER_OUTER_L2;
858
859         if (!mask)
860                 mask = &rte_flow_item_vlan_mask;
861         if (spec) {
862                 eth.val.vlan_tag = spec->tci;
863                 eth.mask.vlan_tag = mask->tci;
864                 eth.val.vlan_tag &= eth.mask.vlan_tag;
865                 eth.val.ether_type = spec->inner_type;
866                 eth.mask.ether_type = mask->inner_type;
867                 eth.val.ether_type &= eth.mask.ether_type;
868         }
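        /*
         * Add a new Ethernet specification carrying the VLAN fields when no
         * L2 layer has been matched yet; otherwise update the existing one.
         */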
869         if (!(flow->layers & l2m)) {
870                 if (size <= flow_size) {
871                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
872                         mlx5_flow_spec_verbs_add(flow, &eth, size);
873                 }
874         } else {
875                 if (flow->cur_verbs)
876                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
877                                                    &eth);
878                 size = 0; /* Only an update is done in eth specification. */
879         }
880         flow->layers |= tunnel ?
881                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
882                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
883         return size;
884 }
885
886 /**
887  * Convert the @p item into a Verbs specification after ensuring the NIC
888  * will understand and process it correctly.
889  * If the necessary size for the conversion is greater than the @p flow_size,
890  * nothing is written in @p flow; the validation is still performed.
891  *
892  * @param[in] item
893  *   Item specification.
894  * @param[in, out] flow
895  *   Pointer to flow structure.
896  * @param[in] flow_size
897  *   Size in bytes of the available space in @p flow, if too small, nothing is
898  *   written.
899  *
900  * @return
901  *   On success, the number of bytes consumed/necessary. If the returned
902  *   value is less than or equal to @p flow_size, the @p item has been
903  *   fully converted; otherwise another call with this size should be done.
904  */
905 static int
906 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
907                     const size_t flow_size)
908 {
909         const struct rte_flow_item_ipv4 *spec = item->spec;
910         const struct rte_flow_item_ipv4 *mask = item->mask;
911         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
912         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
913         struct ibv_flow_spec_ipv4_ext ipv4 = {
914                 .type = IBV_FLOW_SPEC_IPV4_EXT |
915                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
916                 .size = size,
917         };
918
919         if (!mask)
920                 mask = &rte_flow_item_ipv4_mask;
921         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
922                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
923         if (spec) {
924                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
925                         .src_ip = spec->hdr.src_addr,
926                         .dst_ip = spec->hdr.dst_addr,
927                         .proto = spec->hdr.next_proto_id,
928                         .tos = spec->hdr.type_of_service,
929                 };
930                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
931                         .src_ip = mask->hdr.src_addr,
932                         .dst_ip = mask->hdr.dst_addr,
933                         .proto = mask->hdr.next_proto_id,
934                         .tos = mask->hdr.type_of_service,
935                 };
936                 /* Remove unwanted bits from values. */
937                 ipv4.val.src_ip &= ipv4.mask.src_ip;
938                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
939                 ipv4.val.proto &= ipv4.mask.proto;
940                 ipv4.val.tos &= ipv4.mask.tos;
941         }
942         if (size <= flow_size) {
943                 mlx5_flow_verbs_hashfields_adjust
944                         (flow, tunnel,
945                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
946                           ETH_RSS_NONFRAG_IPV4_TCP |
947                           ETH_RSS_NONFRAG_IPV4_UDP |
948                           ETH_RSS_NONFRAG_IPV4_OTHER),
949                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
950                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
951                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
952         }
953         return size;
954 }
955
956 /**
957  * Convert the @p item into a Verbs specification after ensuring the NIC
958  * will understand and process it correctly.
959  * If the necessary size for the conversion is greater than the @p flow_size,
960  * nothing is written in @p flow; the validation is still performed.
961  *
962  * @param[in] item
963  *   Item specification.
964  * @param[in, out] flow
965  *   Pointer to flow structure.
966  * @param[in] flow_size
967  *   Size in bytes of the available space in @p flow, if too small, nothing is
968  *   written.
969  *
970  * @return
971  *   On success, the number of bytes consumed/necessary. If the returned
972  *   value is less than or equal to @p flow_size, the @p item has been
973  *   fully converted; otherwise another call with this size should be done.
974  */
975 static int
976 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
977                     const size_t flow_size)
978 {
979         const struct rte_flow_item_ipv6 *spec = item->spec;
980         const struct rte_flow_item_ipv6 *mask = item->mask;
981         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
982         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
983         struct ibv_flow_spec_ipv6 ipv6 = {
984                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
985                 .size = size,
986         };
987
988         if (!mask)
989                 mask = &rte_flow_item_ipv6_mask;
990         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
991                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
992         if (spec) {
993                 unsigned int i;
994                 uint32_t vtc_flow_val;
995                 uint32_t vtc_flow_mask;
996
997                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
998                        RTE_DIM(ipv6.val.src_ip));
999                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1000                        RTE_DIM(ipv6.val.dst_ip));
1001                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1002                        RTE_DIM(ipv6.mask.src_ip));
1003                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1004                        RTE_DIM(ipv6.mask.dst_ip));
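                /*
                 * The vtc_flow word packs version, traffic class and flow
                 * label; split it into the separate Verbs fields.
                 */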
1005                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1006                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1007                 ipv6.val.flow_label =
1008                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1009                                          IPV6_HDR_FL_SHIFT);
1010                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1011                                          IPV6_HDR_TC_SHIFT;
1012                 ipv6.val.next_hdr = spec->hdr.proto;
1013                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1014                 ipv6.mask.flow_label =
1015                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1016                                          IPV6_HDR_FL_SHIFT);
1017                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1018                                           IPV6_HDR_TC_SHIFT;
1019                 ipv6.mask.next_hdr = mask->hdr.proto;
1020                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1021                 /* Remove unwanted bits from values. */
1022                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1023                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1024                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1025                 }
1026                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1027                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1028                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1029                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1030         }
1031         if (size <= flow_size) {
1032                 mlx5_flow_verbs_hashfields_adjust
1033                         (flow, tunnel,
1034                          (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1035                           ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP |
1036                           ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX |
1037                           ETH_RSS_IPV6_TCP_EX | ETH_RSS_IPV6_UDP_EX),
1038                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1039                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1040                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1041         }
1042         return size;
1043 }
1044
1045 /**
1046  * Convert the @p item into a Verbs specification after ensuring the NIC
1047  * will understand and process it correctly.
1048  * If the necessary size for the conversion is greater than the @p flow_size,
1049  * nothing is written in @p flow; the validation is still performed.
1050  *
1051  * @param[in] item
1052  *   Item specification.
1053  * @param[in, out] flow
1054  *   Pointer to flow structure.
1055  * @param[in] flow_size
1056  *   Size in bytes of the available space in @p flow, if too small, nothing is
1057  *   written.
1058  *
1059  * @return
1060  *   On success, the number of bytes consumed/necessary. If the returned
1061  *   value is less than or equal to @p flow_size, the @p item has been
1062  *   fully converted; otherwise another call with this size should be done.
1063  */
1064 static int
1065 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1066                    const size_t flow_size)
1067 {
1068         const struct rte_flow_item_udp *spec = item->spec;
1069         const struct rte_flow_item_udp *mask = item->mask;
1070         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1071         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1072         struct ibv_flow_spec_tcp_udp udp = {
1073                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1074                 .size = size,
1075         };
1076
1077         if (!mask)
1078                 mask = &rte_flow_item_udp_mask;
1079         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1080                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1081         if (spec) {
1082                 udp.val.dst_port = spec->hdr.dst_port;
1083                 udp.val.src_port = spec->hdr.src_port;
1084                 udp.mask.dst_port = mask->hdr.dst_port;
1085                 udp.mask.src_port = mask->hdr.src_port;
1086                 /* Remove unwanted bits from values. */
1087                 udp.val.src_port &= udp.mask.src_port;
1088                 udp.val.dst_port &= udp.mask.dst_port;
1089         }
1090         if (size <= flow_size) {
1091                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1092                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1093                                                    IBV_RX_HASH_DST_PORT_UDP));
1094                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1095                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1096         }
1097         return size;
1098 }
1099
1100 /**
1101  * Convert the @p item into a Verbs specification after ensuring the NIC
1102  * will understand and process it correctly.
1103  * If the necessary size for the conversion is greater than the @p flow_size,
1104  * nothing is written in @p flow; the validation is still performed.
1105  *
1106  * @param[in] item
1107  *   Item specification.
1108  * @param[in, out] flow
1109  *   Pointer to flow structure.
1110  * @param[in] flow_size
1111  *   Size in bytes of the available space in @p flow, if too small, nothing is
1112  *   written.
1115  *
1116  * @return
1117  *   On success, the number of bytes consumed/necessary. If the returned
1118  *   value is less than or equal to @p flow_size, the @p item has been
1119  *   fully converted; otherwise another call with this size should be done.
1120  */
1121 static int
1122 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1123                    const size_t flow_size)
1124 {
1125         const struct rte_flow_item_tcp *spec = item->spec;
1126         const struct rte_flow_item_tcp *mask = item->mask;
1127         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1128         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1129         struct ibv_flow_spec_tcp_udp tcp = {
1130                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1131                 .size = size,
1132         };
1133
1134         if (!mask)
1135                 mask = &rte_flow_item_tcp_mask;
1136         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1137                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1138         if (spec) {
1139                 tcp.val.dst_port = spec->hdr.dst_port;
1140                 tcp.val.src_port = spec->hdr.src_port;
1141                 tcp.mask.dst_port = mask->hdr.dst_port;
1142                 tcp.mask.src_port = mask->hdr.src_port;
1143                 /* Remove unwanted bits from values. */
1144                 tcp.val.src_port &= tcp.mask.src_port;
1145                 tcp.val.dst_port &= tcp.mask.dst_port;
1146         }
1147         if (size <= flow_size) {
1148                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1149                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1150                                                    IBV_RX_HASH_DST_PORT_TCP));
1151                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1152                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1153         }
1154         return size;
1155 }
1156
1157 /**
1158  * Convert the @p item into a Verbs specification after ensuring the NIC
1159  * will understand and process it correctly.
1160  * If the necessary size for the conversion is greater than the @p flow_size,
1161  * nothing is written in @p flow; the validation is still performed.
1162  *
1163  * @param[in] item
1164  *   Item specification.
1165  * @param[in, out] flow
1166  *   Pointer to flow structure.
1167  * @param[in] flow_size
1168  *   Size in bytes of the available space in @p flow, if too small, nothing is
1169  *   written.
1172  *
1173  * @return
1174  *   On success, the number of bytes consumed/necessary. If the returned
1175  *   value is less than or equal to @p flow_size, the @p item has been
1176  *   fully converted; otherwise another call with this size should be done.
1177  */
1178 static int
1179 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1180                      const size_t flow_size)
1181 {
1182         const struct rte_flow_item_vxlan *spec = item->spec;
1183         const struct rte_flow_item_vxlan *mask = item->mask;
1184         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1185         struct ibv_flow_spec_tunnel vxlan = {
1186                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1187                 .size = size,
1188         };
1189         union vni {
1190                 uint32_t vlan_id;
1191                 uint8_t vni[4];
1192         } id = { .vlan_id = 0, };
1193
1194         if (!mask)
1195                 mask = &rte_flow_item_vxlan_mask;
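        /*
         * The 24-bit VNI is copied into bytes 1-3 of the union so the
         * resulting 32-bit value is used as the Verbs tunnel ID.
         */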
1196         if (spec) {
1197                 memcpy(&id.vni[1], spec->vni, 3);
1198                 vxlan.val.tunnel_id = id.vlan_id;
1199                 memcpy(&id.vni[1], mask->vni, 3);
1200                 vxlan.mask.tunnel_id = id.vlan_id;
1201                 /* Remove unwanted bits from values. */
1202                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1203         }
1204         if (size <= flow_size) {
1205                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1206                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1207         }
1208         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1209         return size;
1210 }
1211
1212 /**
1213  * Convert the @p item into a Verbs specification after ensuring the NIC
1214  * will understand and process it correctly.
1215  * If the necessary size for the conversion is greater than the @p flow_size,
1216  * nothing is written in @p flow; the validation is still performed.
1217  *
1218  * @param[in] item
1219  *   Item specification.
1220  * @param[in, out] flow
1221  *   Pointer to flow structure.
1222  * @param[in] flow_size
1223  *   Size in bytes of the available space in @p flow, if too small, nothing is
1224  *   written.
1227  *
1228  * @return
1229  *   On success, the number of bytes consumed/necessary. If the returned
1230  *   value is less than or equal to @p flow_size, the @p item has been
1231  *   fully converted; otherwise another call with this size should be done.
1232  */
1233 static int
1234 mlx5_flow_item_vxlan_gpe(const struct rte_flow_item *item,
1235                          struct rte_flow *flow, const size_t flow_size)
1236 {
1237         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1238         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1239         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1240         struct ibv_flow_spec_tunnel vxlan_gpe = {
1241                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1242                 .size = size,
1243         };
1244         union vni {
1245                 uint32_t vlan_id;
1246                 uint8_t vni[4];
1247         } id = { .vlan_id = 0, };
1248
1249         if (!mask)
1250                 mask = &rte_flow_item_vxlan_gpe_mask;
1251         if (spec) {
1252                 memcpy(&id.vni[1], spec->vni, 3);
1253                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1254                 memcpy(&id.vni[1], mask->vni, 3);
1255                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1256                 /* Remove unwanted bits from values. */
1257                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1258         }
1259         if (size <= flow_size) {
1260                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1261                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1262         }
1263         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1264         return size;
1265 }
1266
1267 /**
1268  * Update the protocol in Verbs IPv4/IPv6 spec.
1269  *
1270  * @param[in, out] attr
1271  *   Pointer to Verbs attributes structure.
1272  * @param[in] search
1273  *   Specification type to search in order to update the IP protocol.
1274  * @param[in] protocol
1275  *   Protocol value to set if none is present in the specification.
1276  */
1277 static void
1278 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1279                                       enum ibv_flow_spec_type search,
1280                                       uint8_t protocol)
1281 {
1282         unsigned int i;
1283         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
1284                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
1285
1286         if (!attr)
1287                 return;
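        /*
         * Walk the already translated specifications; when the requested L3
         * spec is found and its protocol/next header is still unset, set it
         * to @p protocol.
         */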
1288         for (i = 0; i != attr->num_of_specs; ++i) {
1289                 if (hdr->type == search) {
1290                         union {
1291                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1292                                 struct ibv_flow_spec_ipv6 *ipv6;
1293                         } ip;
1294
1295                         switch (search) {
1296                         case IBV_FLOW_SPEC_IPV4_EXT:
1297                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1298                                 if (!ip.ipv4->val.proto) {
1299                                         ip.ipv4->val.proto = protocol;
1300                                         ip.ipv4->mask.proto = 0xff;
1301                                 }
1302                                 break;
1303                         case IBV_FLOW_SPEC_IPV6:
1304                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1305                                 if (!ip.ipv6->val.next_hdr) {
1306                                         ip.ipv6->val.next_hdr = protocol;
1307                                         ip.ipv6->mask.next_hdr = 0xff;
1308                                 }
1309                                 break;
1310                         default:
1311                                 break;
1312                         }
1313                         break;
1314                 }
1315                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1316         }
1317 }
1318
1319 /**
1320  * Convert the @p item into a Verbs specification after ensuring the NIC
1321  * will understand and process it correctly.
1322  * It also updates the previously converted L3 layer so that its protocol
1323  * field matches GRE when the application did not set one.
1324  * If the necessary size for the conversion is greater than the @p flow_size,
1325  * nothing is written in @p flow; the validation is still performed.
1326  *
1329  * @param[in] item
1330  *   Item specification.
1331  * @param[in, out] flow
1332  *   Pointer to flow structure.
1333  * @param[in] flow_size
1334  *   Size in bytes of the available space in @p flow, if too small, nothing is
1335  *   written.
1336  *
1337  * @return
1338  *   On success, the number of bytes consumed/necessary. If the returned
1339  *   value is less than or equal to @p flow_size, the @p item has been
1340  *   fully converted; otherwise another call with this size should be done.
1341  */
1342 static int
1343 mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
1344                    struct rte_flow *flow, const size_t flow_size)
1345 {
1346         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1347 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1348         const struct rte_flow_item_gre *spec = item->spec;
1349         const struct rte_flow_item_gre *mask = item->mask;
1350         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1351         struct ibv_flow_spec_gre tunnel = {
1352                 .type = IBV_FLOW_SPEC_GRE,
1353                 .size = size,
1354         };
1355 #else
1356         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1357         struct ibv_flow_spec_tunnel tunnel = {
1358                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1359                 .size = size,
1360         };
1361 #endif
1362
1363 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1364         if (!mask)
1365                 mask = &rte_flow_item_gre_mask;
1366         if (spec) {
1367                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1368                 tunnel.val.protocol = spec->protocol;
1369                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1370                 tunnel.mask.protocol = mask->protocol;
1371                 /* Remove unwanted bits from values. */
1372                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1373                 tunnel.val.protocol &= tunnel.mask.protocol;
1374                 tunnel.val.key &= tunnel.mask.key;
1375         }
1377 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1378         if (size <= flow_size) {
1379                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1380                         mlx5_flow_item_gre_ip_protocol_update
1381                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1382                                  MLX5_IP_PROTOCOL_GRE);
1383                 else
1384                         mlx5_flow_item_gre_ip_protocol_update
1385                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1386                                  MLX5_IP_PROTOCOL_GRE);
1387                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1388                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1389         }
1390         flow->layers |= MLX5_FLOW_LAYER_GRE;
1391         return size;
1392 }
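
/*
 * Illustrative sketch (editorial, not part of the driver): a pattern this
 * converter would handle could be built by an application roughly as below.
 * The 0x0800 protocol value is only an example (IPv4 carried over GRE).
 *
 *   struct rte_flow_item_gre gre_spec = {
 *           .protocol = RTE_BE16(0x0800),
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_GRE, .spec = &gre_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 * With such a pattern, the IPv4 specification already converted by the IPv4
 * item handler gets its protocol field forced to MLX5_IP_PROTOCOL_GRE by the
 * update helper above when the application left it unset.
 */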
1393
1394 /**
1395  * Convert the @p item into a Verbs specification after ensuring the NIC
1396  * will understand and process it correctly.
1397  * If the necessary size for the conversion is greater than the @p flow_size,
1398  * nothing is written in @p flow, the validation is still performed.
1399  *
1400  * @param[in] item
1401  *   Item specification.
1402  * @param[in, out] flow
1403  *   Pointer to flow structure.
1404  * @param[in] flow_size
1405  *   Size in bytes of the available space in @p flow, if too small, nothing is
1406  *   written.
1407  * @param[out] error
1408  *   Pointer to error structure.
1409  *
1410  * @return
1411  *   On success the number of bytes consumed/necessary, if the returned value
1412  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
1413  *   otherwise another call with this returned memory size should be done.
1414  *   On error, a negative errno value is returned and rte_errno is set.
1415  */
1416 static int
1417 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
1418                     struct rte_flow *flow __rte_unused,
1419                     const size_t flow_size __rte_unused,
1420                     struct rte_flow_error *error)
1421 {
1422 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1423         const struct rte_flow_item_mpls *spec = item->spec;
1424         const struct rte_flow_item_mpls *mask = item->mask;
1425         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1426         struct ibv_flow_spec_mpls mpls = {
1427                 .type = IBV_FLOW_SPEC_MPLS,
1428                 .size = size,
1429         };
1430
1431         if (!mask)
1432                 mask = &rte_flow_item_mpls_mask;
1433         if (spec) {
1434                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1435                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1436                 /* Remove unwanted bits from values.  */
1437                 mpls.val.label &= mpls.mask.label;
1438         }
1439         if (size <= flow_size) {
1440                 mlx5_flow_spec_verbs_add(flow, &mpls, size);
1441                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1442         }
1443         flow->layers |= MLX5_FLOW_LAYER_MPLS;
1444         return size;
1445 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1446         return rte_flow_error_set(error, ENOTSUP,
1447                                   RTE_FLOW_ERROR_TYPE_ITEM,
1448                                   item,
1449                                   "MPLS is not supported by Verbs, please"
1450                                   " update.");
1451 }
1452
1453 /**
1454  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1455  * will understand and process it correctly.
1456  * The conversion is performed item per item, each of them is written into
1457  * the @p flow if its size is lesser or equal to @p flow_size.
1458  * Validation and memory consumption computation are still performed until the
1459  * end of @p pattern, unless an error is encountered.
1460  *
1461  * @param[in] pattern
1462  *   Flow pattern.
1463  * @param[in, out] flow
1464  *   Pointer to the rte_flow structure.
1465  * @param[in] flow_size
1466  *   Size in bytes of the available space in @p flow, if too small some
1467  *   garbage may be present.
1468  * @param[out] error
1469  *   Pointer to error structure.
1470  *
1471  * @return
1472  *   On success the number of bytes consumed/necessary, if the returned value
1473  *   is lesser or equal to @p flow_size, the @p pattern has fully been
1474  *   converted, otherwise another call with this returned memory size should
1475  *   be done.
1476  *   On error, a negative errno value is returned and rte_errno is set.
1477  */
1478 static int
1479 mlx5_flow_items(const struct rte_flow_item pattern[],
1480                 struct rte_flow *flow, const size_t flow_size,
1481                 struct rte_flow_error *error)
1482 {
1483         int remain = flow_size;
1484         size_t size = 0;
1485
1486         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1487                 int ret = 0;
1488
1489                 switch (pattern->type) {
1490                 case RTE_FLOW_ITEM_TYPE_VOID:
1491                         break;
1492                 case RTE_FLOW_ITEM_TYPE_ETH:
1493                         ret = mlx5_flow_item_eth(pattern, flow, remain);
1494                         break;
1495                 case RTE_FLOW_ITEM_TYPE_VLAN:
1496                         ret = mlx5_flow_item_vlan(pattern, flow, remain);
1497                         break;
1498                 case RTE_FLOW_ITEM_TYPE_IPV4:
1499                         ret = mlx5_flow_item_ipv4(pattern, flow, remain);
1500                         break;
1501                 case RTE_FLOW_ITEM_TYPE_IPV6:
1502                         ret = mlx5_flow_item_ipv6(pattern, flow, remain);
1503                         break;
1504                 case RTE_FLOW_ITEM_TYPE_UDP:
1505                         ret = mlx5_flow_item_udp(pattern, flow, remain);
1506                         break;
1507                 case RTE_FLOW_ITEM_TYPE_TCP:
1508                         ret = mlx5_flow_item_tcp(pattern, flow, remain);
1509                         break;
1510                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1511                         ret = mlx5_flow_item_vxlan(pattern, flow, remain);
1512                         break;
1513                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1514                         ret = mlx5_flow_item_vxlan_gpe(pattern, flow,
1515                                                        remain);
1516                         break;
1517                 case RTE_FLOW_ITEM_TYPE_GRE:
1518                         ret = mlx5_flow_item_gre(pattern, flow, remain);
1519                         break;
1520                 case RTE_FLOW_ITEM_TYPE_MPLS:
1521                         ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
1522                         break;
1523                 default:
1524                         return rte_flow_error_set(error, ENOTSUP,
1525                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1526                                                   pattern,
1527                                                   "item not supported");
1528                 }
1529                 if (ret < 0)
1530                         return ret;
1531                 if (remain > ret)
1532                         remain -= ret;
1533                 else
1534                         remain = 0;
1535                 size += ret;
1536         }
1537         if (!flow->layers) {
1538                 const struct rte_flow_item item = {
1539                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1540                 };
1541
1542                 return mlx5_flow_item_eth(&item, flow, flow_size);
1543         }
1544         return size;
1545 }
1546
1547 /**
1548  * Convert the @p action into a Verbs specification after ensuring the NIC
1549  * will understand and process it correctly.
1550  * If the necessary size for the conversion is greater than the @p flow_size,
1551  * nothing is written in @p flow, the validation is still performed.
1552  *
1553  * @param[in, out] flow
1554  *   Pointer to flow structure.
1555  * @param[in] flow_size
1556  *   Size in bytes of the available space in @p flow, if too small, nothing is
1557  *   written.
1558  *
1559  * @return
1560  *   On success the number of bytes consumed/necessary, if the returned value
1561  *   is lesser or equal to @p flow_size, the @p action has fully been
1562  *   converted, otherwise another call with this returned memory size should
1563  *   be done.
1564  *   On error, a negative errno value is returned and rte_errno is set.
1565  */
1566 static int
1567 mlx5_flow_action_drop(struct rte_flow *flow, const size_t flow_size)
1568 {
1569         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1570         struct ibv_flow_spec_action_drop drop = {
1571                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1572                         .size = size,
1573         };
1574
1575         if (size <= flow_size)
1576                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1577         flow->fate |= MLX5_FLOW_FATE_DROP;
1578         return size;
1579 }
1580
1581 /**
1582  * Convert the @p action into @p flow after ensuring the NIC will understand
1583  * and process it correctly.
1584  *
1585  * @param[in] action
1586  *   Action configuration.
1587  * @param[in, out] flow
1588  *   Pointer to flow structure.
1589  *
1590  * @return
1591  *   0 on success, a negative errno value otherwise and rte_errno is set.
1592  */
1593 static int
1594 mlx5_flow_action_queue(const struct rte_flow_action *action,
1595                        struct rte_flow *flow)
1596 {
1597         const struct rte_flow_action_queue *queue = action->conf;
1598
1599         if (flow->queue)
1600                 (*flow->queue)[0] = queue->index;
1601         flow->rss.queue_num = 1;
1602         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1603         return 0;
1604 }
1605
1606 /**
1607  * Ensure the @p action will be understood and used correctly by the NIC.
1608  *
1609  * @param[in] action
1610  *   Action configuration.
1611  * @param[in, out] flow
1612  *   Pointer to the rte_flow structure.
1613  *
1614  * @return
1615  *   0 on success.
1616  */
1617 static int
1618 mlx5_flow_action_rss(const struct rte_flow_action *action,
1619                         struct rte_flow *flow)
1620 {
1621         const struct rte_flow_action_rss *rss = action->conf;
1622
1623         if (flow->queue)
1624                 memcpy((*flow->queue), rss->queue,
1625                        rss->queue_num * sizeof(uint16_t));
1626         flow->rss.queue_num = rss->queue_num;
1627         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1628         flow->rss.types = rss->types;
1629         flow->rss.level = rss->level;
1630         flow->fate |= MLX5_FLOW_FATE_RSS;
1631         return 0;
1632 }
1633
1634 /**
1635  * Convert the @p action into a Verbs specification after ensuring the NIC
1636  * will understand and process it correctly.
1637  * If the necessary size for the conversion is greater than the @p flow_size,
1638  * nothing is written in @p flow, the validation is still performed.
1639  *
1640  * @param[in, out] flow
1641  *   Pointer to flow structure.
1642  * @param[in] flow_size
1643  *   Size in bytes of the available space in @p flow, if too small, nothing is
1644  *   written.
1645  *
1646  * @return
1647  *   On success the number of bytes consumed/necessary, if the returned value
1648  *   is lesser or equal to @p flow_size, the @p action has fully been
1649  *   converted, otherwise another call with this returned memory size should
1650  *   be done.
1651  */
1652 static int
1653 mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
1654 {
1655         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1656         struct ibv_flow_spec_action_tag tag = {
1657                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1658                 .size = size,
1659                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1660         };
1661         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1662
1663         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1664                 size = 0;
1665         else if (size <= flow_size && verbs)
1666                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1667         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1668         return size;
1669 }
1670
1671 /**
1672  * Update verbs specification to modify the flag to mark.
1673  *
1674  * @param[in, out] verbs
1675  *   Pointer to the mlx5_flow_verbs structure.
1676  * @param[in] mark_id
1677  *   Mark identifier to replace the flag.
1678  */
1679 static void
1680 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1681 {
1682         struct ibv_spec_header *hdr;
1683         int i;
1684
1685         if (!verbs)
1686                 return;
1687         /* Update Verbs specification. */
1688         hdr = (struct ibv_spec_header *)verbs->specs;
1689         if (!hdr)
1690                 return;
1691         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1692                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1693                         struct ibv_flow_spec_action_tag *t =
1694                                 (struct ibv_flow_spec_action_tag *)hdr;
1695
1696                         t->tag_id = mlx5_flow_mark_set(mark_id);
1697                 }
1698                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1699         }
1700 }
1701
1702 /**
1703  * Convert the @p action into @p flow (or update the already present
1704  * Flag Verbs specification) after ensuring the NIC will understand and
1705  * process it correctly.
1706  * If the necessary size for the conversion is greater than the @p flow_size,
1707  * nothing is written in @p flow, the validation is still performed.
1708  *
1709  * @param[in] action
1710  *   Action configuration.
1711  * @param[in, out] flow
1712  *   Pointer to flow structure.
1713  * @param[in] flow_size
1714  *   Size in bytes of the available space in @p flow, if too small, nothing is
1715  *   written.
1716  *
1717  * @return
1718  *   On success the number of bytes consumed/necessary, if the returned value
1719  *   is lesser or equal to @p flow_size, the @p action has fully been
1720  *   converted, otherwise another call with this returned memory size should
1721  *   be done.
1722  */
1723 static int
1724 mlx5_flow_action_mark(const struct rte_flow_action *action,
1725                       struct rte_flow *flow, const size_t flow_size)
1726 {
1727         const struct rte_flow_action_mark *mark = action->conf;
1728         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1729         struct ibv_flow_spec_action_tag tag = {
1730                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1731                 .size = size,
1732         };
1733         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1734
1735         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1736                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1737                 size = 0;
1738         } else if (size <= flow_size) {
1739                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1740                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1741         }
1742         flow->modifier |= MLX5_FLOW_MOD_MARK;
1743         return size;
1744 }
1745
1746 /**
1747  * Convert the @p action into a Verbs specification after ensuring the NIC
1748  * will understand and process it correctly.
1749  * If the necessary size for the conversion is greater than the @p flow_size,
1750  * nothing is written in @p flow, the validation is still performed.
1751  *
1752  * @param[in] action
1753  *   Action configuration.
1754  * @param[in, out] flow
1755  *   Pointer to flow structure.
1756  * @param[in] flow_size
1757  *   Size in bytes of the available space in @p flow, if too small, nothing is
1758  *   written.
1759  * @param[out] error
1760  *   Pointer to error structure.
1761  *
1762  * @return
1763  *   On success the number of bytes consumed/necessary, if the returned value
1764  *   is lesser or equal to @p flow_size, the @p action has fully been
1765  *   converted, otherwise another call with this returned memory size should
1766  *   be done.
1767  *   On error, a negative errno value is returned and rte_errno is set.
1768  */
1769 static int
1770 mlx5_flow_action_count(struct rte_eth_dev *dev,
1771                        const struct rte_flow_action *action,
1772                        struct rte_flow *flow,
1773                        const size_t flow_size __rte_unused,
1774                        struct rte_flow_error *error)
1775 {
1776         const struct rte_flow_action_count *count = action->conf;
1777 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1778         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1779         struct ibv_flow_spec_counter_action counter = {
1780                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1781                 .size = size,
1782         };
1783 #endif
1784
1785         if (!flow->counter) {
1786                 flow->counter = mlx5_flow_counter_new(dev, count->shared,
1787                                                       count->id);
1788                 if (!flow->counter)
1789                         return rte_flow_error_set(error, ENOTSUP,
1790                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1791                                                   action,
1792                                                   "cannot get counter"
1793                                                   " context.");
1794         }
1795         flow->modifier |= MLX5_FLOW_MOD_COUNT;
1796 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1797         counter.counter_set_handle = flow->counter->cs->handle;
1798         if (size <= flow_size)
1799                 mlx5_flow_spec_verbs_add(flow, &counter, size);
1800         return size;
1801 #endif
1802         return 0;
1803 }
1804
1805 /**
1806  * Convert the @p action into @p flow after ensuring the NIC will understand
1807  * and process it correctly.
1808  * The conversion is performed action per action, each of them is written into
1809  * the @p flow if its size is lesser or equal to @p flow_size.
1810  * Validation and memory consumption computation are still performed until the
1811  * end of @p actions, unless an error is encountered.
1812  *
1813  * @param[in] dev
1814  *   Pointer to Ethernet device structure.
1815  * @param[in] actions
1816  *   Pointer to flow actions array.
1817  * @param[in, out] flow
1818  *   Pointer to the rte_flow structure.
1819  * @param[in] flow_size
1820  *   Size in bytes of the available space in @p flow, if too small some
1821  *   garbage may be present.
1822  * @param[out] error
1823  *   Pointer to error structure.
1824  *
1825  * @return
1826  *   On success the number of bytes consumed/necessary, if the returned value
1827  *   is lesser or equal to @p flow_size, the @p actions has fully been
1828  *   converted, otherwise another call with this returned memory size should
1829  *   be done.
1830  *   On error, a negative errno value is returned and rte_errno is set.
1831  */
1832 static int
1833 mlx5_flow_actions(struct rte_eth_dev *dev,
1834                   const struct rte_flow_action actions[],
1835                   struct rte_flow *flow, const size_t flow_size,
1836                   struct rte_flow_error *error)
1837 {
1838         size_t size = 0;
1839         int remain = flow_size;
1840         int ret = 0;
1841
1842         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1843                 switch (actions->type) {
1844                 case RTE_FLOW_ACTION_TYPE_VOID:
1845                         break;
1846                 case RTE_FLOW_ACTION_TYPE_FLAG:
1847                         ret = mlx5_flow_action_flag(flow, remain);
1848                         break;
1849                 case RTE_FLOW_ACTION_TYPE_MARK:
1850                         ret = mlx5_flow_action_mark(actions, flow, remain);
1851                         break;
1852                 case RTE_FLOW_ACTION_TYPE_DROP:
1853                         ret = mlx5_flow_action_drop(flow, remain);
1854                         break;
1855                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1856                         ret = mlx5_flow_action_queue(actions, flow);
1857                         break;
1858                 case RTE_FLOW_ACTION_TYPE_RSS:
1859                         ret = mlx5_flow_action_rss(actions, flow);
1860                         break;
1861                 case RTE_FLOW_ACTION_TYPE_COUNT:
1862                         ret = mlx5_flow_action_count(dev, actions, flow, remain,
1863                                                      error);
1864                         break;
1865                 default:
1866                         return rte_flow_error_set(error, ENOTSUP,
1867                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1868                                                   actions,
1869                                                   "action not supported");
1870                 }
1871                 if (ret < 0)
1872                         return ret;
1873                 if (remain > ret)
1874                         remain -= ret;
1875                 else
1876                         remain = 0;
1877                 size += ret;
1878         }
1879         if (!flow->fate)
1880                 return rte_flow_error_set(error, ENOTSUP,
1881                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1882                                           NULL,
1883                                           "no fate action found");
1884         return size;
1885 }
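
/*
 * Illustrative sketch (editorial, not part of the driver): an action list
 * accepted by this converter, combining a mark with a queue fate action,
 * could look roughly as follows; the mark id and queue index are arbitrary
 * example values.
 *
 *   struct rte_flow_action_mark mark = { .id = 42 };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * Omitting every fate action (drop, queue or RSS) makes the conversion fail
 * with "no fate action found".
 */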
1886
1887 /**
1888  * Validate flow rule and fill flow structure accordingly.
1889  *
1890  * @param dev
1891  *   Pointer to Ethernet device.
1892  * @param[out] flow
1893  *   Pointer to flow structure.
1894  * @param flow_size
1895  *   Size of allocated space for @p flow.
1896  * @param[in] attr
1897  *   Flow rule attributes.
1898  * @param[in] pattern
1899  *   Pattern specification (list terminated by the END pattern item).
1900  * @param[in] actions
1901  *   Associated actions (list terminated by the END action).
1902  * @param[out] error
1903  *   Perform verbose error reporting if not NULL.
1904  *
1905  * @return
1906  *   A positive value representing the size of the flow object in bytes
1907  *   regardless of @p flow_size on success, a negative errno value otherwise
1908  *   and rte_errno is set.
1909  */
1910 static int
1911 mlx5_flow_merge_switch(struct rte_eth_dev *dev,
1912                        struct rte_flow *flow,
1913                        size_t flow_size,
1914                        const struct rte_flow_attr *attr,
1915                        const struct rte_flow_item pattern[],
1916                        const struct rte_flow_action actions[],
1917                        struct rte_flow_error *error)
1918 {
1919         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1920         uint16_t port_id[!n + n];
1921         struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
1922         size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
1923         unsigned int i;
1924         unsigned int own = 0;
1925         int ret;
1926
1927         /* At least one port is needed when no switch domain is present. */
1928         if (!n) {
1929                 n = 1;
1930                 port_id[0] = dev->data->port_id;
1931         } else {
1932                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1933         }
1934         for (i = 0; i != n; ++i) {
1935                 struct rte_eth_dev_info dev_info;
1936
1937                 rte_eth_dev_info_get(port_id[i], &dev_info);
1938                 if (port_id[i] == dev->data->port_id)
1939                         own = i;
1940                 ptoi[i].port_id = port_id[i];
1941                 ptoi[i].ifindex = dev_info.if_index;
1942         }
1943         /* Ensure first entry of ptoi[] is the current device. */
1944         if (own) {
1945                 ptoi[n] = ptoi[0];
1946                 ptoi[0] = ptoi[own];
1947                 ptoi[own] = ptoi[n];
1948         }
1949         /* An entry with zero ifindex terminates ptoi[]. */
1950         ptoi[n].port_id = 0;
1951         ptoi[n].ifindex = 0;
1952         if (flow_size < off)
1953                 flow_size = 0;
1954         ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
1955                                      flow_size ? flow_size - off : 0,
1956                                      ptoi, attr, pattern, actions, error);
1957         if (ret < 0)
1958                 return ret;
1959         if (flow_size) {
1960                 *flow = (struct rte_flow){
1961                         .attributes = *attr,
1962                         .nl_flow = (uint8_t *)flow + off,
1963                 };
1964                 /*
1965                  * Generate a reasonably unique handle based on the address
1966                  * of the target buffer.
1967                  *
1968                  * This is straightforward on 32-bit systems where the flow
1969                  * pointer can be used directly. Otherwise, its least
1970                  * significant part is taken after shifting it by the
1971                  * previous power of two of the pointed buffer size.
1972                  */
1973                 if (sizeof(flow) <= 4)
1974                         mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
1975                 else
1976                         mlx5_nl_flow_brand
1977                                 (flow->nl_flow,
1978                                  (uintptr_t)flow >>
1979                                  rte_log2_u32(rte_align32prevpow2(flow_size)));
1980         }
1981         return off + ret;
1982 }
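
/*
 * Editorial note: as a worked example of the branding logic above, assuming
 * a 64-bit build and a flow_size of 4096 bytes, rte_align32prevpow2(4096)
 * yields 4096 and rte_log2_u32(4096) yields 12, so the handle becomes
 * (uintptr_t)flow >> 12. Shifting by the previous power of two of the buffer
 * size keeps the handle reasonably unique across distinct flow buffers while
 * fitting it into fewer significant bits.
 */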
1983
1984 static unsigned int
1985 mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
1986 {
1987         const struct rte_flow_item *item;
1988         unsigned int has_vlan = 0;
1989
1990         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
1991                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
1992                         has_vlan = 1;
1993                         break;
1994                 }
1995         }
1996         if (has_vlan)
1997                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
1998                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
1999         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2000                                MLX5_EXPANSION_ROOT_OUTER;
2001 }
2002
2003 /**
2004  * Convert the @p attributes, @p pattern and @p actions into a flow for the
2005  * NIC after ensuring the NIC will understand and process it correctly.
2006  * The conversion is performed item per item and action per action, each of
2007  * them is written into the @p flow if its size is lesser or equal to @p
2008  * flow_size.
2009  * Validation and memory consumption computation are still performed until the
2010  * end, unless an error is encountered.
2011  *
2012  * @param[in] dev
2013  *   Pointer to Ethernet device.
2014  * @param[in, out] flow
2015  *   Pointer to flow structure.
2016  * @param[in] flow_size
2017  *   Size in bytes of the available space in @p flow, if too small some
2018  *   garbage may be present.
2019  * @param[in] attributes
2020  *   Flow rule attributes.
2021  * @param[in] pattern
2022  *   Pattern specification (list terminated by the END pattern item).
2023  * @param[in] actions
2024  *   Associated actions (list terminated by the END action).
2025  * @param[out] error
2026  *   Perform verbose error reporting if not NULL.
2027  *
2028  * @return
2029  *   On success the number of bytes consumed/necessary, if the returned value
2030  *   is lesser or equal to @p flow_size, the flow has fully been converted and
2031  *   can be applied, otherwise another call with this returned memory size
2032  *   should be done.
2033  *   On error, a negative errno value is returned and rte_errno is set.
2034  */
2035 static int
2036 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2037                 const size_t flow_size,
2038                 const struct rte_flow_attr *attributes,
2039                 const struct rte_flow_item pattern[],
2040                 const struct rte_flow_action actions[],
2041                 struct rte_flow_error *error)
2042 {
2043         struct rte_flow local_flow = { .layers = 0, };
2044         size_t size = sizeof(*flow);
2045         union {
2046                 struct rte_flow_expand_rss buf;
2047                 uint8_t buffer[2048];
2048         } expand_buffer;
2049         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2050         struct mlx5_flow_verbs *original_verbs = NULL;
2051         size_t original_verbs_size = 0;
2052         uint32_t original_layers = 0;
2053         int expanded_pattern_idx = 0;
2054         int ret = 0;
2055         uint32_t i;
2056
2057         if (attributes->transfer)
2058                 return mlx5_flow_merge_switch(dev, flow, flow_size,
2059                                               attributes, pattern,
2060                                               actions, error);
2061         if (size > flow_size)
2062                 flow = &local_flow;
2063         ret = mlx5_flow_attributes(dev->data->dev_private, attributes, flow);
2064         if (ret < 0)
2065                 return ret;
2066         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2067         if (ret < 0)
2068                 return ret;
2069         if (local_flow.rss.types) {
2070                 unsigned int graph_root;
2071
2072                 graph_root = mlx5_find_graph_root(pattern,
2073                                                   local_flow.rss.level);
2074                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2075                                           pattern, local_flow.rss.types,
2076                                           mlx5_support_expansion,
2077                                           graph_root);
2078                 assert(ret > 0 &&
2079                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2080         } else {
2081                 buf->entries = 1;
2082                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2083         }
2084         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2085                                sizeof(void *));
2086         if (size <= flow_size)
2087                 flow->queue = (void *)(flow + 1);
2088         LIST_INIT(&flow->verbs);
2089         flow->layers = 0;
2090         flow->modifier = 0;
2091         flow->fate = 0;
2092         for (i = 0; i != buf->entries; ++i) {
2093                 size_t off = size;
2094                 size_t off2;
2095
2096                 flow->layers = original_layers;
2097                 size += sizeof(struct ibv_flow_attr) +
2098                         sizeof(struct mlx5_flow_verbs);
2099                 off2 = size;
2100                 if (size < flow_size) {
2101                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2102                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2103                         flow->cur_verbs->specs =
2104                                 (void *)(flow->cur_verbs->attr + 1);
2105                 }
2106                 /* First iteration convert the pattern into Verbs. */
2107                 if (i == 0) {
2108                         /* Actions don't need to be converted several times. */
2109                         ret = mlx5_flow_actions(dev, actions, flow,
2110                                                 (size < flow_size) ?
2111                                                 flow_size - size : 0,
2112                                                 error);
2113                         if (ret < 0)
2114                                 return ret;
2115                         size += ret;
2116                 } else {
2117                         /*
2118                          * Next iteration means the pattern has already been
2119                          * converted and an expansion is necessary to match
2120                          * the user RSS request.  For that only the expanded
2121                          * items will be converted, the common part with the
2122                          * user pattern are just copied into the next buffer
2123                          * zone.
2124                          */
2125                         size += original_verbs_size;
2126                         if (size < flow_size) {
2127                                 rte_memcpy(flow->cur_verbs->attr,
2128                                            original_verbs->attr,
2129                                            original_verbs_size +
2130                                            sizeof(struct ibv_flow_attr));
2131                                 flow->cur_verbs->size = original_verbs_size;
2132                         }
2133                 }
2134                 ret = mlx5_flow_items
2135                         ((const struct rte_flow_item *)
2136                          &buf->entry[i].pattern[expanded_pattern_idx],
2137                          flow,
2138                          (size < flow_size) ? flow_size - size : 0, error);
2139                 if (ret < 0)
2140                         return ret;
2141                 size += ret;
2142                 if (size <= flow_size) {
2143                         mlx5_flow_adjust_priority(dev, flow);
2144                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2145                 }
2146                 /*
2147                  * Keep a pointer of the first verbs conversion and the layers
2148                  * it has encountered.
2149                  */
2150                 if (i == 0) {
2151                         original_verbs = flow->cur_verbs;
2152                         original_verbs_size = size - off2;
2153                         original_layers = flow->layers;
2154                         /*
2155                          * move the index of the expanded pattern to the
2156                          * first item not addressed yet.
2157                          */
2158                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2159                                 expanded_pattern_idx++;
2160                         } else {
2161                                 const struct rte_flow_item *item = pattern;
2162
2163                                 for (item = pattern;
2164                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2165                                      ++item)
2166                                         expanded_pattern_idx++;
2167                         }
2168                 }
2169         }
2170         /* Restore the original layers in the flow. */
2171         flow->layers = original_layers;
2172         return size;
2173 }
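
/*
 * Editorial note: as an illustration of the expansion loop above, an RSS
 * flow created with a bare ETH/IPv4 pattern and rss.types covering TCP and
 * UDP is expanded by rte_flow_expand_rss() into several entries (e.g.
 * ETH/IPv4, ETH/IPv4/UDP, ETH/IPv4/TCP). The actions are converted once on
 * the first iteration and, when the provided buffer is large enough, every
 * entry then gets its own Verbs attribute and specification buffer appended
 * to flow->verbs.
 */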
2174
2175 /**
2176  * Look up and set the tunnel ptype in the Rx data path. Only a single tunnel
2177  * ptype can be used; if several different tunnel rules are used on this queue,
2178  * the tunnel ptype is cleared.
2179  *
2180  * @param rxq_ctrl
2181  *   Rx queue to update.
2182  */
2183 static void
2184 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2185 {
2186         unsigned int i;
2187         uint32_t tunnel_ptype = 0;
2188
2189         /* Look up for the ptype to use. */
2190         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2191                 if (!rxq_ctrl->flow_tunnels_n[i])
2192                         continue;
2193                 if (!tunnel_ptype) {
2194                         tunnel_ptype = tunnels_info[i].ptype;
2195                 } else {
2196                         tunnel_ptype = 0;
2197                         break;
2198                 }
2199         }
2200         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2201 }
2202
2203 /**
2204  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2205  *
2206  * @param[in] dev
2207  *   Pointer to Ethernet device.
2208  * @param[in] flow
2209  *   Pointer to flow structure.
2210  */
2211 static void
2212 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2213 {
2214         struct priv *priv = dev->data->dev_private;
2215         const int mark = !!(flow->modifier &
2216                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2217         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2218         unsigned int i;
2219
2220         for (i = 0; i != flow->rss.queue_num; ++i) {
2221                 int idx = (*flow->queue)[i];
2222                 struct mlx5_rxq_ctrl *rxq_ctrl =
2223                         container_of((*priv->rxqs)[idx],
2224                                      struct mlx5_rxq_ctrl, rxq);
2225
2226                 if (mark) {
2227                         rxq_ctrl->rxq.mark = 1;
2228                         rxq_ctrl->flow_mark_n++;
2229                 }
2230                 if (tunnel) {
2231                         unsigned int j;
2232
2233                         /* Increase the counter matching the flow. */
2234                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2235                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2236                                     tunnels_info[j].tunnel) {
2237                                         rxq_ctrl->flow_tunnels_n[j]++;
2238                                         break;
2239                                 }
2240                         }
2241                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2242                 }
2243         }
2244 }
2245
2246 /**
2247  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2248  * @p flow if no other flow uses it with the same kind of request.
2249  *
2250  * @param dev
2251  *   Pointer to Ethernet device.
2252  * @param[in] flow
2253  *   Pointer to the flow.
2254  */
2255 static void
2256 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2257 {
2258         struct priv *priv = dev->data->dev_private;
2259         const int mark = !!(flow->modifier &
2260                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2261         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2262         unsigned int i;
2263
2264         assert(dev->data->dev_started);
2265         for (i = 0; i != flow->rss.queue_num; ++i) {
2266                 int idx = (*flow->queue)[i];
2267                 struct mlx5_rxq_ctrl *rxq_ctrl =
2268                         container_of((*priv->rxqs)[idx],
2269                                      struct mlx5_rxq_ctrl, rxq);
2270
2271                 if (mark) {
2272                         rxq_ctrl->flow_mark_n--;
2273                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2274                 }
2275                 if (tunnel) {
2276                         unsigned int j;
2277
2278                         /* Decrease the counter matching the flow. */
2279                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2280                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2281                                     tunnels_info[j].tunnel) {
2282                                         rxq_ctrl->flow_tunnels_n[j]--;
2283                                         break;
2284                                 }
2285                         }
2286                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2287                 }
2288         }
2289 }
2290
2291 /**
2292  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2293  *
2294  * @param dev
2295  *   Pointer to Ethernet device.
2296  */
2297 static void
2298 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2299 {
2300         struct priv *priv = dev->data->dev_private;
2301         unsigned int i;
2302
2303         for (i = 0; i != priv->rxqs_n; ++i) {
2304                 struct mlx5_rxq_ctrl *rxq_ctrl;
2305                 unsigned int j;
2306
2307                 if (!(*priv->rxqs)[i])
2308                         continue;
2309                 rxq_ctrl = container_of((*priv->rxqs)[i],
2310                                         struct mlx5_rxq_ctrl, rxq);
2311                 rxq_ctrl->flow_mark_n = 0;
2312                 rxq_ctrl->rxq.mark = 0;
2313                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2314                         rxq_ctrl->flow_tunnels_n[j] = 0;
2315                 rxq_ctrl->rxq.tunnel = 0;
2316         }
2317 }
2318
2319 /*
2320  * Validate the flag action.
2321  *
2322  * @param[in] action_flags
2323  *   Bit-field that holds the actions detected until now.
2324  * @param[out] error
2325  *   Pointer to error structure.
2326  *
2327  * @return
2328  *   0 on success, a negative errno value otherwise and rte_errno is set.
2329  */
2330 static int
2331 mlx5_flow_validate_action_flag(uint64_t action_flags,
2332                                struct rte_flow_error *error)
2333 {
2335         if (action_flags & MLX5_FLOW_ACTION_DROP)
2336                 return rte_flow_error_set(error, EINVAL,
2337                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2338                                           "can't drop and flag in same flow");
2339         if (action_flags & MLX5_FLOW_ACTION_MARK)
2340                 return rte_flow_error_set(error, EINVAL,
2341                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2342                                           "can't mark and flag in same flow");
2343         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2344                 return rte_flow_error_set(error, EINVAL,
2345                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2346                                           "can't have 2 flag"
2347                                           " actions in same flow");
2348         return 0;
2349 }
2350
2351 /*
2352  * Validate the mark action.
2353  *
2354  * @param[in] action
2355  *   Pointer to the mark action.
2356  * @param[in] action_flags
2357  *   Bit-field that holds the actions detected until now.
2358  * @param[out] error
2359  *   Pointer to error structure.
2360  *
2361  * @return
2362  *   0 on success, a negative errno value otherwise and rte_errno is set.
2363  */
2364 static int
2365 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
2366                                uint64_t action_flags,
2367                                struct rte_flow_error *error)
2368 {
2369         const struct rte_flow_action_mark *mark = action->conf;
2370
2371         if (!mark)
2372                 return rte_flow_error_set(error, EINVAL,
2373                                           RTE_FLOW_ERROR_TYPE_ACTION,
2374                                           action,
2375                                           "configuration cannot be null");
2376         if (mark->id >= MLX5_FLOW_MARK_MAX)
2377                 return rte_flow_error_set(error, EINVAL,
2378                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2379                                           &mark->id,
2380                                           "mark id must be in 0 <= id < "
2381                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2382         if (action_flags & MLX5_FLOW_ACTION_DROP)
2383                 return rte_flow_error_set(error, EINVAL,
2384                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2385                                           "can't drop and mark in same flow");
2386         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2387                 return rte_flow_error_set(error, EINVAL,
2388                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2389                                           "can't flag and mark in same flow");
2390         if (action_flags & MLX5_FLOW_ACTION_MARK)
2391                 return rte_flow_error_set(error, EINVAL,
2392                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2393                                           "can't have 2 mark actions in same"
2394                                           " flow");
2395         return 0;
2396 }
2397
2398 /*
2399  * Validate the drop action.
2400  *
2401  * @param[in] action_flags
2402  *   Bit-field that holds the actions detected until now.
2403  * @param[out] error
2404  *   Pointer to error structure.
2405  *
2406  * @return
2407  *   0 on success, a negative errno value otherwise and rte_errno is set.
2408  */
2409 static int
2410 mlx5_flow_validate_action_drop(uint64_t action_flags,
2411                                struct rte_flow_error *error)
2412 {
2413         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2414                 return rte_flow_error_set(error, EINVAL,
2415                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2416                                           "can't drop and flag in same flow");
2417         if (action_flags & MLX5_FLOW_ACTION_MARK)
2418                 return rte_flow_error_set(error, EINVAL,
2419                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2420                                           "can't drop and mark in same flow");
2421         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2422                 return rte_flow_error_set(error, EINVAL,
2423                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2424                                           "can't have 2 fate actions in"
2425                                           " same flow");
2426         return 0;
2427 }
2428
2429 /*
2431  * Validate the queue action.
2432  *
2433  * @param[in] action
2434  *   Pointer to the queue action.
2435  * @param[in] action_flags
2436  *   Bit-field that holds the actions detected until now.
2437  * @param[in] dev
2438  *   Pointer to the Ethernet device structure.
2439  * @param[out] error
2440  *   Pointer to error structure.
2441  *
2442  * @return
2443  *   0 on success, a negative errno value otherwise and rte_errno is set.
2444  */
2445 static int
2446 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
2447                                 uint64_t action_flags,
2448                                 struct rte_eth_dev *dev,
2449                                 struct rte_flow_error *error)
2450 {
2451         struct priv *priv = dev->data->dev_private;
2452         const struct rte_flow_action_queue *queue = action->conf;
2453
2454         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2455                 return rte_flow_error_set(error, EINVAL,
2456                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2457                                           "can't have 2 fate actions in"
2458                                           " same flow");
2459         if (queue->index >= priv->rxqs_n)
2460                 return rte_flow_error_set(error, EINVAL,
2461                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2462                                           &queue->index,
2463                                           "queue index out of range");
2464         if (!(*priv->rxqs)[queue->index])
2465                 return rte_flow_error_set(error, EINVAL,
2466                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2467                                           &queue->index,
2468                                           "queue is not configured");
2469         return 0;
2470 }
2471
2472 /*
2474  * Validate the rss action.
2475  *
2476  * @param[in] action
2477  *   Pointer to the rss action.
2478  * @param[in] action_flags
2479  *   Bit-field that holds the actions detected until now.
2480  * @param[in] dev
2481  *   Pointer to the Ethernet device structure.
2482  * @param[out] error
2483  *   Pointer to error structure.
2484  *
2485  * @return
2486  *   0 on success, a negative errno value otherwise and rte_errno is set.
2487  */
2488 static int
2489 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2490                               uint64_t action_flags,
2491                               struct rte_eth_dev *dev,
2492                               struct rte_flow_error *error)
2493 {
2494         struct priv *priv = dev->data->dev_private;
2495         const struct rte_flow_action_rss *rss = action->conf;
2496         unsigned int i;
2497
2498         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2499                 return rte_flow_error_set(error, EINVAL,
2500                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2501                                           "can't have 2 fate actions"
2502                                           " in same flow");
2503         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2504             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2505                 return rte_flow_error_set(error, ENOTSUP,
2506                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2507                                           &rss->func,
2508                                           "RSS hash function not supported");
2509 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2510         if (rss->level > 2)
2511 #else
2512         if (rss->level > 1)
2513 #endif
2514                 return rte_flow_error_set(error, ENOTSUP,
2515                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2516                                           &rss->level,
2517                                           "tunnel RSS is not supported");
2518         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2519                 return rte_flow_error_set(error, ENOTSUP,
2520                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2521                                           &rss->key_len,
2522                                           "RSS hash key too small");
2523         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2524                 return rte_flow_error_set(error, ENOTSUP,
2525                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2526                                           &rss->key_len,
2527                                           "RSS hash key too large");
2528         if (rss->queue_num > priv->config.ind_table_max_size)
2529                 return rte_flow_error_set(error, ENOTSUP,
2530                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2531                                           &rss->queue_num,
2532                                           "number of queues too large");
2533         if (rss->types & MLX5_RSS_HF_MASK)
2534                 return rte_flow_error_set(error, ENOTSUP,
2535                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2536                                           &rss->types,
2537                                           "some RSS protocols are not"
2538                                           " supported");
2539         for (i = 0; i != rss->queue_num; ++i) {
2540                 if (!(*priv->rxqs)[rss->queue[i]])
2541                         return rte_flow_error_set
2542                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2543                                  &rss->queue[i], "queue is not configured");
2544         }
2545         return 0;
2546 }
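
/*
 * Illustrative sketch (editorial, not part of the driver): an RSS action
 * configuration that satisfies the checks above could look roughly like the
 * following, assuming "queues" only lists configured Rx queues and "rss_key"
 * is an application buffer of MLX5_RSS_HASH_KEY_LEN bytes.
 *
 *   static const uint16_t queues[] = { 0, 1, 2, 3 };
 *   struct rte_flow_action_rss rss_conf = {
 *           .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *           .level = 1,
 *           .types = ETH_RSS_IP | ETH_RSS_UDP,
 *           .key_len = MLX5_RSS_HASH_KEY_LEN,
 *           .key = rss_key,
 *           .queue_num = RTE_DIM(queues),
 *           .queue = queues,
 *   };
 */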
2547
2548 /*
2549  * Validate the count action.
2550  *
2551  * @param[in] dev
2552  *   Pointer to the Ethernet device structure.
2553  * @param[out] error
2554  *   Pointer to error structure.
2555  *
2556  * @return
2557  *   0 on success, a negative errno value otherwise and rte_errno is set.
2558  */
2559 static int
2560 mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
2561                                 struct rte_flow_error *error)
2562 {
2563         struct priv *priv = dev->data->dev_private;
2564
2565         if (!priv->config.flow_counter_en)
2566                 return rte_flow_error_set(error, ENOTSUP,
2567                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2568                                           "flow counters are not supported.");
2569         return 0;
2570 }
2571
2572 /**
2573  * Verify the @p attributes will be correctly understood by the NIC and store
2574  * them in the @p flow if everything is correct.
2575  *
2576  * @param[in] dev
2577  *   Pointer to the Ethernet device structure.
2578  * @param[in] attributes
2579  *   Pointer to flow attributes
2580  * @param[out] error
2581  *   Pointer to error structure.
2582  *
2583  * @return
2584  *   0 on success, a negative errno value otherwise and rte_errno is set.
2585  */
2586 static int
2587 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2588                               const struct rte_flow_attr *attributes,
2589                               struct rte_flow_error *error)
2590 {
2591         struct priv *priv = dev->data->dev_private;
2592         uint32_t priority_max = priv->config.flow_prio - 1;
2593
2594         if (attributes->group)
2595                 return rte_flow_error_set(error, ENOTSUP,
2596                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2597                                           NULL, "groups are not supported");
2598         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
2599             attributes->priority >= priority_max)
2600                 return rte_flow_error_set(error, ENOTSUP,
2601                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2602                                           NULL, "priority out of range");
2603         if (attributes->egress)
2604                 return rte_flow_error_set(error, ENOTSUP,
2605                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2606                                           "egress is not supported");
2607         if (attributes->transfer)
2608                 return rte_flow_error_set(error, ENOTSUP,
2609                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2610                                           NULL, "transfer is not supported");
2611         if (!attributes->ingress)
2612                 return rte_flow_error_set(error, EINVAL,
2613                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2614                                           NULL,
2615                                           "ingress attribute is mandatory");
2616         return 0;
2617 }
2618
2619 /**
2620  * Validate Ethernet item.
2621  *
2622  * @param[in] item
2623  *   Item specification.
2624  * @param[in] item_flags
2625  *   Bit-field that holds the items detected until now.
2626  * @param[out] error
2627  *   Pointer to error structure.
2628  *
2629  * @return
2630  *   0 on success, a negative errno value otherwise and rte_errno is set.
2631  */
2632 static int
2633 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2634                             uint64_t item_flags,
2635                             struct rte_flow_error *error)
2636 {
2637         const struct rte_flow_item_eth *mask = item->mask;
2638         const struct rte_flow_item_eth nic_mask = {
2639                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2640                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2641                 .type = RTE_BE16(0xffff),
2642         };
2643         int ret;
2644         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2645
2646         if (item_flags & MLX5_FLOW_LAYER_OUTER_L2)
2647                 return rte_flow_error_set(error, ENOTSUP,
2648                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2649                                           "3 levels of L2 are not supported");
2650         if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel)
2651                 return rte_flow_error_set(error, ENOTSUP,
2652                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2653                                           "2 L2 layers without tunnel are not supported");
2654         if (!mask)
2655                 mask = &rte_flow_item_eth_mask;
2656         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2657                                         (const uint8_t *)&nic_mask,
2658                                         sizeof(struct rte_flow_item_eth),
2659                                         error);
2660         return ret;
2661 }
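
/*
 * Illustrative sketch (not part of the driver): an ETH item within the
 * nic_mask above, matching only the destination MAC address.  Names are
 * hypothetical.
 *
 *        static const struct rte_flow_item_eth eth_spec = {
 *                .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *        };
 *        static const struct rte_flow_item_eth eth_mask = {
 *                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *        };
 *        static const struct rte_flow_item example_eth = {
 *                .type = RTE_FLOW_ITEM_TYPE_ETH,
 *                .spec = &eth_spec,
 *                .mask = &eth_mask,
 *        };
 */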
2662
2663 /**
2664  * Validate VLAN item.
2665  *
2666  * @param[in] item
2667  *   Item specification.
2668  * @param[in] item_flags
2669  *   Bit-fields that hold the items detected until now.
2670  * @param[out] error
2671  *   Pointer to error structure.
2672  *
2673  * @return
2674  *   0 on success, a negative errno value otherwise and rte_errno is set.
2675  */
2676 static int
2677 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2678                              uint64_t item_flags,
2679                              struct rte_flow_error *error)
2680 {
2681         const struct rte_flow_item_vlan *spec = item->spec;
2682         const struct rte_flow_item_vlan *mask = item->mask;
2683         const struct rte_flow_item_vlan nic_mask = {
2684                 .tci = RTE_BE16(0x0fff),
2685                 .inner_type = RTE_BE16(0xffff),
2686         };
2687         uint16_t vlan_tag = 0;
2688         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2689         int ret;
2690         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2691                                         MLX5_FLOW_LAYER_INNER_L4) :
2692                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2693                                         MLX5_FLOW_LAYER_OUTER_L4);
2694         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2695                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2696
2697         if (item_flags & vlanm)
2698                 return rte_flow_error_set(error, EINVAL,
2699                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2700                                           "VLAN layer already configured");
2701         else if ((item_flags & l34m) != 0)
2702                 return rte_flow_error_set(error, EINVAL,
2703                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2704                                           "L2 layer cannot follow L3/L4 layer");
2705         if (!mask)
2706                 mask = &rte_flow_item_vlan_mask;
2707         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2708                                         (const uint8_t *)&nic_mask,
2709                                         sizeof(struct rte_flow_item_vlan),
2710                                         error);
2711         if (ret)
2712                 return ret;
2713         if (spec) {
2714                 vlan_tag = spec->tci;
2715                 vlan_tag &= mask->tci;
2716         }
2717         /*
2718          * From the Verbs perspective an empty VLAN is equivalent
2719          * to a packet without a VLAN layer.
2720          */
2721         if (!vlan_tag)
2722                 return rte_flow_error_set(error, EINVAL,
2723                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2724                                           item->spec,
2725                                           "VLAN cannot be empty");
2726         return 0;
2727 }
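
/*
 * Illustrative sketch (not part of the driver): a VLAN item the checks above
 * accept.  The mask stays within the VID bits of nic_mask and the VID is
 * non-zero, since an all-zero TCI is rejected as an empty VLAN.  Names are
 * hypothetical.
 *
 *        static const struct rte_flow_item_vlan vlan_spec = {
 *                .tci = RTE_BE16(100),
 *        };
 *        static const struct rte_flow_item_vlan vlan_mask = {
 *                .tci = RTE_BE16(0x0fff),
 *        };
 */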
2728
2729 /**
2730  * Validate IPV4 item.
2731  *
2732  * @param[in] item
2733  *   Item specification.
2734  * @param[in] item_flags
2735  *   Bit-fields that hold the items detected until now.
2736  * @param[out] error
2737  *   Pointer to error structure.
2738  *
2739  * @return
2740  *   0 on success, a negative errno value otherwise and rte_errno is set.
2741  */
2742 static int
2743 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2744                              uint64_t item_flags,
2745                              struct rte_flow_error *error)
2746 {
2747         const struct rte_flow_item_ipv4 *mask = item->mask;
2748         const struct rte_flow_item_ipv4 nic_mask = {
2749                 .hdr = {
2750                         .src_addr = RTE_BE32(0xffffffff),
2751                         .dst_addr = RTE_BE32(0xffffffff),
2752                         .type_of_service = 0xff,
2753                         .next_proto_id = 0xff,
2754                 },
2755         };
2756         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2757         int ret;
2758
2759         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2760                                    MLX5_FLOW_LAYER_OUTER_L3))
2761                 return rte_flow_error_set(error, ENOTSUP,
2762                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2763                                           "multiple L3 layers not supported");
2764         else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2765                                         MLX5_FLOW_LAYER_OUTER_L4))
2766                 return rte_flow_error_set(error, EINVAL,
2767                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2768                                           "L3 cannot follow an L4 layer.");
2769         if (!mask)
2770                 mask = &rte_flow_item_ipv4_mask;
2771         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2772                                         (const uint8_t *)&nic_mask,
2773                                         sizeof(struct rte_flow_item_ipv4),
2774                                         error);
2775         if (ret < 0)
2776                 return ret;
2777         return 0;
2778 }
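
/*
 * Illustrative sketch (not part of the driver): an IPv4 item restricted to
 * UDP (next_proto_id 0x11).  When both spec and mask set hdr.next_proto_id,
 * the caller propagates the value as next_protocol so the following L4 item
 * can be cross-checked.  Names and addresses are hypothetical.
 *
 *        static const struct rte_flow_item_ipv4 ipv4_spec = {
 *                .hdr = {
 *                        .dst_addr = RTE_BE32(0xc0a80001),
 *                        .next_proto_id = 0x11,
 *                },
 *        };
 *        static const struct rte_flow_item_ipv4 ipv4_mask = {
 *                .hdr = {
 *                        .dst_addr = RTE_BE32(0xffffffff),
 *                        .next_proto_id = 0xff,
 *                },
 *        };
 */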
2779
2780 /**
2781  * Validate IPV6 item.
2782  *
2783  * @param[in] item
2784  *   Item specification.
2785  * @param[in] item_flags
2786  *   Bit-fields that hold the items detected until now.
2787  * @param[out] error
2788  *   Pointer to error structure.
2789  *
2790  * @return
2791  *   0 on success, a negative errno value otherwise and rte_errno is set.
2792  */
2793 static int
2794 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2795                              uint64_t item_flags,
2796                              struct rte_flow_error *error)
2797 {
2798         const struct rte_flow_item_ipv6 *mask = item->mask;
2799         const struct rte_flow_item_ipv6 nic_mask = {
2800                 .hdr = {
2801                         .src_addr =
2802                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2803                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2804                         .dst_addr =
2805                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2806                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2807                         .vtc_flow = RTE_BE32(0xffffffff),
2808                         .proto = 0xff,
2809                         .hop_limits = 0xff,
2810                 },
2811         };
2812         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2813         int ret;
2814
2815         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2816                                    MLX5_FLOW_LAYER_OUTER_L3))
2817                 return rte_flow_error_set(error, ENOTSUP,
2818                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2819                                           "multiple L3 layers not supported");
2820         else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2821                                         MLX5_FLOW_LAYER_OUTER_L4))
2822                 return rte_flow_error_set(error, EINVAL,
2823                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2824                                           "L3 cannot follow an L4 layer.");
2825         /*
2826          * IPv6 is not recognised by the NIC inside a GRE tunnel.
2827          * Such a rule has to be rejected here because Verbs would
2828          * otherwise accept it without matching IPv6 traffic.  Issue
2829          * reproduced with Mellanox OFED 4.3-3.0.2.1 and 4.4-1.0.0.0.
2830          */
2831         if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
2832                 return rte_flow_error_set(error, ENOTSUP,
2833                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2834                                           "IPv6 inside a GRE tunnel is"
2835                                           " not recognised.");
2836         if (!mask)
2837                 mask = &rte_flow_item_ipv6_mask;
2838         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2839                                         (const uint8_t *)&nic_mask,
2840                                         sizeof(struct rte_flow_item_ipv6),
2841                                         error);
2842         if (ret < 0)
2843                 return ret;
2844         return 0;
2845 }
2846
2847 /**
2848  * Validate UDP item.
2849  *
2850  * @param[in] item
2851  *   Item specification.
2852  * @param[in] item_flags
2853  *   Bit-fields that hold the items detected until now.
2854  * @param[in] target_protocol
2855  *   The next protocol in the previous item.
2856  * @param[out] error
2857  *   Pointer to error structure.
2858  *
2859  * @return
2860  *   0 on success, a negative errno value otherwise and rte_errno is set.
2861  */
2862 static int
2863 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2864                             uint64_t item_flags,
2865                             uint8_t target_protocol,
2866                             struct rte_flow_error *error)
2867 {
2868         const struct rte_flow_item_udp *mask = item->mask;
2869         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2870         int ret;
2871
2872         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_UDP)
2873                 return rte_flow_error_set(error, EINVAL,
2874                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2875                                           "protocol filtering not compatible"
2876                                           " with UDP layer");
2877         if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2878                                      MLX5_FLOW_LAYER_OUTER_L3)))
2879                 return rte_flow_error_set(error, EINVAL,
2880                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2881                                           "L3 is mandatory to filter on L4");
2882         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2883                                    MLX5_FLOW_LAYER_OUTER_L4))
2884                 return rte_flow_error_set(error, EINVAL,
2885                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2886                                           "L4 layer is already present");
2887         if (!mask)
2888                 mask = &rte_flow_item_udp_mask;
2889         ret = mlx5_flow_item_acceptable
2890                 (item, (const uint8_t *)mask,
2891                  (const uint8_t *)&rte_flow_item_udp_mask,
2892                  sizeof(struct rte_flow_item_udp), error);
2893         if (ret < 0)
2894                 return ret;
2895         return 0;
2896 }
2897
2898 /**
2899  * Validate TCP item.
2900  *
2901  * @param[in] item
2902  *   Item specification.
2903  * @param[in] item_flags
2904  *   Bit-fields that hold the items detected until now.
2905  * @param[in] target_protocol
2906  *   The next protocol in the previous item.
2907  * @param[out] error
2908  *   Pointer to error structure.
2909  *
2910  * @return
2911  *   0 on success, a negative errno value otherwise and rte_errno is set.
2912  */
2913 static int
2914 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2915                             uint64_t item_flags,
2916                             uint8_t target_protocol,
2917                             struct rte_flow_error *error)
2918 {
2919         const struct rte_flow_item_tcp *mask = item->mask;
2920         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2921         int ret;
2922
2923         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_TCP)
2924                 return rte_flow_error_set(error, EINVAL,
2925                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2926                                           "protocol filtering not compatible"
2927                                           " with TCP layer");
2928         if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2929                                      MLX5_FLOW_LAYER_OUTER_L3)))
2930                 return rte_flow_error_set(error, EINVAL,
2931                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2932                                           "L3 is mandatory to filter on L4");
2933         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2934                                    MLX5_FLOW_LAYER_OUTER_L4))
2935                 return rte_flow_error_set(error, EINVAL,
2936                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2937                                           "L4 layer is already present");
2938         if (!mask)
2939                 mask = &rte_flow_item_tcp_mask;
2940         ret = mlx5_flow_item_acceptable
2941                 (item, (const uint8_t *)mask,
2942                  (const uint8_t *)&rte_flow_item_tcp_mask,
2943                  sizeof(struct rte_flow_item_tcp), error);
2944         if (ret < 0)
2945                 return ret;
2946         return 0;
2947 }
2948
2949 /**
2950  * Validate VXLAN item.
2951  *
2952  * @param[in] item
2953  *   Item specification.
2954  * @param[in] item_flags
2955  *   Bit-fields that hold the items detected until now.
2958  * @param[out] error
2959  *   Pointer to error structure.
2960  *
2961  * @return
2962  *   0 on success, a negative errno value otherwise and rte_errno is set.
2963  */
2964 static int
2965 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2966                               uint64_t item_flags,
2967                               struct rte_flow_error *error)
2968 {
2969         const struct rte_flow_item_vxlan *spec = item->spec;
2970         const struct rte_flow_item_vxlan *mask = item->mask;
2971         int ret;
2972         union vni {
2973                 uint32_t vlan_id;
2974                 uint8_t vni[4];
2975         } id = { .vlan_id = 0, };
2976         uint32_t vlan_id = 0;
2977
2978
2979         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2980                 return rte_flow_error_set(error, ENOTSUP,
2981                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2982                                           "a tunnel is already present");
2983         /*
2984          * Verify an outer UDP layer is present as required by
2985          * https://tools.ietf.org/html/rfc7348
2986          */
2987         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2988                 return rte_flow_error_set(error, EINVAL,
2989                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2990                                           "no outer UDP layer found");
2991         if (!mask)
2992                 mask = &rte_flow_item_vxlan_mask;
2993         ret = mlx5_flow_item_acceptable
2994                 (item, (const uint8_t *)mask,
2995                  (const uint8_t *)&rte_flow_item_vxlan_mask,
2996                  sizeof(struct rte_flow_item_vxlan),
2997                  error);
2998         if (ret < 0)
2999                 return ret;
3000         if (spec) {
3001                 memcpy(&id.vni[1], spec->vni, 3);
3002                 vlan_id = id.vlan_id;
3003                 memcpy(&id.vni[1], mask->vni, 3);
3004                 vlan_id &= id.vlan_id;
3005         }
3006         /*
3007          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
3008          * only this layer is defined in the Verbs specification it is
3009          * interpreted as a wildcard and all packets will match this
3010          * rule, and if it follows a full stack (ex: eth / ipv4 /
3011          * udp), all packets matching the preceding layers will also
3012          * match this rule.  To avoid such a situation, VNI 0 is
3013          * currently refused.
3014          */
3015         if (!vlan_id)
3016                 return rte_flow_error_set(error, ENOTSUP,
3017                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3018                                           "VXLAN vni cannot be 0");
3019         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3020                 return rte_flow_error_set(error, ENOTSUP,
3021                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3022                                           "VXLAN tunnel must be fully defined");
3023         return 0;
3024 }
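
/*
 * Illustrative sketch (not part of the driver): the ordering the VXLAN
 * checks above expect -- a fully specified outer stack ending with UDP,
 * followed by a VXLAN item whose VNI is non-zero (42 here).  Names are
 * hypothetical.
 *
 *        static const struct rte_flow_item_vxlan vxlan_spec = {
 *                .vni = "\x00\x00\x2a",
 *        };
 *        const struct rte_flow_item pattern[] = {
 *                { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *                { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *                { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *                  .spec = &vxlan_spec,
 *                  .mask = &rte_flow_item_vxlan_mask },
 *                { .type = RTE_FLOW_ITEM_TYPE_END },
 *        };
 */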
3025
3026 /**
3027  * Validate VXLAN_GPE item.
3028  *
3029  * @param[in] item
3030  *   Item specification.
3031  * @param[in] item_flags
3032  *   Bit-fields that hold the items detected until now.
3033  * @param[in] dev
3034  *   Pointer to the Ethernet device structure.
3037  * @param[out] error
3038  *   Pointer to error structure.
3039  *
3040  * @return
3041  *   0 on success, a negative errno value otherwise and rte_errno is set.
3042  */
3043 static int
3044 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
3045                                   uint64_t item_flags,
3046                                   struct rte_eth_dev *dev,
3047                                   struct rte_flow_error *error)
3048 {
3049         struct priv *priv = dev->data->dev_private;
3050         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
3051         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
3052         int ret;
3053         union vni {
3054                 uint32_t vlan_id;
3055                 uint8_t vni[4];
3056         } id = { .vlan_id = 0, };
3057         uint32_t vlan_id = 0;
3058
3059         if (!priv->config.l3_vxlan_en)
3060                 return rte_flow_error_set(error, ENOTSUP,
3061                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3062                                           "L3 VXLAN is not enabled by device"
3063                                           " parameter and/or not configured in"
3064                                           " firmware");
3065         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3066                 return rte_flow_error_set(error, ENOTSUP,
3067                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3068                                           "a tunnel is already present");
3069         /*
3070          * Verify an outer UDP layer is present as required by
3071          * https://tools.ietf.org/html/rfc7348
3072          */
3073         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3074                 return rte_flow_error_set(error, EINVAL,
3075                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3076                                           "no outer UDP layer found");
3077         if (!mask)
3078                 mask = &rte_flow_item_vxlan_gpe_mask;
3079         ret = mlx5_flow_item_acceptable
3080                 (item, (const uint8_t *)mask,
3081                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
3082                  sizeof(struct rte_flow_item_vxlan_gpe),
3083                  error);
3084         if (ret < 0)
3085                 return ret;
3086         if (spec) {
3087                 if (spec->protocol)
3088                         return rte_flow_error_set(error, ENOTSUP,
3089                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3090                                                   item,
3091                                                   "VxLAN-GPE protocol"
3092                                                   " not supported");
3093                 memcpy(&id.vni[1], spec->vni, 3);
3094                 vlan_id = id.vlan_id;
3095                 memcpy(&id.vni[1], mask->vni, 3);
3096                 vlan_id &= id.vlan_id;
3097         }
3098         /*
3099          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only
3100          * this layer is defined in the Verbs specification it is interpreted
3101          * as a wildcard and all packets will match this rule, and if it
3102          * follows a full stack (ex: eth / ipv4 / udp), all packets matching
3103          * the preceding layers will also match this rule.  To avoid such a
3104          * situation, VNI 0 is currently refused.
3105          */
3106         if (!vlan_id)
3107                 return rte_flow_error_set(error, ENOTSUP,
3108                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3109                                           "VXLAN-GPE vni cannot be 0");
3110         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3111                 return rte_flow_error_set(error, ENOTSUP,
3112                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3113                                           "VXLAN-GPE tunnel must be fully"
3114                                           " defined");
3115         return 0;
3116 }
3117
3118 /**
3119  * Validate GRE item.
3120  *
3121  * @param[in] item
3122  *   Item specification.
3123  * @param[in] item_flags
3124  *   Bit-fields that hold the items detected until now.
3125  * @param[in] target_protocol
3126  *   The next protocol in the previous item.
3127  * @param[out] error
3128  *   Pointer to error structure.
3129  *
3130  * @return
3131  *   0 on success, a negative errno value otherwise and rte_errno is set.
3132  */
3133 static int
3134 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3135                             uint64_t item_flags,
3136                             uint8_t target_protocol,
3137                             struct rte_flow_error *error)
3138 {
3139         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3140         const struct rte_flow_item_gre *mask = item->mask;
3141         int ret;
3142
3143         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_GRE)
3144                 return rte_flow_error_set(error, EINVAL,
3145                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3146                                           "protocol filtering not compatible"
3147                                           " with this GRE layer");
3148         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3149                 return rte_flow_error_set(error, ENOTSUP,
3150                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3151                                           "a tunnel is already present");
3152         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3153                 return rte_flow_error_set(error, ENOTSUP,
3154                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3155                                           "L3 Layer is missing");
3156         if (!mask)
3157                 mask = &rte_flow_item_gre_mask;
3158         ret = mlx5_flow_item_acceptable
3159                 (item, (const uint8_t *)mask,
3160                  (const uint8_t *)&rte_flow_item_gre_mask,
3161                  sizeof(struct rte_flow_item_gre), error);
3162         if (ret < 0)
3163                 return ret;
3164 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3165         if (spec && (spec->protocol & mask->protocol))
3166                 return rte_flow_error_set(error, ENOTSUP,
3167                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3168                                           "without MPLS support the"
3169                                           " specification cannot be used for"
3170                                           " filtering");
3171 #endif
3172         return 0;
3173 }
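
/*
 * Illustrative sketch (not part of the driver): GRE must follow an outer L3
 * item, and without MPLS support in Verbs the GRE protocol field cannot be
 * used for matching.  A pattern the checks above accept:
 *
 *        const struct rte_flow_item pattern[] = {
 *                { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *                { .type = RTE_FLOW_ITEM_TYPE_GRE },
 *                { .type = RTE_FLOW_ITEM_TYPE_END },
 *        };
 */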
3174
3175 /**
3176  * Validate MPLS item.
3177  *
3178  * @param[in] item
3179  *   Item specification.
3180  * @param[in] item_flags
3181  *   Bit-fields that hold the items detected until now.
3182  * @param[in] target_protocol
3183  *   The next protocol in the previous item.
3184  * @param[out] error
3185  *   Pointer to error structure.
3186  *
3187  * @return
3188  *   0 on success, a negative errno value otherwise and rte_errno is set.
3189  */
3190 static int
3191 mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
3192                              uint64_t item_flags __rte_unused,
3193                              uint8_t target_protocol __rte_unused,
3194                              struct rte_flow_error *error)
3195 {
3196 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3197         const struct rte_flow_item_mpls *mask = item->mask;
3198         int ret;
3199
3200         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_MPLS)
3201                 return rte_flow_error_set(error, EINVAL,
3202                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3203                                           "protocol filtering not compatible"
3204                                           " with MPLS layer");
3205         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3206                 return rte_flow_error_set(error, ENOTSUP,
3207                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3208                                           "a tunnel is already"
3209                                           " present");
3210         if (!mask)
3211                 mask = &rte_flow_item_mpls_mask;
3212         ret = mlx5_flow_item_acceptable
3213                 (item, (const uint8_t *)mask,
3214                  (const uint8_t *)&rte_flow_item_mpls_mask,
3215                  sizeof(struct rte_flow_item_mpls), error);
3216         if (ret < 0)
3217                 return ret;
3218         return 0;
3219 #endif /* HAVE_IBV_DEVICE_MPLS_SUPPORT */
3220         return rte_flow_error_set(error, ENOTSUP,
3221                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3222                                   "MPLS is not supported by Verbs, please"
3223                                   " update.");
3224 }
3225
3226 /**
3227  *
3228  * Internal validation function.
3229  *
3230  * @param[in] dev
3231  *   Pointer to the Ethernet device structure.
3232  * @param[in] attr
3233  *   Pointer to the flow attributes.
3234  * @param[in] items
3235  *   Pointer to the list of items.
3236  * @param[in] actions
3237  *   Pointer to the list of actions.
3238  * @param[out] error
3239  *   Pointer to the error structure.
3240  *
3241  * @return
3242  *   0 on success, a negative errno value otherwise and rte_errno is set.
3243  */
3244 static int mlx5_flow_verbs_validate(struct rte_eth_dev *dev,
3245                                     const struct rte_flow_attr *attr,
3246                                     const struct rte_flow_item items[],
3247                                     const struct rte_flow_action actions[],
3248                                     struct rte_flow_error *error)
3249 {
3250         int ret;
3251         uint32_t action_flags = 0;
3252         uint32_t item_flags = 0;
3253         int tunnel = 0;
3254         uint8_t next_protocol = 0xff;
3255
3256         if (items == NULL)
3257                 return rte_flow_error_set(error, EINVAL,
                                               RTE_FLOW_ERROR_TYPE_ITEM_NUM,
                                               NULL, "NULL pattern");
3258         ret = mlx5_flow_validate_attributes(dev, attr, error);
3259         if (ret < 0)
3260                 return ret;
3261         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3262                 int ret = 0;
3263                 switch (items->type) {
3264                 case RTE_FLOW_ITEM_TYPE_VOID:
3265                         break;
3266                 case RTE_FLOW_ITEM_TYPE_ETH:
3267                         ret = mlx5_flow_validate_item_eth(items, item_flags,
3268                                                           error);
3269                         if (ret < 0)
3270                                 return ret;
3271                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
3272                                                MLX5_FLOW_LAYER_OUTER_L2;
3273                         break;
3274                 case RTE_FLOW_ITEM_TYPE_VLAN:
3275                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
3276                                                            error);
3277                         if (ret < 0)
3278                                 return ret;
3279                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
3280                                                MLX5_FLOW_LAYER_OUTER_VLAN;
3281                         break;
3282                 case RTE_FLOW_ITEM_TYPE_IPV4:
3283                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
3284                                                            error);
3285                         if (ret < 0)
3286                                 return ret;
3287                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
3288                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3289                         if (items->mask != NULL &&
3290                             ((const struct rte_flow_item_ipv4 *)
3291                              items->mask)->hdr.next_proto_id)
3292                                 next_protocol =
3293                                         ((const struct rte_flow_item_ipv4 *)
3294                                          (items->spec))->hdr.next_proto_id;
3295                         break;
3296                 case RTE_FLOW_ITEM_TYPE_IPV6:
3297                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
3298                                                            error);
3299                         if (ret < 0)
3300                                 return ret;
3301                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
3302                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3303                         if (items->mask != NULL &&
3304                             ((const struct rte_flow_item_ipv6 *)
3305                              items->mask)->hdr.proto)
3306                                 next_protocol =
3307                                         ((const struct rte_flow_item_ipv6 *)
3308                                          items->spec)->hdr.proto;
3309                         break;
3310                 case RTE_FLOW_ITEM_TYPE_UDP:
3311                         ret = mlx5_flow_validate_item_udp(items, item_flags,
3312                                                           next_protocol,
3313                                                           error);
3314                         if (ret < 0)
3315                                 return ret;
3316                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
3317                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
3318                         break;
3319                 case RTE_FLOW_ITEM_TYPE_TCP:
3320                         ret = mlx5_flow_validate_item_tcp(items, item_flags,
3321                                                           next_protocol, error);
3322                         if (ret < 0)
3323                                 return ret;
3324                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
3325                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
3326                         break;
3327                 case RTE_FLOW_ITEM_TYPE_VXLAN:
3328                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
3329                                                             error);
3330                         if (ret < 0)
3331                                 return ret;
3332                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
3333                         break;
3334                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
3335                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
3336                                                                 item_flags,
3337                                                                 dev, error);
3338                         if (ret < 0)
3339                                 return ret;
3340                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
3341                         break;
3342                 case RTE_FLOW_ITEM_TYPE_GRE:
3343                         ret = mlx5_flow_validate_item_gre(items, item_flags,
3344                                                           next_protocol, error);
3345                         if (ret < 0)
3346                                 return ret;
3347                         item_flags |= MLX5_FLOW_LAYER_GRE;
3348                         break;
3349                 case RTE_FLOW_ITEM_TYPE_MPLS:
3350                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
3351                                                            next_protocol,
3352                                                            error);
3353                         if (ret < 0)
3354                                 return ret;
3355                         if (next_protocol != 0xff &&
3356                             next_protocol != MLX5_IP_PROTOCOL_MPLS)
3357                                 return rte_flow_error_set
3358                                         (error, ENOTSUP,
3359                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
3360                                          "protocol filtering not compatible"
3361                                          " with MPLS layer");
3362                         item_flags |= MLX5_FLOW_LAYER_MPLS;
3363                         break;
3364                 default:
3365                         return rte_flow_error_set(error, ENOTSUP,
3366                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3367                                                   NULL,
3368                                                   "item not supported");
3369                 }
3370         }
3371         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3372                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
3373                 switch (actions->type) {
3374                 case RTE_FLOW_ACTION_TYPE_VOID:
3375                         break;
3376                 case RTE_FLOW_ACTION_TYPE_FLAG:
3377                         ret = mlx5_flow_validate_action_flag(action_flags,
3378                                                              error);
3379                         if (ret < 0)
3380                                 return ret;
3381                         action_flags |= MLX5_FLOW_ACTION_FLAG;
3382                         break;
3383                 case RTE_FLOW_ACTION_TYPE_MARK:
3384                         ret = mlx5_flow_validate_action_mark(actions,
3385                                                              action_flags,
3386                                                              error);
3387                         if (ret < 0)
3388                                 return ret;
3389                         action_flags |= MLX5_FLOW_ACTION_MARK;
3390                         break;
3391                 case RTE_FLOW_ACTION_TYPE_DROP:
3392                         ret = mlx5_flow_validate_action_drop(action_flags,
3393                                                              error);
3394                         if (ret < 0)
3395                                 return ret;
3396                         action_flags |= MLX5_FLOW_ACTION_DROP;
3397                         break;
3398                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3399                         ret = mlx5_flow_validate_action_queue(actions,
3400                                                               action_flags, dev,
3401                                                               error);
3402                         if (ret < 0)
3403                                 return ret;
3404                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
3405                         break;
3406                 case RTE_FLOW_ACTION_TYPE_RSS:
3407                         ret = mlx5_flow_validate_action_rss(actions,
3408                                                             action_flags, dev,
3409                                                             error);
3410                         if (ret < 0)
3411                                 return ret;
3412                         action_flags |= MLX5_FLOW_ACTION_RSS;
3413                         break;
3414                 case RTE_FLOW_ACTION_TYPE_COUNT:
3415                         ret = mlx5_flow_validate_action_count(dev, error);
3416                         if (ret < 0)
3417                                 return ret;
3418                         action_flags |= MLX5_FLOW_ACTION_COUNT;
3419                         break;
3420                 default:
3421                         return rte_flow_error_set(error, ENOTSUP,
3422                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3423                                                   actions,
3424                                                   "action not supported");
3425                 }
3426         }
3427         return 0;
3428 }
3429
3430 /**
3431  * Validate a flow supported by the NIC.
3432  *
3433  * @see rte_flow_validate()
3434  * @see rte_flow_ops
3435  */
3436 int
3437 mlx5_flow_validate(struct rte_eth_dev *dev,
3438                    const struct rte_flow_attr *attr,
3439                    const struct rte_flow_item items[],
3440                    const struct rte_flow_action actions[],
3441                    struct rte_flow_error *error)
3442 {
3443         int ret;
3444
3445         ret = mlx5_flow_verbs_validate(dev, attr, items, actions, error);
3446         if (ret < 0)
3447                 return ret;
3448         return 0;
3449 }
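
/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * the validation above through the generic API.  The port number, pattern
 * and actions are hypothetical.
 *
 *        uint16_t port_id = 0;
 *        struct rte_flow_error err;
 *        const struct rte_flow_attr attr = { .ingress = 1 };
 *        const struct rte_flow_action_queue queue = { .index = 0 };
 *        const struct rte_flow_item pattern[] = {
 *                { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *                { .type = RTE_FLOW_ITEM_TYPE_END },
 *        };
 *        const struct rte_flow_action actions[] = {
 *                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *                { .type = RTE_FLOW_ACTION_TYPE_END },
 *        };
 *        int ret = rte_flow_validate(port_id, &attr, pattern, actions, &err);
 */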
3450
3451 /**
3452  * Remove the flow.
3453  *
3454  * @param[in] dev
3455  *   Pointer to the Ethernet device structure.
3456  * @param[in, out] flow
3457  *   Pointer to flow structure.
3458  */
3459 static void
3460 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3461 {
3462         struct priv *priv = dev->data->dev_private;
3463         struct mlx5_flow_verbs *verbs;
3464
3465         if (flow->nl_flow && priv->mnl_socket)
3466                 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
3467         LIST_FOREACH(verbs, &flow->verbs, next) {
3468                 if (verbs->flow) {
3469                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
3470                         verbs->flow = NULL;
3471                 }
3472                 if (verbs->hrxq) {
3473                         if (flow->fate & MLX5_FLOW_FATE_DROP)
3474                                 mlx5_hrxq_drop_release(dev);
3475                         else
3476                                 mlx5_hrxq_release(dev, verbs->hrxq);
3477                         verbs->hrxq = NULL;
3478                 }
3479         }
3480         if (flow->counter) {
3481                 mlx5_flow_counter_release(flow->counter);
3482                 flow->counter = NULL;
3483         }
3484 }
3485
3486 /**
3487  * Apply the flow.
3488  *
3489  * @param[in] dev
3490  *   Pointer to the Ethernet device structure.
3491  * @param[in, out] flow
3492  *   Pointer to flow structure.
3493  * @param[out] error
3494  *   Pointer to error structure.
3495  *
3496  * @return
3497  *   0 on success, a negative errno value otherwise and rte_errno is set.
3498  */
3499 static int
3500 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3501                 struct rte_flow_error *error)
3502 {
3503         struct priv *priv = dev->data->dev_private;
3504         struct mlx5_flow_verbs *verbs;
3505         int err;
3506
3507         LIST_FOREACH(verbs, &flow->verbs, next) {
3508                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
3509                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
3510                         if (!verbs->hrxq) {
3511                                 rte_flow_error_set
3512                                         (error, errno,
3513                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3514                                          NULL,
3515                                          "cannot get drop hash queue");
3516                                 goto error;
3517                         }
3518                 } else {
3519                         struct mlx5_hrxq *hrxq;
3520
3521                         hrxq = mlx5_hrxq_get(dev, flow->key,
3522                                              MLX5_RSS_HASH_KEY_LEN,
3523                                              verbs->hash_fields,
3524                                              (*flow->queue),
3525                                              flow->rss.queue_num);
3526                         if (!hrxq)
3527                                 hrxq = mlx5_hrxq_new(dev, flow->key,
3528                                                      MLX5_RSS_HASH_KEY_LEN,
3529                                                      verbs->hash_fields,
3530                                                      (*flow->queue),
3531                                                      flow->rss.queue_num,
3532                                                      !!(flow->layers &
3533                                                       MLX5_FLOW_LAYER_TUNNEL));
3534                         if (!hrxq) {
3535                                 rte_flow_error_set
3536                                         (error, rte_errno,
3537                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3538                                          NULL,
3539                                          "cannot get hash queue");
3540                                 goto error;
3541                         }
3542                         verbs->hrxq = hrxq;
3543                 }
3544                 verbs->flow =
3545                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
3546                 if (!verbs->flow) {
3547                         rte_flow_error_set(error, errno,
3548                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3549                                            NULL,
3550                                            "hardware refuses to create flow");
3551                         goto error;
3552                 }
3553         }
3554         if (flow->nl_flow &&
3555             priv->mnl_socket &&
3556             mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
3557                 goto error;
3558         return 0;
3559 error:
3560         err = rte_errno; /* Save rte_errno before cleanup. */
3561         LIST_FOREACH(verbs, &flow->verbs, next) {
3562                 if (verbs->hrxq) {
3563                         if (flow->fate & MLX5_FLOW_FATE_DROP)
3564                                 mlx5_hrxq_drop_release(dev);
3565                         else
3566                                 mlx5_hrxq_release(dev, verbs->hrxq);
3567                         verbs->hrxq = NULL;
3568                 }
3569         }
3570         rte_errno = err; /* Restore rte_errno. */
3571         return -rte_errno;
3572 }
3573
3574 /**
3575  * Create a flow and add it to @p list.
3576  *
3577  * @param dev
3578  *   Pointer to Ethernet device.
3579  * @param list
3580  *   Pointer to a TAILQ flow list.
3581  * @param[in] attr
3582  *   Flow rule attributes.
3583  * @param[in] items
3584  *   Pattern specification (list terminated by the END pattern item).
3585  * @param[in] actions
3586  *   Associated actions (list terminated by the END action).
3587  * @param[out] error
3588  *   Perform verbose error reporting if not NULL.
3589  *
3590  * @return
3591  *   A flow on success, NULL otherwise and rte_errno is set.
3592  */
3593 static struct rte_flow *
3594 mlx5_flow_list_create(struct rte_eth_dev *dev,
3595                       struct mlx5_flows *list,
3596                       const struct rte_flow_attr *attr,
3597                       const struct rte_flow_item items[],
3598                       const struct rte_flow_action actions[],
3599                       struct rte_flow_error *error)
3600 {
3601         struct rte_flow *flow = NULL;
3602         size_t size = 0;
3603         int ret;
3604
3605         ret = mlx5_flow_validate(dev, attr, items, actions, error);
3606         if (ret < 0)
3607                 return NULL;
3608         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3609         if (ret < 0)
3610                 return NULL;
3611         size = ret;
3612         flow = rte_calloc(__func__, 1, size, 0);
3613         if (!flow) {
3614                 rte_flow_error_set(error, ENOMEM,
3615                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3616                                    NULL,
3617                                    "not enough memory to create flow");
3618                 return NULL;
3619         }
3620         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3621         if (ret < 0) {
3622                 rte_free(flow);
3623                 return NULL;
3624         }
3625         assert((size_t)ret == size);
3626         if (dev->data->dev_started) {
3627                 ret = mlx5_flow_apply(dev, flow, error);
3628                 if (ret < 0) {
3629                         ret = rte_errno; /* Save rte_errno before cleanup. */
3630                         if (flow) {
3631                                 mlx5_flow_remove(dev, flow);
3632                                 rte_free(flow);
3633                         }
3634                         rte_errno = ret; /* Restore rte_errno. */
3635                         return NULL;
3636                 }
3637         }
3638         TAILQ_INSERT_TAIL(list, flow, next);
3639         mlx5_flow_rxq_flags_set(dev, flow);
3640         return flow;
3641 }
3642
3643 /**
3644  * Create a flow.
3645  *
3646  * @see rte_flow_create()
3647  * @see rte_flow_ops
3648  */
3649 struct rte_flow *
3650 mlx5_flow_create(struct rte_eth_dev *dev,
3651                  const struct rte_flow_attr *attr,
3652                  const struct rte_flow_item items[],
3653                  const struct rte_flow_action actions[],
3654                  struct rte_flow_error *error)
3655 {
3656         return mlx5_flow_list_create
3657                 (dev, &((struct priv *)dev->data->dev_private)->flows,
3658                  attr, items, actions, error);
3659 }
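
/*
 * Illustrative sketch (not part of the driver): creating a rule through the
 * entry point above; on failure the PMD leaves the reason in the error
 * structure.  The attr, pattern and actions arrays are as in the validation
 * sketch earlier and remain hypothetical.
 *
 *        struct rte_flow *flow;
 *        struct rte_flow_error err;
 *
 *        flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *        if (flow == NULL)
 *                printf("flow creation failed: %s\n",
 *                       err.message ? err.message : "(no message)");
 */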
3660
3661 /**
3662  * Destroy a flow in a list.
3663  *
3664  * @param dev
3665  *   Pointer to Ethernet device.
3666  * @param list
3667  *   Pointer to a TAILQ flow list.
3668  * @param[in] flow
3669  *   Flow to destroy.
3670  */
3671 static void
3672 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
3673                        struct rte_flow *flow)
3674 {
3675         mlx5_flow_remove(dev, flow);
3676         TAILQ_REMOVE(list, flow, next);
3677         /*
3678          * Update RX queue flags only if port is started, otherwise it is
3679          * already clean.
3680          */
3681         if (dev->data->dev_started)
3682                 mlx5_flow_rxq_flags_trim(dev, flow);
3683         rte_free(flow);
3684 }
3685
3686 /**
3687  * Destroy all flows.
3688  *
3689  * @param dev
3690  *   Pointer to Ethernet device.
3691  * @param list
3692  *   Pointer to a TAILQ flow list.
3693  */
3694 void
3695 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
3696 {
3697         while (!TAILQ_EMPTY(list)) {
3698                 struct rte_flow *flow;
3699
3700                 flow = TAILQ_FIRST(list);
3701                 mlx5_flow_list_destroy(dev, list, flow);
3702         }
3703 }
3704
3705 /**
3706  * Remove all flows.
3707  *
3708  * @param dev
3709  *   Pointer to Ethernet device.
3710  * @param list
3711  *   Pointer to a TAILQ flow list.
3712  */
3713 void
3714 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
3715 {
3716         struct rte_flow *flow;
3717
3718         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
3719                 mlx5_flow_remove(dev, flow);
3720         mlx5_flow_rxq_flags_clear(dev);
3721 }
3722
3723 /**
3724  * Add all flows.
3725  *
3726  * @param dev
3727  *   Pointer to Ethernet device.
3728  * @param list
3729  *   Pointer to a TAILQ flow list.
3730  *
3731  * @return
3732  *   0 on success, a negative errno value otherwise and rte_errno is set.
3733  */
3734 int
3735 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
3736 {
3737         struct rte_flow *flow;
3738         struct rte_flow_error error;
3739         int ret = 0;
3740
3741         TAILQ_FOREACH(flow, list, next) {
3742                 ret = mlx5_flow_apply(dev, flow, &error);
3743                 if (ret < 0)
3744                         goto error;
3745                 mlx5_flow_rxq_flags_set(dev, flow);
3746         }
3747         return 0;
3748 error:
3749         ret = rte_errno; /* Save rte_errno before cleanup. */
3750         mlx5_flow_stop(dev, list);
3751         rte_errno = ret; /* Restore rte_errno. */
3752         return -rte_errno;
3753 }
3754
3755 /**
3756  * Verify the flow list is empty.
3757  *
3758  * @param dev
3759  *  Pointer to Ethernet device.
3760  *
3761  * @return the number of flows not released.
3762  */
3763 int
3764 mlx5_flow_verify(struct rte_eth_dev *dev)
3765 {
3766         struct priv *priv = dev->data->dev_private;
3767         struct rte_flow *flow;
3768         int ret = 0;
3769
3770         TAILQ_FOREACH(flow, &priv->flows, next) {
3771                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
3772                         dev->data->port_id, (void *)flow);
3773                 ++ret;
3774         }
3775         return ret;
3776 }
3777
3778 /**
3779  * Enable a control flow configured from the control plane.
3780  *
3781  * @param dev
3782  *   Pointer to Ethernet device.
3783  * @param eth_spec
3784  *   An Ethernet flow spec to apply.
3785  * @param eth_mask
3786  *   An Ethernet flow mask to apply.
3787  * @param vlan_spec
3788  *   A VLAN flow spec to apply.
3789  * @param vlan_mask
3790  *   A VLAN flow mask to apply.
3791  *
3792  * @return
3793  *   0 on success, a negative errno value otherwise and rte_errno is set.
3794  */
3795 int
3796 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3797                     struct rte_flow_item_eth *eth_spec,
3798                     struct rte_flow_item_eth *eth_mask,
3799                     struct rte_flow_item_vlan *vlan_spec,
3800                     struct rte_flow_item_vlan *vlan_mask)
3801 {
3802         struct priv *priv = dev->data->dev_private;
3803         const struct rte_flow_attr attr = {
3804                 .ingress = 1,
3805                 .priority = MLX5_FLOW_PRIO_RSVD,
3806         };
3807         struct rte_flow_item items[] = {
3808                 {
3809                         .type = RTE_FLOW_ITEM_TYPE_ETH,
3810                         .spec = eth_spec,
3811                         .last = NULL,
3812                         .mask = eth_mask,
3813                 },
3814                 {
3815                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3816                                               RTE_FLOW_ITEM_TYPE_END,
3817                         .spec = vlan_spec,
3818                         .last = NULL,
3819                         .mask = vlan_mask,
3820                 },
3821                 {
3822                         .type = RTE_FLOW_ITEM_TYPE_END,
3823                 },
3824         };
3825         uint16_t queue[priv->reta_idx_n];
3826         struct rte_flow_action_rss action_rss = {
3827                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3828                 .level = 0,
3829                 .types = priv->rss_conf.rss_hf,
3830                 .key_len = priv->rss_conf.rss_key_len,
3831                 .queue_num = priv->reta_idx_n,
3832                 .key = priv->rss_conf.rss_key,
3833                 .queue = queue,
3834         };
3835         struct rte_flow_action actions[] = {
3836                 {
3837                         .type = RTE_FLOW_ACTION_TYPE_RSS,
3838                         .conf = &action_rss,
3839                 },
3840                 {
3841                         .type = RTE_FLOW_ACTION_TYPE_END,
3842                 },
3843         };
3844         struct rte_flow *flow;
3845         struct rte_flow_error error;
3846         unsigned int i;
3847
3848         if (!priv->reta_idx_n) {
3849                 rte_errno = EINVAL;
3850                 return -rte_errno;
3851         }
3852         for (i = 0; i != priv->reta_idx_n; ++i)
3853                 queue[i] = (*priv->reta_idx)[i];
3854         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3855                                      actions, &error);
3856         if (!flow)
3857                 return -rte_errno;
3858         return 0;
3859 }
3860
3861 /**
3862  * Enable a control flow configured from the control plane.
3863  *
3864  * @param dev
3865  *   Pointer to Ethernet device.
3866  * @param eth_spec
3867  *   An Ethernet flow spec to apply.
3868  * @param eth_mask
3869  *   An Ethernet flow mask to apply.
3870  *
3871  * @return
3872  *   0 on success, a negative errno value otherwise and rte_errno is set.
3873  */
3874 int
3875 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3876                struct rte_flow_item_eth *eth_spec,
3877                struct rte_flow_item_eth *eth_mask)
3878 {
3879         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3880 }
3881
3882 /**
3883  * Destroy a flow.
3884  *
3885  * @see rte_flow_destroy()
3886  * @see rte_flow_ops
3887  */
3888 int
3889 mlx5_flow_destroy(struct rte_eth_dev *dev,
3890                   struct rte_flow *flow,
3891                   struct rte_flow_error *error __rte_unused)
3892 {
3893         struct priv *priv = dev->data->dev_private;
3894
3895         mlx5_flow_list_destroy(dev, &priv->flows, flow);
3896         return 0;
3897 }
3898
3899 /**
3900  * Destroy all flows.
3901  *
3902  * @see rte_flow_flush()
3903  * @see rte_flow_ops
3904  */
3905 int
3906 mlx5_flow_flush(struct rte_eth_dev *dev,
3907                 struct rte_flow_error *error __rte_unused)
3908 {
3909         struct priv *priv = dev->data->dev_private;
3910
3911         mlx5_flow_list_flush(dev, &priv->flows);
3912         return 0;
3913 }
3914
3915 /**
3916  * Isolated mode.
3917  *
3918  * @see rte_flow_isolate()
3919  * @see rte_flow_ops
3920  */
3921 int
3922 mlx5_flow_isolate(struct rte_eth_dev *dev,
3923                   int enable,
3924                   struct rte_flow_error *error)
3925 {
3926         struct priv *priv = dev->data->dev_private;
3927
3928         if (dev->data->dev_started) {
3929                 rte_flow_error_set(error, EBUSY,
3930                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3931                                    NULL,
3932                                    "port must be stopped first");
3933                 return -rte_errno;
3934         }
3935         priv->isolated = !!enable;
3936         if (enable)
3937                 dev->dev_ops = &mlx5_dev_ops_isolate;
3938         else
3939                 dev->dev_ops = &mlx5_dev_ops;
3940         return 0;
3941 }
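
/*
 * Illustrative sketch (not part of the driver): isolated mode must be
 * requested while the port is stopped, typically after
 * rte_eth_dev_configure() and before rte_eth_dev_start().  The port number
 * is hypothetical.
 *
 *        struct rte_flow_error err;
 *
 *        if (rte_flow_isolate(port_id, 1, &err))
 *                printf("cannot enter isolated mode: %s\n",
 *                       err.message ? err.message : "(no message)");
 */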
3942
3943 /**
3944  * Query flow counter.
3945  *
3946  * @param flow
3947  *   Pointer to the flow.
3948  *
3949  * @return
3950  *   0 on success, a negative errno value otherwise and rte_errno is set.
3951  */
3952 static int
3953 mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
3954                       void *data __rte_unused,
3955                       struct rte_flow_error *error)
3956 {
3957 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3958         if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
3959                 struct rte_flow_query_count *qc = data;
3960                 uint64_t counters[2] = {0, 0};
3961                 struct ibv_query_counter_set_attr query_cs_attr = {
3962                         .cs = flow->counter->cs,
3963                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3964                 };
3965                 struct ibv_counter_set_data query_out = {
3966                         .out = counters,
3967                         .outlen = 2 * sizeof(uint64_t),
3968                 };
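                /*
                 * Raw counters are returned as [hits, bytes]; report the
                 * delta against the values saved at the last reset.
                 */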
3969                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
3970                                                        &query_out);
3971
3972                 if (err)
3973                         return rte_flow_error_set
3974                                 (error, err,
3975                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3976                                  NULL,
3977                                  "cannot read counter");
3978                 qc->hits_set = 1;
3979                 qc->bytes_set = 1;
3980                 qc->hits = counters[0] - flow->counter->hits;
3981                 qc->bytes = counters[1] - flow->counter->bytes;
3982                 if (qc->reset) {
3983                         flow->counter->hits = counters[0];
3984                         flow->counter->bytes = counters[1];
3985                 }
3986                 return 0;
3987         }
3988         return rte_flow_error_set(error, ENOTSUP,
3989                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3990                                   NULL,
3991                                   "flow does not have counter");
3992 #endif
3993         return rte_flow_error_set(error, ENOTSUP,
3994                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3995                                   NULL,
3996                                   "counters are not available");
3997 }
3998
3999 /**
4000  * Query a flow.
4001  *
4002  * @see rte_flow_query()
4003  * @see rte_flow_ops
4004  */
4005 int
4006 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
4007                 struct rte_flow *flow,
4008                 const struct rte_flow_action *actions,
4009                 void *data,
4010                 struct rte_flow_error *error)
4011 {
4012         int ret = 0;
4013
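        /* Only the COUNT action can be queried; VOID actions are skipped. */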
4014         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4015                 switch (actions->type) {
4016                 case RTE_FLOW_ACTION_TYPE_VOID:
4017                         break;
4018                 case RTE_FLOW_ACTION_TYPE_COUNT:
4019                         ret = mlx5_flow_query_count(flow, data, error);
4020                         break;
4021                 default:
4022                         return rte_flow_error_set(error, ENOTSUP,
4023                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4024                                                   actions,
4025                                                   "action not supported");
4026                 }
4027                 if (ret < 0)
4028                         return ret;
4029         }
4030         return 0;
4031 }
4032
4033 /**
4034  * Convert a flow director filter to a generic flow.
4035  *
4036  * @param dev
4037  *   Pointer to Ethernet device.
4038  * @param fdir_filter
4039  *   Flow director filter to add.
4040  * @param attributes
4041  *   Generic flow parameters structure.
4042  *
4043  * @return
4044  *   0 on success, a negative errno value otherwise and rte_errno is set.
4045  */
4046 static int
4047 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
4048                          const struct rte_eth_fdir_filter *fdir_filter,
4049                          struct mlx5_fdir *attributes)
4050 {
4051         struct priv *priv = dev->data->dev_private;
4052         const struct rte_eth_fdir_input *input = &fdir_filter->input;
4053         const struct rte_eth_fdir_masks *mask =
4054                 &dev->data->dev_conf.fdir_conf.mask;
4055
4056         /* Validate queue number. */
4057         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
4058                 DRV_LOG(ERR, "port %u invalid queue number %d",
4059                         dev->data->port_id, fdir_filter->action.rx_queue);
4060                 rte_errno = EINVAL;
4061                 return -rte_errno;
4062         }
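        /*
         * Flow director rules are ingress only.  Build the Ethernet item,
         * then the fate action, then the L3/L4 items matching the filter.
         */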
4063         attributes->attr.ingress = 1;
4064         attributes->items[0] = (struct rte_flow_item) {
4065                 .type = RTE_FLOW_ITEM_TYPE_ETH,
4066                 .spec = &attributes->l2,
4067                 .mask = &attributes->l2_mask,
4068         };
4069         switch (fdir_filter->action.behavior) {
4070         case RTE_ETH_FDIR_ACCEPT:
4071                 attributes->actions[0] = (struct rte_flow_action){
4072                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
4073                         .conf = &attributes->queue,
4074                 };
4075                 break;
4076         case RTE_ETH_FDIR_REJECT:
4077                 attributes->actions[0] = (struct rte_flow_action){
4078                         .type = RTE_FLOW_ACTION_TYPE_DROP,
4079                 };
4080                 break;
4081         default:
4082                 DRV_LOG(ERR, "port %u invalid behavior %d",
4083                         dev->data->port_id,
4084                         fdir_filter->action.behavior);
4085                 rte_errno = ENOTSUP;
4086                 return -rte_errno;
4087         }
4088         attributes->queue.index = fdir_filter->action.rx_queue;
4089         /* Handle L3. */
4090         switch (fdir_filter->input.flow_type) {
4091         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4092         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4093         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4094                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
4095                         .src_addr = input->flow.ip4_flow.src_ip,
4096                         .dst_addr = input->flow.ip4_flow.dst_ip,
4097                         .time_to_live = input->flow.ip4_flow.ttl,
4098                         .type_of_service = input->flow.ip4_flow.tos,
4099                         .next_proto_id = input->flow.ip4_flow.proto,
4100                 };
4101                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
4102                         .src_addr = mask->ipv4_mask.src_ip,
4103                         .dst_addr = mask->ipv4_mask.dst_ip,
4104                         .time_to_live = mask->ipv4_mask.ttl,
4105                         .type_of_service = mask->ipv4_mask.tos,
4106                         .next_proto_id = mask->ipv4_mask.proto,
4107                 };
4108                 attributes->items[1] = (struct rte_flow_item){
4109                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
4110                         .spec = &attributes->l3,
4111                         .mask = &attributes->l3_mask,
4112                 };
4113                 break;
4114         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4115         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4116         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4117                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
4118                         .hop_limits = input->flow.ipv6_flow.hop_limits,
4119                         .proto = input->flow.ipv6_flow.proto,
4120                 };
4121
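                /*
                 * IPv6 addresses are byte arrays and cannot be assigned in
                 * the initializer above; copy spec and mask explicitly.
                 */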
4122                 memcpy(attributes->l3.ipv6.hdr.src_addr,
4123                        input->flow.ipv6_flow.src_ip,
4124                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
4125                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
4126                        input->flow.ipv6_flow.dst_ip,
4127                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
4128                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
4129                        mask->ipv6_mask.src_ip,
4130                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
4131                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
4132                        mask->ipv6_mask.dst_ip,
4133                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
4134                 attributes->items[1] = (struct rte_flow_item){
4135                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
4136                         .spec = &attributes->l3,
4137                         .mask = &attributes->l3_mask,
4138                 };
4139                 break;
4140         default:
4141                 DRV_LOG(ERR, "port %u invalid flow type %d",
4142                         dev->data->port_id, fdir_filter->input.flow_type);
4143                 rte_errno = ENOTSUP;
4144                 return -rte_errno;
4145         }
4146         /* Handle L4. */
4147         switch (fdir_filter->input.flow_type) {
4148         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4149                 attributes->l4.udp.hdr = (struct udp_hdr){
4150                         .src_port = input->flow.udp4_flow.src_port,
4151                         .dst_port = input->flow.udp4_flow.dst_port,
4152                 };
4153                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
4154                         .src_port = mask->src_port_mask,
4155                         .dst_port = mask->dst_port_mask,
4156                 };
4157                 attributes->items[2] = (struct rte_flow_item){
4158                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4159                         .spec = &attributes->l4,
4160                         .mask = &attributes->l4_mask,
4161                 };
4162                 break;
4163         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4164                 attributes->l4.tcp.hdr = (struct tcp_hdr){
4165                         .src_port = input->flow.tcp4_flow.src_port,
4166                         .dst_port = input->flow.tcp4_flow.dst_port,
4167                 };
4168                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
4169                         .src_port = mask->src_port_mask,
4170                         .dst_port = mask->dst_port_mask,
4171                 };
4172                 attributes->items[2] = (struct rte_flow_item){
4173                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4174                         .spec = &attributes->l4,
4175                         .mask = &attributes->l4_mask,
4176                 };
4177                 break;
4178         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4179                 attributes->l4.udp.hdr = (struct udp_hdr){
4180                         .src_port = input->flow.udp6_flow.src_port,
4181                         .dst_port = input->flow.udp6_flow.dst_port,
4182                 };
4183                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
4184                         .src_port = mask->src_port_mask,
4185                         .dst_port = mask->dst_port_mask,
4186                 };
4187                 attributes->items[2] = (struct rte_flow_item){
4188                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4189                         .spec = &attributes->l4,
4190                         .mask = &attributes->l4_mask,
4191                 };
4192                 break;
4193         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4194                 attributes->l4.tcp.hdr = (struct tcp_hdr){
4195                         .src_port = input->flow.tcp6_flow.src_port,
4196                         .dst_port = input->flow.tcp6_flow.dst_port,
4197                 };
4198                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
4199                         .src_port = mask->src_port_mask,
4200                         .dst_port = mask->dst_port_mask,
4201                 };
4202                 attributes->items[2] = (struct rte_flow_item){
4203                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4204                         .spec = &attributes->l4,
4205                         .mask = &attributes->l4_mask,
4206                 };
4207                 break;
4208         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4209         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4210                 break;
4211         default:
4212                 DRV_LOG(ERR, "port %u invalid flow type %d",
4213                         dev->data->port_id, fdir_filter->input.flow_type);
4214                 rte_errno = ENOTSUP;
4215                 return -rte_errno;
4216         }
4217         return 0;
4218 }
4219
4220 /**
4221  * Add new flow director filter and store it in list.
4222  *
4223  * @param dev
4224  *   Pointer to Ethernet device.
4225  * @param fdir_filter
4226  *   Flow director filter to add.
4227  *
4228  * @return
4229  *   0 on success, a negative errno value otherwise and rte_errno is set.
4230  */
4231 static int
4232 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
4233                      const struct rte_eth_fdir_filter *fdir_filter)
4234 {
4235         struct priv *priv = dev->data->dev_private;
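        /*
         * Start from an all-zero L2 mask (match any Ethernet header); the
         * converter below fills in the L3/L4 spec and mask from the filter.
         */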
4236         struct mlx5_fdir attributes = {
4237                 .attr.group = 0,
4238                 .l2_mask = {
4239                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
4240                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
4241                         .type = 0,
4242                 },
4243         };
4244         struct rte_flow_error error;
4245         struct rte_flow *flow;
4246         int ret;
4247
4248         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
4249         if (ret)
4250                 return ret;
4251         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
4252                                      attributes.items, attributes.actions,
4253                                      &error);
4254         if (flow) {
4255                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
4256                         (void *)flow);
4257                 return 0;
4258         }
4259         return -rte_errno;
4260 }
4261
4262 /**
4263  * Delete a specific filter.
4264  *
4265  * @param dev
4266  *   Pointer to Ethernet device.
4267  * @param fdir_filter
4268  *   Filter to be deleted.
4269  *
4270  * @return
4271  *   0 on success, a negative errno value otherwise and rte_errno is set.
4272  */
4273 static int
4274 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
4275                         const struct rte_eth_fdir_filter *fdir_filter
4276                         __rte_unused)
4277 {
4278         rte_errno = ENOTSUP;
4279         return -rte_errno;
4280 }
4281
4282 /**
4283  * Update queue for specific filter.
4284  *
4285  * @param dev
4286  *   Pointer to Ethernet device.
4287  * @param fdir_filter
4288  *   Filter to be updated.
4289  *
4290  * @return
4291  *   0 on success, a negative errno value otherwise and rte_errno is set.
4292  */
4293 static int
4294 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
4295                         const struct rte_eth_fdir_filter *fdir_filter)
4296 {
4297         int ret;
4298
4299         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
4300         if (ret)
4301                 return ret;
4302         return mlx5_fdir_filter_add(dev, fdir_filter);
4303 }
4304
4305 /**
4306  * Flush all filters.
4307  *
4308  * @param dev
4309  *   Pointer to Ethernet device.
4310  */
4311 static void
4312 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
4313 {
4314         struct priv *priv = dev->data->dev_private;
4315
4316         mlx5_flow_list_flush(dev, &priv->flows);
4317 }
4318
4319 /**
4320  * Get flow director information.
4321  *
4322  * @param dev
4323  *   Pointer to Ethernet device.
4324  * @param[out] fdir_info
4325  *   Resulting flow director information.
4326  */
4327 static void
4328 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
4329 {
4330         struct rte_eth_fdir_masks *mask =
4331                 &dev->data->dev_conf.fdir_conf.mask;
4332
4333         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
4334         fdir_info->guarant_spc = 0;
4335         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
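        /* The remaining capabilities are not exposed and are reported as zero. */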
4336         fdir_info->max_flexpayload = 0;
4337         fdir_info->flow_types_mask[0] = 0;
4338         fdir_info->flex_payload_unit = 0;
4339         fdir_info->max_flex_payload_segment_num = 0;
4340         fdir_info->flex_payload_limit = 0;
4341         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
4342 }
4343
4344 /**
4345  * Deal with flow director operations.
4346  *
4347  * @param dev
4348  *   Pointer to Ethernet device.
4349  * @param filter_op
4350  *   Operation to perform.
4351  * @param arg
4352  *   Pointer to operation-specific structure.
4353  *
4354  * @return
4355  *   0 on success, a negative errno value otherwise and rte_errno is set.
4356  */
4357 static int
4358 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
4359                     void *arg)
4360 {
4361         enum rte_fdir_mode fdir_mode =
4362                 dev->data->dev_conf.fdir_conf.mode;
4363
4364         if (filter_op == RTE_ETH_FILTER_NOP)
4365                 return 0;
4366         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
4367             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
4368                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
4369                         dev->data->port_id, fdir_mode);
4370                 rte_errno = EINVAL;
4371                 return -rte_errno;
4372         }
4373         switch (filter_op) {
4374         case RTE_ETH_FILTER_ADD:
4375                 return mlx5_fdir_filter_add(dev, arg);
4376         case RTE_ETH_FILTER_UPDATE:
4377                 return mlx5_fdir_filter_update(dev, arg);
4378         case RTE_ETH_FILTER_DELETE:
4379                 return mlx5_fdir_filter_delete(dev, arg);
4380         case RTE_ETH_FILTER_FLUSH:
4381                 mlx5_fdir_filter_flush(dev);
4382                 break;
4383         case RTE_ETH_FILTER_INFO:
4384                 mlx5_fdir_info_get(dev, arg);
4385                 break;
4386         default:
4387                 DRV_LOG(DEBUG, "port %u unknown operation %u",
4388                         dev->data->port_id, filter_op);
4389                 rte_errno = EINVAL;
4390                 return -rte_errno;
4391         }
4392         return 0;
4393 }
4394
4395 /**
4396  * Manage filter operations.
4397  *
4398  * @param dev
4399  *   Pointer to Ethernet device structure.
4400  * @param filter_type
4401  *   Filter type.
4402  * @param filter_op
4403  *   Operation to perform.
4404  * @param arg
4405  *   Pointer to operation-specific structure.
4406  *
4407  * @return
4408  *   0 on success, a negative errno value otherwise and rte_errno is set.
4409  */
4410 int
4411 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
4412                      enum rte_filter_type filter_type,
4413                      enum rte_filter_op filter_op,
4414                      void *arg)
4415 {
4416         switch (filter_type) {
4417         case RTE_ETH_FILTER_GENERIC:
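                /*
                 * Generic (rte_flow) filter type: only retrieving the flow
                 * operations is supported.
                 */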
4418                 if (filter_op != RTE_ETH_FILTER_GET) {
4419                         rte_errno = EINVAL;
4420                         return -rte_errno;
4421                 }
4422                 *(const void **)arg = &mlx5_flow_ops;
4423                 return 0;
4424         case RTE_ETH_FILTER_FDIR:
4425                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
4426         default:
4427                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
4428                         dev->data->port_id, filter_type);
4429                 rte_errno = ENOTSUP;
4430                 return -rte_errno;
4431         }
4432         return 0;
4433 }
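
/*
 * Illustrative only (not part of this file): the generic flow API reaches
 * the operations returned above through the filter control callback,
 * roughly equivalent to the following for some port_id:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				RTE_ETH_FILTER_GET, &ops);
 */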