net/mlx5: add flow prepare function
dpdk.git: drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdalign.h>
8 #include <stdint.h>
9 #include <string.h>
10
11 /* Verbs header. */
12 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
13 #ifdef PEDANTIC
14 #pragma GCC diagnostic ignored "-Wpedantic"
15 #endif
16 #include <infiniband/verbs.h>
17 #ifdef PEDANTIC
18 #pragma GCC diagnostic error "-Wpedantic"
19 #endif
20
21 #include <rte_common.h>
22 #include <rte_ether.h>
23 #include <rte_eth_ctrl.h>
24 #include <rte_ethdev_driver.h>
25 #include <rte_flow.h>
26 #include <rte_flow_driver.h>
27 #include <rte_malloc.h>
28 #include <rte_ip.h>
29
30 #include "mlx5.h"
31 #include "mlx5_defs.h"
32 #include "mlx5_prm.h"
33 #include "mlx5_glue.h"
34
35 /* Dev ops structure defined in mlx5.c */
36 extern const struct eth_dev_ops mlx5_dev_ops;
37 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
38
39 /* Pattern outer Layer bits. */
40 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
42 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
43 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
44 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
45 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
46
47 /* Pattern inner Layer bits. */
48 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
50 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
51 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
52 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
53 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
54
55 /* Pattern tunnel Layer bits. */
56 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
57 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
58 #define MLX5_FLOW_LAYER_GRE (1u << 14)
59 #define MLX5_FLOW_LAYER_MPLS (1u << 15)
60
61 /* Outer Masks. */
62 #define MLX5_FLOW_LAYER_OUTER_L3 \
63         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
64 #define MLX5_FLOW_LAYER_OUTER_L4 \
65         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
66 #define MLX5_FLOW_LAYER_OUTER \
67         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
68          MLX5_FLOW_LAYER_OUTER_L4)
69
70 /* Tunnel Masks. */
71 #define MLX5_FLOW_LAYER_TUNNEL \
72         (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
73          MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
74
75 /* Inner Masks. */
76 #define MLX5_FLOW_LAYER_INNER_L3 \
77         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
78 #define MLX5_FLOW_LAYER_INNER_L4 \
79         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
80 #define MLX5_FLOW_LAYER_INNER \
81         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
82          MLX5_FLOW_LAYER_INNER_L4)
83
84 /* Actions that modify the fate of matching traffic. */
85 #define MLX5_FLOW_FATE_DROP (1u << 0)
86 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
87 #define MLX5_FLOW_FATE_RSS (1u << 2)
88
89 /* Modify a packet. */
90 #define MLX5_FLOW_MOD_FLAG (1u << 0)
91 #define MLX5_FLOW_MOD_MARK (1u << 1)
92 #define MLX5_FLOW_MOD_COUNT (1u << 2)
93
94 /* Actions */
95 #define MLX5_FLOW_ACTION_DROP (1u << 0)
96 #define MLX5_FLOW_ACTION_QUEUE (1u << 1)
97 #define MLX5_FLOW_ACTION_RSS (1u << 2)
98 #define MLX5_FLOW_ACTION_FLAG (1u << 3)
99 #define MLX5_FLOW_ACTION_MARK (1u << 4)
100 #define MLX5_FLOW_ACTION_COUNT (1u << 5)
101
102 #define MLX5_FLOW_FATE_ACTIONS \
103         (MLX5_FLOW_ACTION_DROP | MLX5_FLOW_ACTION_QUEUE | MLX5_FLOW_ACTION_RSS)
104
105 /* Possible L3-layer protocols for filtering. */
106 #define MLX5_IP_PROTOCOL_TCP 6
107 #define MLX5_IP_PROTOCOL_UDP 17
108 #define MLX5_IP_PROTOCOL_GRE 47
109 #define MLX5_IP_PROTOCOL_MPLS 147
110
111 /* Priority reserved for default flows. */
112 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
113
114 enum mlx5_expansion {
115         MLX5_EXPANSION_ROOT,
116         MLX5_EXPANSION_ROOT_OUTER,
117         MLX5_EXPANSION_ROOT_ETH_VLAN,
118         MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
119         MLX5_EXPANSION_OUTER_ETH,
120         MLX5_EXPANSION_OUTER_ETH_VLAN,
121         MLX5_EXPANSION_OUTER_VLAN,
122         MLX5_EXPANSION_OUTER_IPV4,
123         MLX5_EXPANSION_OUTER_IPV4_UDP,
124         MLX5_EXPANSION_OUTER_IPV4_TCP,
125         MLX5_EXPANSION_OUTER_IPV6,
126         MLX5_EXPANSION_OUTER_IPV6_UDP,
127         MLX5_EXPANSION_OUTER_IPV6_TCP,
128         MLX5_EXPANSION_VXLAN,
129         MLX5_EXPANSION_VXLAN_GPE,
130         MLX5_EXPANSION_GRE,
131         MLX5_EXPANSION_MPLS,
132         MLX5_EXPANSION_ETH,
133         MLX5_EXPANSION_ETH_VLAN,
134         MLX5_EXPANSION_VLAN,
135         MLX5_EXPANSION_IPV4,
136         MLX5_EXPANSION_IPV4_UDP,
137         MLX5_EXPANSION_IPV4_TCP,
138         MLX5_EXPANSION_IPV6,
139         MLX5_EXPANSION_IPV6_UDP,
140         MLX5_EXPANSION_IPV6_TCP,
141 };
142
143 /** Supported expansion of items. */
144 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
145         [MLX5_EXPANSION_ROOT] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
147                                                  MLX5_EXPANSION_IPV4,
148                                                  MLX5_EXPANSION_IPV6),
149                 .type = RTE_FLOW_ITEM_TYPE_END,
150         },
151         [MLX5_EXPANSION_ROOT_OUTER] = {
152                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
153                                                  MLX5_EXPANSION_OUTER_IPV4,
154                                                  MLX5_EXPANSION_OUTER_IPV6),
155                 .type = RTE_FLOW_ITEM_TYPE_END,
156         },
157         [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
158                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
159                 .type = RTE_FLOW_ITEM_TYPE_END,
160         },
161         [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
162                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
163                 .type = RTE_FLOW_ITEM_TYPE_END,
164         },
165         [MLX5_EXPANSION_OUTER_ETH] = {
166                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
167                                                  MLX5_EXPANSION_OUTER_IPV6,
168                                                  MLX5_EXPANSION_MPLS),
169                 .type = RTE_FLOW_ITEM_TYPE_ETH,
170                 .rss_types = 0,
171         },
172         [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
173                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
174                 .type = RTE_FLOW_ITEM_TYPE_ETH,
175                 .rss_types = 0,
176         },
177         [MLX5_EXPANSION_OUTER_VLAN] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
179                                                  MLX5_EXPANSION_OUTER_IPV6),
180                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
181         },
182         [MLX5_EXPANSION_OUTER_IPV4] = {
183                 .next = RTE_FLOW_EXPAND_RSS_NEXT
184                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
185                          MLX5_EXPANSION_OUTER_IPV4_TCP,
186                          MLX5_EXPANSION_GRE),
187                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
188                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
189                         ETH_RSS_NONFRAG_IPV4_OTHER,
190         },
191         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
192                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
193                                                  MLX5_EXPANSION_VXLAN_GPE),
194                 .type = RTE_FLOW_ITEM_TYPE_UDP,
195                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
196         },
197         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
198                 .type = RTE_FLOW_ITEM_TYPE_TCP,
199                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
200         },
201         [MLX5_EXPANSION_OUTER_IPV6] = {
202                 .next = RTE_FLOW_EXPAND_RSS_NEXT
203                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
204                          MLX5_EXPANSION_OUTER_IPV6_TCP),
205                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
206                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
207                         ETH_RSS_NONFRAG_IPV6_OTHER,
208         },
209         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
210                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
211                                                  MLX5_EXPANSION_VXLAN_GPE),
212                 .type = RTE_FLOW_ITEM_TYPE_UDP,
213                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
214         },
215         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
216                 .type = RTE_FLOW_ITEM_TYPE_TCP,
217                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
218         },
219         [MLX5_EXPANSION_VXLAN] = {
220                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
221                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
222         },
223         [MLX5_EXPANSION_VXLAN_GPE] = {
224                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
225                                                  MLX5_EXPANSION_IPV4,
226                                                  MLX5_EXPANSION_IPV6),
227                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
228         },
229         [MLX5_EXPANSION_GRE] = {
230                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
231                 .type = RTE_FLOW_ITEM_TYPE_GRE,
232         },
233         [MLX5_EXPANSION_MPLS] = {
234                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
235                                                  MLX5_EXPANSION_IPV6),
236                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
237         },
238         [MLX5_EXPANSION_ETH] = {
239                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
240                                                  MLX5_EXPANSION_IPV6),
241                 .type = RTE_FLOW_ITEM_TYPE_ETH,
242         },
243         [MLX5_EXPANSION_ETH_VLAN] = {
244                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
245                 .type = RTE_FLOW_ITEM_TYPE_ETH,
246         },
247         [MLX5_EXPANSION_VLAN] = {
248                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
249                                                  MLX5_EXPANSION_IPV6),
250                 .type = RTE_FLOW_ITEM_TYPE_VLAN,
251         },
252         [MLX5_EXPANSION_IPV4] = {
253                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
254                                                  MLX5_EXPANSION_IPV4_TCP),
255                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
256                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
257                         ETH_RSS_NONFRAG_IPV4_OTHER,
258         },
259         [MLX5_EXPANSION_IPV4_UDP] = {
260                 .type = RTE_FLOW_ITEM_TYPE_UDP,
261                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
262         },
263         [MLX5_EXPANSION_IPV4_TCP] = {
264                 .type = RTE_FLOW_ITEM_TYPE_TCP,
265                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
266         },
267         [MLX5_EXPANSION_IPV6] = {
268                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
269                                                  MLX5_EXPANSION_IPV6_TCP),
270                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
271                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
272                         ETH_RSS_NONFRAG_IPV6_OTHER,
273         },
274         [MLX5_EXPANSION_IPV6_UDP] = {
275                 .type = RTE_FLOW_ITEM_TYPE_UDP,
276                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
277         },
278         [MLX5_EXPANSION_IPV6_TCP] = {
279                 .type = RTE_FLOW_ITEM_TYPE_TCP,
280                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
281         },
282 };
283
284 /** Verbs specification and resources attached to a device flow. */
285 struct mlx5_flow_verbs {
286         LIST_ENTRY(mlx5_flow_verbs) next;
287         unsigned int size; /**< Size of the attribute. */
288         struct {
289                 struct ibv_flow_attr *attr;
290                 /**< Pointer to the Verbs flow attributes buffer. */
291                 uint8_t *specs; /**< Pointer to the specifications. */
292         };
293         struct ibv_flow *flow; /**< Verbs flow pointer. */
294         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
295         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
296 };
297
298 /** Device flow structure. */
299 struct mlx5_flow {
300         LIST_ENTRY(mlx5_flow) next;
301         struct rte_flow *flow; /**< Pointer to the main flow. */
302         union {
303                 struct mlx5_flow_verbs verbs; /**< Holds the verbs dev-flow. */
304         };
305 };
306
307 /* Counters information. */
308 struct mlx5_flow_counter {
309         LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
310         uint32_t shared:1; /**< Share counter ID with other flow rules. */
311         uint32_t ref_cnt:31; /**< Reference counter. */
312         uint32_t id; /**< Counter ID. */
313         struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
314         uint64_t hits; /**< Number of packets matched by the rule. */
315         uint64_t bytes; /**< Number of bytes matched by the rule. */
316 };
317
318 /* Flow structure. */
319 struct rte_flow {
320         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
321         struct rte_flow_attr attributes; /**< User flow attribute. */
322         uint32_t layers;
323         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
324         uint32_t modifier;
325         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
326         uint32_t fate;
327         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
328         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
329         struct mlx5_flow_verbs *cur_verbs;
330         /**< Current Verbs flow structure being filled. */
331         struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
332         struct rte_flow_action_rss rss;/**< RSS context. */
333         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
334         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
335         void *nl_flow; /**< Netlink flow buffer if relevant. */
336         LIST_HEAD(dev_flows, mlx5_flow) dev_flows;
337         /**< Device flows that are part of the flow. */
338 };
339
340 static const struct rte_flow_ops mlx5_flow_ops = {
341         .validate = mlx5_flow_validate,
342         .create = mlx5_flow_create,
343         .destroy = mlx5_flow_destroy,
344         .flush = mlx5_flow_flush,
345         .isolate = mlx5_flow_isolate,
346         .query = mlx5_flow_query,
347 };
348
349 /* Convert FDIR request to Generic flow. */
350 struct mlx5_fdir {
351         struct rte_flow_attr attr;
352         struct rte_flow_action actions[2];
353         struct rte_flow_item items[4];
354         struct rte_flow_item_eth l2;
355         struct rte_flow_item_eth l2_mask;
356         union {
357                 struct rte_flow_item_ipv4 ipv4;
358                 struct rte_flow_item_ipv6 ipv6;
359         } l3;
360         union {
361                 struct rte_flow_item_ipv4 ipv4;
362                 struct rte_flow_item_ipv6 ipv6;
363         } l3_mask;
364         union {
365                 struct rte_flow_item_udp udp;
366                 struct rte_flow_item_tcp tcp;
367         } l4;
368         union {
369                 struct rte_flow_item_udp udp;
370                 struct rte_flow_item_tcp tcp;
371         } l4_mask;
372         struct rte_flow_action_queue queue;
373 };
374
375 /* Verbs specification header. */
376 struct ibv_spec_header {
377         enum ibv_flow_spec_type type;
378         uint16_t size;
379 };
380
381 /*
382  * Number of sub-priorities.
383  * For each kind of pattern matching (i.e. L2, L3, L4), to get correct
384  * matching on the NIC (firmware dependent), L4 must have the highest
385  * priority, followed by L3 and finally L2.
386  */
387 #define MLX5_PRIORITY_MAP_L2 2
388 #define MLX5_PRIORITY_MAP_L3 1
389 #define MLX5_PRIORITY_MAP_L4 0
390 #define MLX5_PRIORITY_MAP_MAX 3
391
392 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
393 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
394         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
395 };
396
397 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
398 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
399         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
400         { 9, 10, 11 }, { 12, 13, 14 },
401 };
402
403 /* Tunnel information. */
404 struct mlx5_flow_tunnel_info {
405         uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
406         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
407 };
408
409 static struct mlx5_flow_tunnel_info tunnels_info[] = {
410         {
411                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
412                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
413         },
414         {
415                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
416                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
417         },
418         {
419                 .tunnel = MLX5_FLOW_LAYER_GRE,
420                 .ptype = RTE_PTYPE_TUNNEL_GRE,
421         },
422         {
423                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
424                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
425         },
426         {
427                 .tunnel = MLX5_FLOW_LAYER_MPLS,
428                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
429         },
430 };
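
/*
 * Editor's illustrative sketch, not part of the original file: the
 * tunnels_info[] table above is meant to be scanned to translate the tunnel
 * bits of a flow's layer bit-field into an RTE_PTYPE_* value (e.g. to tag
 * Rx queues). The helper below shows one minimal way to do that lookup; its
 * name and exact use are assumptions made for illustration only.
 */
static uint32_t __rte_unused
mlx5_flow_tunnel_ptype_example(uint32_t layers)
{
        unsigned int i;

        for (i = 0; i < RTE_DIM(tunnels_info); ++i) {
                /* More specific entries (extra bits set) come first. */
                if ((layers & tunnels_info[i].tunnel) == tunnels_info[i].tunnel)
                        return tunnels_info[i].ptype;
        }
        return 0;
}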
431
432 /**
433  * Discover the maximum number of flow priorities available.
434  *
435  * @param[in] dev
436  *   Pointer to Ethernet device.
437  *
438  * @return
439  *   number of supported flow priorities on success, a negative errno
440  *   value otherwise and rte_errno is set.
441  */
442 int
443 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
444 {
445         struct {
446                 struct ibv_flow_attr attr;
447                 struct ibv_flow_spec_eth eth;
448                 struct ibv_flow_spec_action_drop drop;
449         } flow_attr = {
450                 .attr = {
451                         .num_of_specs = 2,
452                 },
453                 .eth = {
454                         .type = IBV_FLOW_SPEC_ETH,
455                         .size = sizeof(struct ibv_flow_spec_eth),
456                 },
457                 .drop = {
458                         .size = sizeof(struct ibv_flow_spec_action_drop),
459                         .type = IBV_FLOW_SPEC_ACTION_DROP,
460                 },
461         };
462         struct ibv_flow *flow;
463         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
464         uint16_t vprio[] = { 8, 16 };
465         int i;
466         int priority = 0;
467
468         if (!drop) {
469                 rte_errno = ENOTSUP;
470                 return -rte_errno;
471         }
472         for (i = 0; i != RTE_DIM(vprio); i++) {
473                 flow_attr.attr.priority = vprio[i] - 1;
474                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
475                 if (!flow)
476                         break;
477                 claim_zero(mlx5_glue->destroy_flow(flow));
478                 priority = vprio[i];
479         }
480         switch (priority) {
481         case 8:
482                 priority = RTE_DIM(priority_map_3);
483                 break;
484         case 16:
485                 priority = RTE_DIM(priority_map_5);
486                 break;
487         default:
488                 rte_errno = ENOTSUP;
489                 DRV_LOG(ERR, "port %u verbs maximum priority: %d expected 8/16",
490                         dev->data->port_id, vprio[i]);
491                 mlx5_hrxq_drop_release(dev);
492                 return -rte_errno;
493         }
494         mlx5_hrxq_drop_release(dev);
495         DRV_LOG(INFO, "port %u flow maximum priority: %d",
496                 dev->data->port_id, priority);
497         return priority;
498 }
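
/*
 * Editor's illustrative sketch, not part of the original file: the probed
 * value is expected to be kept in the device configuration so that
 * mlx5_flow_adjust_priority() below can pick the matching map. The wrapper
 * below only illustrates that flow; the real call site (device probing in
 * mlx5.c) is an assumption here.
 */
static int __rte_unused
mlx5_flow_priorities_example(struct rte_eth_dev *dev)
{
        struct priv *priv = dev->data->dev_private;
        int ret = mlx5_flow_discover_priorities(dev);

        if (ret < 0)
                return ret;
        priv->config.flow_prio = ret;
        return 0;
}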
499
500 /**
501  * Adjust flow priority.
502  *
503  * @param dev
504  *   Pointer to Ethernet device.
505  * @param flow
506  *   Pointer to an rte flow.
507  */
508 static void
509 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
510 {
511         struct priv *priv = dev->data->dev_private;
512         uint32_t priority = flow->attributes.priority;
513         uint32_t subpriority = flow->cur_verbs->attr->priority;
514
515         switch (priv->config.flow_prio) {
516         case RTE_DIM(priority_map_3):
517                 priority = priority_map_3[priority][subpriority];
518                 break;
519         case RTE_DIM(priority_map_5):
520                 priority = priority_map_5[priority][subpriority];
521                 break;
522         }
523         flow->cur_verbs->attr->priority = priority;
524 }
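
/*
 * Editor's worked example, not part of the original file: with 8 Verbs
 * priorities (priority_map_3), a flow created at user priority 1 whose
 * deepest matched layer is L3 (sub-priority MLX5_PRIORITY_MAP_L3 == 1) is
 * placed at Verbs priority priority_map_3[1][1] == 3. An L4 match of the
 * same user priority would map to 2 and an L2 match to 4, so more specific
 * layers always get the numerically lower (i.e. higher) Verbs priority.
 */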
525
526 /**
527  * Get a flow counter.
528  *
529  * @param[in] dev
530  *   Pointer to Ethernet device.
531  * @param[in] shared
532  *   Indicate if this counter is shared with other flows.
533  * @param[in] id
534  *   Counter identifier.
535  *
536  * @return
537  *   A pointer to the counter, NULL otherwise and rte_errno is set.
538  */
539 static struct mlx5_flow_counter *
540 mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
541 {
542         struct priv *priv = dev->data->dev_private;
543         struct mlx5_flow_counter *cnt;
544
545         LIST_FOREACH(cnt, &priv->flow_counters, next) {
546                 if (!cnt->shared || cnt->shared != shared)
547                         continue;
548                 if (cnt->id != id)
549                         continue;
550                 cnt->ref_cnt++;
551                 return cnt;
552         }
553 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
554
555         struct mlx5_flow_counter tmpl = {
556                 .shared = shared,
557                 .id = id,
558                 .cs = mlx5_glue->create_counter_set
559                         (priv->ctx,
560                          &(struct ibv_counter_set_init_attr){
561                                  .counter_set_id = id,
562                          }),
563                 .hits = 0,
564                 .bytes = 0,
565         };
566
567         if (!tmpl.cs) {
568                 rte_errno = errno;
569                 return NULL;
570         }
571         cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
572         if (!cnt) {
573                 rte_errno = ENOMEM;
574                 return NULL;
575         }
576         *cnt = tmpl;
577         LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
578         return cnt;
579 #endif
580         rte_errno = ENOTSUP;
581         return NULL;
582 }
583
584 /**
585  * Release a flow counter.
586  *
587  * @param[in] counter
588  *   Pointer to the counter handle.
589  */
590 static void
591 mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
592 {
593         if (--counter->ref_cnt == 0) {
594                 claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
595                 LIST_REMOVE(counter, next);
596                 rte_free(counter);
597         }
598 }
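
/*
 * Editor's illustrative sketch, not part of the original file: a COUNT
 * action handler would typically pair the two helpers above, taking a
 * counter reference when the flow is created and dropping it when the flow
 * is destroyed. The helper name below is hypothetical.
 */
static int __rte_unused
mlx5_flow_counter_usage_example(struct rte_eth_dev *dev, struct rte_flow *flow,
                                const struct rte_flow_action_count *count)
{
        flow->counter = mlx5_flow_counter_new(dev, count->shared, count->id);
        if (!flow->counter)
                return -rte_errno;
        /* ... later, when the flow is being destroyed ... */
        mlx5_flow_counter_release(flow->counter);
        flow->counter = NULL;
        return 0;
}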
599
600 /**
601  * Verify the @p attributes will be correctly understood by the NIC and store
602  * them in the @p flow if everything is correct.
603  *
604  * @param[in] dev
605  *   Pointer to Ethernet device structure.
606  * @param[in] attributes
607  *   Pointer to flow attributes
608  * @param[in, out] flow
609  *   Pointer to the rte_flow structure.
610  *
611  * @return
612  *   0 on success.
613  */
614 static int
615 mlx5_flow_attributes(struct rte_eth_dev *dev,
616                      const struct rte_flow_attr *attributes,
617                      struct rte_flow *flow)
618 {
619         struct priv *priv = dev->data->dev_private;
620         uint32_t priority_max = priv->config.flow_prio - 1;
621
622         flow->attributes = *attributes;
623         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
624                 flow->attributes.priority = priority_max;
625         return 0;
626 }
627
628 /**
629  * Verify the @p item specifications (spec, last, mask) are compatible with the
630  * NIC capabilities.
631  *
632  * @param[in] item
633  *   Item specification.
634  * @param[in] mask
635  *   @p item->mask or flow default bit-masks.
636  * @param[in] nic_mask
637  *   Bit-masks covering supported fields by the NIC to compare with user mask.
638  * @param[in] size
639  *   Size of the bit-masks in bytes.
640  * @param[out] error
641  *   Pointer to error structure.
642  *
643  * @return
644  *   0 on success, a negative errno value otherwise and rte_errno is set.
645  */
646 static int
647 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
648                           const uint8_t *mask,
649                           const uint8_t *nic_mask,
650                           unsigned int size,
651                           struct rte_flow_error *error)
652 {
653         unsigned int i;
654
655         assert(nic_mask);
656         for (i = 0; i < size; ++i)
657                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
658                         return rte_flow_error_set(error, ENOTSUP,
659                                                   RTE_FLOW_ERROR_TYPE_ITEM,
660                                                   item,
661                                                   "mask enables non supported"
662                                                   " bits");
663         if (!item->spec && (item->mask || item->last))
664                 return rte_flow_error_set(error, EINVAL,
665                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
666                                           "mask/last without a spec is not"
667                                           " supported");
668         if (item->spec && item->last) {
669                 uint8_t spec[size];
670                 uint8_t last[size];
671                 unsigned int i;
672                 int ret;
673
674                 for (i = 0; i < size; ++i) {
675                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
676                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
677                 }
678                 ret = memcmp(spec, last, size);
679                 if (ret != 0)
680                         return rte_flow_error_set(error, ENOTSUP,
681                                                   RTE_FLOW_ERROR_TYPE_ITEM,
682                                                   item,
683                                                   "range is not supported");
684         }
685         return 0;
686 }
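
/*
 * Editor's illustrative sketch, not part of the original file: an item
 * handler that only supports TCI matching on VLAN could validate the user
 * mask against the NIC-supported mask like this before converting anything.
 * The helper name and the chosen nic_mask are assumptions for illustration.
 */
static int __rte_unused
mlx5_flow_item_vlan_check_example(const struct rte_flow_item *item,
                                  struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan nic_mask = {
                .tci = RTE_BE16(0xffff),
        };
        const struct rte_flow_item_vlan *mask = item->mask;

        if (!mask)
                mask = &rte_flow_item_vlan_mask;
        return mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                         (const uint8_t *)&nic_mask,
                                         sizeof(struct rte_flow_item_vlan),
                                         error);
}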
687
688 /**
689  * Add a verbs item specification into @p flow.
690  *
691  * @param[in, out] flow
692  *   Pointer to flow structure.
693  * @param[in] src
694  *   Pointer to the specification to copy.
695  * @param[in] size
696  *   Size in bytes of the specification to copy.
697  */
698 static void
699 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
700 {
701         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
702
703         if (verbs->specs) {
704                 void *dst;
705
706                 dst = (void *)(verbs->specs + verbs->size);
707                 memcpy(dst, src, size);
708                 ++verbs->attr->num_of_specs;
709         }
710         verbs->size += size;
711 }
712
713 /**
714  * Adjust verbs hash fields according to the @p flow information.
715  *
716  * @param[in, out] flow
717  *   Pointer to flow structure.
718  * @param[in] tunnel
719  *   1 when the hash field is for a tunnel item.
720  * @param[in] layer_types
721  *   ETH_RSS_* types.
722  * @param[in] hash_fields
723  *   Item hash fields.
724  */
725 static void
726 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
727                                   int tunnel __rte_unused,
728                                   uint32_t layer_types, uint64_t hash_fields)
729 {
730 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
731         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
732         if (flow->rss.level == 2 && !tunnel)
733                 hash_fields = 0;
734         else if (flow->rss.level < 2 && tunnel)
735                 hash_fields = 0;
736 #endif
737         if (!(flow->rss.types & layer_types))
738                 hash_fields = 0;
739         flow->cur_verbs->hash_fields |= hash_fields;
740 }
741
742 /**
743  * Convert the @p item into a Verbs specification after ensuring the NIC
744  * will understand and process it correctly.
745  * If the necessary size for the conversion is greater than the @p flow_size,
746  * nothing is written in @p flow; the validation is still performed.
747  *
748  * @param[in] item
749  *   Item specification.
750  * @param[in, out] flow
751  *   Pointer to flow structure.
752  * @param[in] flow_size
753  *   Size in bytes of the available space in @p flow, if too small, nothing is
754  *   written.
755  *
756  * @return
757  *   On success, the number of bytes consumed/necessary. If the returned
758  *   value is less than or equal to @p flow_size, the @p item has been fully
759  *   converted; otherwise another call with the returned size should be made.
760  *   On error, a negative errno value is returned and rte_errno is set.
761  */
762 static int
763 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
764                    const size_t flow_size)
765 {
766         const struct rte_flow_item_eth *spec = item->spec;
767         const struct rte_flow_item_eth *mask = item->mask;
768         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
769         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
770         struct ibv_flow_spec_eth eth = {
771                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
772                 .size = size,
773         };
774
775         if (!mask)
776                 mask = &rte_flow_item_eth_mask;
777         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
778                 MLX5_FLOW_LAYER_OUTER_L2;
779         if (size > flow_size)
780                 return size;
781         if (spec) {
782                 unsigned int i;
783
784                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
785                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
786                 eth.val.ether_type = spec->type;
787                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
788                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
789                 eth.mask.ether_type = mask->type;
790                 /* Remove unwanted bits from values. */
791                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
792                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
793                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
794                 }
795                 eth.val.ether_type &= eth.mask.ether_type;
796         }
797         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
798         mlx5_flow_spec_verbs_add(flow, &eth, size);
799         return size;
800 }
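
/*
 * Editor's illustrative sketch, not part of the original file: how a caller
 * can honor the size contract documented above. A first pass with no room
 * only validates the item and reports the space required; once that much
 * room is available in flow->cur_verbs, a second pass performs the actual
 * conversion. The wrapper name is hypothetical.
 */
static int __rte_unused
mlx5_flow_item_eth_two_pass_example(const struct rte_flow_item *item,
                                    struct rte_flow *flow)
{
        /* Pass 1: flow_size == 0, nothing is written, the size is returned. */
        int needed = mlx5_flow_item_eth(item, flow, 0);

        if (needed < 0)
                return needed;
        /*
         * Pass 2: the caller is assumed to have prepared at least "needed"
         * bytes of specification space in flow->cur_verbs by now.
         */
        return mlx5_flow_item_eth(item, flow, (size_t)needed);
}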
801
802 /**
803  * Update the VLAN tag in the Verbs Ethernet specification.
804  *
805  * @param[in, out] attr
806  *   Pointer to Verbs attributes structure.
807  * @param[in] eth
808  *   Verbs structure containing the VLAN information to copy.
809  */
810 static void
811 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
812                            struct ibv_flow_spec_eth *eth)
813 {
814         unsigned int i;
815         const enum ibv_flow_spec_type search = eth->type;
816         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
817                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
818
819         for (i = 0; i != attr->num_of_specs; ++i) {
820                 if (hdr->type == search) {
821                         struct ibv_flow_spec_eth *e =
822                                 (struct ibv_flow_spec_eth *)hdr;
823
824                         e->val.vlan_tag = eth->val.vlan_tag;
825                         e->mask.vlan_tag = eth->mask.vlan_tag;
826                         e->val.ether_type = eth->val.ether_type;
827                         e->mask.ether_type = eth->mask.ether_type;
828                         break;
829                 }
830                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
831         }
832 }
833
834 /**
835  * Convert the @p item into a Verbs specification in @p flow (or update the
836  * Ethernet Verbs specification already present) after ensuring the NIC will
837  * understand and process it correctly.
838  * If the necessary size for the conversion is greater than the @p flow_size,
839  * nothing is written in @p flow; the validation is still performed.
840  *
841  * @param[in] item
842  *   Item specification.
843  * @param[in, out] flow
844  *   Pointer to flow structure.
845  * @param[in] flow_size
846  *   Size in bytes of the available space in @p flow, if too small, nothing is
847  *   written.
848  *
849  * @return
850  *   On success, the number of bytes consumed/necessary. If the returned
851  *   value is less than or equal to @p flow_size, the @p item has been fully
852  *   converted; otherwise another call with the returned size should be made.
853  *   On error, a negative errno value is returned and rte_errno is set.
854  */
855 static int
856 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
857                     const size_t flow_size)
858 {
859         const struct rte_flow_item_vlan *spec = item->spec;
860         const struct rte_flow_item_vlan *mask = item->mask;
861         unsigned int size = sizeof(struct ibv_flow_spec_eth);
862         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
863         struct ibv_flow_spec_eth eth = {
864                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
865                 .size = size,
866         };
867         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
868                 MLX5_FLOW_LAYER_OUTER_L2;
869
870         if (!mask)
871                 mask = &rte_flow_item_vlan_mask;
872         if (spec) {
873                 eth.val.vlan_tag = spec->tci;
874                 eth.mask.vlan_tag = mask->tci;
875                 eth.val.vlan_tag &= eth.mask.vlan_tag;
876                 eth.val.ether_type = spec->inner_type;
877                 eth.mask.ether_type = mask->inner_type;
878                 eth.val.ether_type &= eth.mask.ether_type;
879         }
880         if (!(flow->layers & l2m)) {
881                 if (size <= flow_size) {
882                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
883                         mlx5_flow_spec_verbs_add(flow, &eth, size);
884                 }
885         } else {
886                 if (flow->cur_verbs)
887                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
888                                                    &eth);
889                 size = 0; /* Only an update is done in eth specification. */
890         }
891         flow->layers |= tunnel ?
892                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
893                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
894         return size;
895 }
896
897 /**
898  * Convert the @p item into a Verbs specification after ensuring the NIC
899  * will understand and process it correctly.
900  * If the necessary size for the conversion is greater than the @p flow_size,
901  * nothing is written in @p flow; the validation is still performed.
902  *
903  * @param[in] item
904  *   Item specification.
905  * @param[in, out] flow
906  *   Pointer to flow structure.
907  * @param[in] flow_size
908  *   Size in bytes of the available space in @p flow, if too small, nothing is
909  *   written.
910  *
911  * @return
912  *   On success, the number of bytes consumed/necessary. If the returned
913  *   value is less than or equal to @p flow_size, the @p item has been fully
914  *   converted; otherwise another call with the returned size should be made.
915  */
916 static int
917 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
918                     const size_t flow_size)
919 {
920         const struct rte_flow_item_ipv4 *spec = item->spec;
921         const struct rte_flow_item_ipv4 *mask = item->mask;
922         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
923         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
924         struct ibv_flow_spec_ipv4_ext ipv4 = {
925                 .type = IBV_FLOW_SPEC_IPV4_EXT |
926                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
927                 .size = size,
928         };
929
930         if (!mask)
931                 mask = &rte_flow_item_ipv4_mask;
932         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
933                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
934         if (spec) {
935                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
936                         .src_ip = spec->hdr.src_addr,
937                         .dst_ip = spec->hdr.dst_addr,
938                         .proto = spec->hdr.next_proto_id,
939                         .tos = spec->hdr.type_of_service,
940                 };
941                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
942                         .src_ip = mask->hdr.src_addr,
943                         .dst_ip = mask->hdr.dst_addr,
944                         .proto = mask->hdr.next_proto_id,
945                         .tos = mask->hdr.type_of_service,
946                 };
947                 /* Remove unwanted bits from values. */
948                 ipv4.val.src_ip &= ipv4.mask.src_ip;
949                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
950                 ipv4.val.proto &= ipv4.mask.proto;
951                 ipv4.val.tos &= ipv4.mask.tos;
952         }
953         if (size <= flow_size) {
954                 mlx5_flow_verbs_hashfields_adjust
955                         (flow, tunnel,
956                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
957                           ETH_RSS_NONFRAG_IPV4_TCP |
958                           ETH_RSS_NONFRAG_IPV4_UDP |
959                           ETH_RSS_NONFRAG_IPV4_OTHER),
960                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
961                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
962                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
963         }
964         return size;
965 }
966
967 /**
968  * Convert the @p item into a Verbs specification after ensuring the NIC
969  * will understand and process it correctly.
970  * If the necessary size for the conversion is greater than the @p flow_size,
971  * nothing is written in @p flow; the validation is still performed.
972  *
973  * @param[in] item
974  *   Item specification.
975  * @param[in, out] flow
976  *   Pointer to flow structure.
977  * @param[in] flow_size
978  *   Size in bytes of the available space in @p flow, if too small, nothing is
979  *   written.
980  *
981  * @return
982  *   On success, the number of bytes consumed/necessary. If the returned
983  *   value is less than or equal to @p flow_size, the @p item has been fully
984  *   converted; otherwise another call with the returned size should be made.
985  */
986 static int
987 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
988                     const size_t flow_size)
989 {
990         const struct rte_flow_item_ipv6 *spec = item->spec;
991         const struct rte_flow_item_ipv6 *mask = item->mask;
992         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
993         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
994         struct ibv_flow_spec_ipv6 ipv6 = {
995                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
996                 .size = size,
997         };
998
999         if (!mask)
1000                 mask = &rte_flow_item_ipv6_mask;
1001         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1002                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1003         if (spec) {
1004                 unsigned int i;
1005                 uint32_t vtc_flow_val;
1006                 uint32_t vtc_flow_mask;
1007
1008                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1009                        RTE_DIM(ipv6.val.src_ip));
1010                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1011                        RTE_DIM(ipv6.val.dst_ip));
1012                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1013                        RTE_DIM(ipv6.mask.src_ip));
1014                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1015                        RTE_DIM(ipv6.mask.dst_ip));
1016                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1017                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1018                 ipv6.val.flow_label =
1019                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1020                                          IPV6_HDR_FL_SHIFT);
1021                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1022                                          IPV6_HDR_TC_SHIFT;
1023                 ipv6.val.next_hdr = spec->hdr.proto;
1024                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1025                 ipv6.mask.flow_label =
1026                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1027                                          IPV6_HDR_FL_SHIFT);
1028                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1029                                           IPV6_HDR_TC_SHIFT;
1030                 ipv6.mask.next_hdr = mask->hdr.proto;
1031                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1032                 /* Remove unwanted bits from values. */
1033                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1034                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1035                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1036                 }
1037                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1038                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1039                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1040                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1041         }
1042         if (size <= flow_size) {
1043                 mlx5_flow_verbs_hashfields_adjust
1044                         (flow, tunnel,
1045                          (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
1046                           ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP |
1047                           ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX |
1048                           ETH_RSS_IPV6_TCP_EX | ETH_RSS_IPV6_UDP_EX),
1049                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1050                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1051                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1052         }
1053         return size;
1054 }
1055
1056 /**
1057  * Convert the @p item into a Verbs specification after ensuring the NIC
1058  * will understand and process it correctly.
1059  * If the necessary size for the conversion is greater than the @p flow_size,
1060  * nothing is written in @p flow; the validation is still performed.
1061  *
1062  * @param[in] item
1063  *   Item specification.
1064  * @param[in, out] flow
1065  *   Pointer to flow structure.
1066  * @param[in] flow_size
1067  *   Size in bytes of the available space in @p flow, if too small, nothing is
1068  *   written.
1069  *
1070  * @return
1071  *   On success, the number of bytes consumed/necessary. If the returned
1072  *   value is less than or equal to @p flow_size, the @p item has been fully
1073  *   converted; otherwise another call with the returned size should be made.
1074  */
1075 static int
1076 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1077                    const size_t flow_size)
1078 {
1079         const struct rte_flow_item_udp *spec = item->spec;
1080         const struct rte_flow_item_udp *mask = item->mask;
1081         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1082         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1083         struct ibv_flow_spec_tcp_udp udp = {
1084                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1085                 .size = size,
1086         };
1087
1088         if (!mask)
1089                 mask = &rte_flow_item_udp_mask;
1090         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1091                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1092         if (spec) {
1093                 udp.val.dst_port = spec->hdr.dst_port;
1094                 udp.val.src_port = spec->hdr.src_port;
1095                 udp.mask.dst_port = mask->hdr.dst_port;
1096                 udp.mask.src_port = mask->hdr.src_port;
1097                 /* Remove unwanted bits from values. */
1098                 udp.val.src_port &= udp.mask.src_port;
1099                 udp.val.dst_port &= udp.mask.dst_port;
1100         }
1101         if (size <= flow_size) {
1102                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1103                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1104                                                    IBV_RX_HASH_DST_PORT_UDP));
1105                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1106                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1107         }
1108         return size;
1109 }
1110
1111 /**
1112  * Convert the @p item into a Verbs specification after ensuring the NIC
1113  * will understand and process it correctly.
1114  * If the necessary size for the conversion is greater than the @p flow_size,
1115  * nothing is written in @p flow; the validation is still performed.
1116  *
1117  * @param[in] item
1118  *   Item specification.
1119  * @param[in, out] flow
1120  *   Pointer to flow structure.
1121  * @param[in] flow_size
1122  *   Size in bytes of the available space in @p flow, if too small, nothing is
1123  *   written.
1124  * @param[out] error
1125  *   Pointer to error structure.
1126  *
1127  * @return
1128  *   On success, the number of bytes consumed/necessary. If the returned
1129  *   value is less than or equal to @p flow_size, the @p item has been fully
1130  *   converted; otherwise another call with the returned size should be made.
1131  */
1132 static int
1133 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1134                    const size_t flow_size)
1135 {
1136         const struct rte_flow_item_tcp *spec = item->spec;
1137         const struct rte_flow_item_tcp *mask = item->mask;
1138         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1139         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1140         struct ibv_flow_spec_tcp_udp tcp = {
1141                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1142                 .size = size,
1143         };
1144
1145         if (!mask)
1146                 mask = &rte_flow_item_tcp_mask;
1147         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1148                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1149         if (spec) {
1150                 tcp.val.dst_port = spec->hdr.dst_port;
1151                 tcp.val.src_port = spec->hdr.src_port;
1152                 tcp.mask.dst_port = mask->hdr.dst_port;
1153                 tcp.mask.src_port = mask->hdr.src_port;
1154                 /* Remove unwanted bits from values. */
1155                 tcp.val.src_port &= tcp.mask.src_port;
1156                 tcp.val.dst_port &= tcp.mask.dst_port;
1157         }
1158         if (size <= flow_size) {
1159                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1160                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1161                                                    IBV_RX_HASH_DST_PORT_TCP));
1162                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1163                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1164         }
1165         return size;
1166 }
1167
1168 /**
1169  * Convert the @p item into a Verbs specification after ensuring the NIC
1170  * will understand and process it correctly.
1171  * If the necessary size for the conversion is greater than the @p flow_size,
1172  * nothing is written in @p flow; the validation is still performed.
1173  *
1174  * @param[in] item
1175  *   Item specification.
1176  * @param[in, out] flow
1177  *   Pointer to flow structure.
1178  * @param[in] flow_size
1179  *   Size in bytes of the available space in @p flow, if too small, nothing is
1180  *   written.
1181  * @param[out] error
1182  *   Pointer to error structure.
1183  *
1184  * @return
1185  *   On success, the number of bytes consumed/necessary. If the returned
1186  *   value is less than or equal to @p flow_size, the @p item has been fully
1187  *   converted; otherwise another call with the returned size should be made.
1188  */
1189 static int
1190 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1191                      const size_t flow_size)
1192 {
1193         const struct rte_flow_item_vxlan *spec = item->spec;
1194         const struct rte_flow_item_vxlan *mask = item->mask;
1195         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1196         struct ibv_flow_spec_tunnel vxlan = {
1197                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1198                 .size = size,
1199         };
1200         union vni {
1201                 uint32_t vlan_id;
1202                 uint8_t vni[4];
1203         } id = { .vlan_id = 0, };
1204
1205         if (!mask)
1206                 mask = &rte_flow_item_vxlan_mask;
1207         if (spec) {
1208                 memcpy(&id.vni[1], spec->vni, 3);
1209                 vxlan.val.tunnel_id = id.vlan_id;
1210                 memcpy(&id.vni[1], mask->vni, 3);
1211                 vxlan.mask.tunnel_id = id.vlan_id;
1212                 /* Remove unwanted bits from values. */
1213                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1214         }
1215         if (size <= flow_size) {
1216                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1217                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1218         }
1219         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1220         return size;
1221 }
1222
1223 /**
1224  * Convert the @p item into a Verbs specification after ensuring the NIC
1225  * will understand and process it correctly.
1226  * If the necessary size for the conversion is greater than the @p flow_size,
1227  * nothing is written in @p flow; the validation is still performed.
1228  *
1229  * @param[in] item
1230  *   Item specification.
1231  * @param[in, out] flow
1232  *   Pointer to flow structure.
1233  * @param[in] flow_size
1234  *   Size in bytes of the available space in @p flow, if too small, nothing is
1235  *   written.
1236  * @param[out] error
1237  *   Pointer to error structure.
1238  *
1239  * @return
1240  *   On success, the number of bytes consumed/necessary. If the returned
1241  *   value is less than or equal to @p flow_size, the @p item has been fully
1242  *   converted; otherwise another call with the returned size should be made.
1243  */
1244 static int
1245 mlx5_flow_item_vxlan_gpe(const struct rte_flow_item *item,
1246                          struct rte_flow *flow, const size_t flow_size)
1247 {
1248         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1249         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1250         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1251         struct ibv_flow_spec_tunnel vxlan_gpe = {
1252                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1253                 .size = size,
1254         };
1255         union vni {
1256                 uint32_t vlan_id;
1257                 uint8_t vni[4];
1258         } id = { .vlan_id = 0, };
1259
1260         if (!mask)
1261                 mask = &rte_flow_item_vxlan_gpe_mask;
1262         if (spec) {
1263                 memcpy(&id.vni[1], spec->vni, 3);
1264                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1265                 memcpy(&id.vni[1], mask->vni, 3);
1266                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1267                 /* Remove unwanted bits from values. */
1268                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1269         }
1270         if (size <= flow_size) {
1271                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1272                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1273         }
1274         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1275         return size;
1276 }
1277
1278 /**
1279  * Update the protocol in Verbs IPv4/IPv6 spec.
1280  *
1281  * @param[in, out] attr
1282  *   Pointer to Verbs attributes structure.
1283  * @param[in] search
1284  *   Specification type to search in order to update the IP protocol.
1285  * @param[in] protocol
1286  *   Protocol value to set if none is present in the specification.
1287  */
1288 static void
1289 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1290                                       enum ibv_flow_spec_type search,
1291                                       uint8_t protocol)
1292 {
1293         unsigned int i;
1294         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
1295                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
1296
1297         if (!attr)
1298                 return;
1299         for (i = 0; i != attr->num_of_specs; ++i) {
1300                 if (hdr->type == search) {
1301                         union {
1302                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1303                                 struct ibv_flow_spec_ipv6 *ipv6;
1304                         } ip;
1305
1306                         switch (search) {
1307                         case IBV_FLOW_SPEC_IPV4_EXT:
1308                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1309                                 if (!ip.ipv4->val.proto) {
1310                                         ip.ipv4->val.proto = protocol;
1311                                         ip.ipv4->mask.proto = 0xff;
1312                                 }
1313                                 break;
1314                         case IBV_FLOW_SPEC_IPV6:
1315                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1316                                 if (!ip.ipv6->val.next_hdr) {
1317                                         ip.ipv6->val.next_hdr = protocol;
1318                                         ip.ipv6->mask.next_hdr = 0xff;
1319                                 }
1320                                 break;
1321                         default:
1322                                 break;
1323                         }
1324                         break;
1325                 }
1326                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1327         }
1328 }
1329
1330 /**
1331  * Convert the @p item into a Verbs specification after ensuring the NIC
1332  * will understand and process it correctly.
1333  * It also updates the previous L3 layer's protocol field with the IP
1334  * protocol value for GRE when none was specified.
1335  * If the necessary size for the conversion is greater than the @p flow_size,
1336  * nothing is written in @p flow; the validation is still performed.
1337  *
1338  * @param dev
1339  *   Pointer to Ethernet device.
1340  * @param[in] item
1341  *   Item specification.
1342  * @param[in, out] flow
1343  *   Pointer to flow structure.
1344  * @param[in] flow_size
1345  *   Size in bytes of the available space in @p flow, if too small, nothing is
1346  *   written.
1347  *
1348  * @return
1349  *   On success, the number of bytes consumed/necessary. If the returned
1350  *   value is less than or equal to @p flow_size, the @p item has been fully
1351  *   converted; otherwise another call with the returned size should be made.
1352  */
1353 static int
1354 mlx5_flow_item_gre(const struct rte_flow_item *item __rte_unused,
1355                    struct rte_flow *flow, const size_t flow_size)
1356 {
1357         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1358 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1359         const struct rte_flow_item_gre *spec = item->spec;
1360         const struct rte_flow_item_gre *mask = item->mask;
1361         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1362         struct ibv_flow_spec_gre tunnel = {
1363                 .type = IBV_FLOW_SPEC_GRE,
1364                 .size = size,
1365         };
1366 #else
1367         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1368         struct ibv_flow_spec_tunnel tunnel = {
1369                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1370                 .size = size,
1371         };
1372 #endif
1373
1374 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1375         if (!mask)
1376                 mask = &rte_flow_item_gre_mask;
1377         if (spec) {
1378                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1379                 tunnel.val.protocol = spec->protocol;
1380                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1381                 tunnel.mask.protocol = mask->protocol;
1382                 /* Remove unwanted bits from values. */
1383                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1384                 tunnel.val.protocol &= tunnel.mask.protocol;
1385                 tunnel.val.key &= tunnel.mask.key;
1386         }
1388 #endif /* HAVE_IBV_DEVICE_MPLS_SUPPORT */
1389         if (size <= flow_size) {
1390                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1391                         mlx5_flow_item_gre_ip_protocol_update
1392                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1393                                  MLX5_IP_PROTOCOL_GRE);
1394                 else
1395                         mlx5_flow_item_gre_ip_protocol_update
1396                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1397                                  MLX5_IP_PROTOCOL_GRE);
1398                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1399                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1400         }
1401         flow->layers |= MLX5_FLOW_LAYER_GRE;
1402         return size;
1403 }
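/*
 * Illustrative sketch (hypothetical pattern, not part of the driver): a
 * pattern this converter is meant to handle.  The IPv4 item leaves the IP
 * protocol unspecified and the GRE item triggers the back-patching above.
 *
 *     const struct rte_flow_item pattern[] = {
 *             { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *             { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *             { .type = RTE_FLOW_ITEM_TYPE_GRE },
 *             { .type = RTE_FLOW_ITEM_TYPE_END },
 *     };
 */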
1404
1405 /**
1406  * Convert the @p item into a Verbs specification after ensuring the NIC
1407  * will understand and process it correctly.
1408  * If the necessary size for the conversion is greater than the @p flow_size,
1409  * nothing is written in @p flow, the validation is still performed.
1410  *
1411  * @param[in] item
1412  *   Item specification.
1413  * @param[in, out] flow
1414  *   Pointer to flow structure.
1415  * @param[in] flow_size
1416  *   Size in bytes of the available space in @p flow, if too small, nothing is
1417  *   written.
1418  * @param[out] error
1419  *   Pointer to error structure.
1420  *
1421  * @return
1422  *   On success the number of bytes consumed/necessary, if the returned value
1423  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
1424  *   otherwise another call with this returned memory size should be done.
1425  *   On error, a negative errno value is returned and rte_errno is set.
1426  */
1427 static int
1428 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
1429                     struct rte_flow *flow __rte_unused,
1430                     const size_t flow_size __rte_unused,
1431                     struct rte_flow_error *error)
1432 {
1433 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1434         const struct rte_flow_item_mpls *spec = item->spec;
1435         const struct rte_flow_item_mpls *mask = item->mask;
1436         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1437         struct ibv_flow_spec_mpls mpls = {
1438                 .type = IBV_FLOW_SPEC_MPLS,
1439                 .size = size,
1440         };
1441
1442         if (!mask)
1443                 mask = &rte_flow_item_mpls_mask;
1444         if (spec) {
1445                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1446                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1447                 /* Remove unwanted bits from values.  */
1448                 mpls.val.label &= mpls.mask.label;
1449         }
1450         if (size <= flow_size) {
1451                 mlx5_flow_spec_verbs_add(flow, &mpls, size);
1452                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1453         }
1454         flow->layers |= MLX5_FLOW_LAYER_MPLS;
1455         return size;
1456 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1457         return rte_flow_error_set(error, ENOTSUP,
1458                                   RTE_FLOW_ERROR_TYPE_ITEM,
1459                                   item,
1460                                   "MPLS is not supported by Verbs, please"
1461                                   " update.");
1462 }
1463
1464 /**
1465  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1466  * will understand and process it correctly.
1467  * The conversion is performed item per item, each of them is written into
1468  * the @p flow if its size is lesser or equal to @p flow_size.
1469  * Validation and memory consumption computation are still performed until the
1470  * end of @p pattern, unless an error is encountered.
1471  *
1472  * @param[in] pattern
1473  *   Flow pattern.
1474  * @param[in, out] flow
1475  *   Pointer to the rte_flow structure.
1476  * @param[in] flow_size
1477  *   Size in bytes of the available space in @p flow, if too small some
1478  *   garbage may be present.
1479  * @param[out] error
1480  *   Pointer to error structure.
1481  *
1482  * @return
1483  *   On success the number of bytes consumed/necessary, if the returned value
1484  *   is lesser or equal to @p flow_size, the @p pattern has fully been
1485  *   converted, otherwise another call with this returned memory size should
1486  *   be done.
1487  *   On error, a negative errno value is returned and rte_errno is set.
1488  */
1489 static int
1490 mlx5_flow_items(const struct rte_flow_item pattern[],
1491                 struct rte_flow *flow, const size_t flow_size,
1492                 struct rte_flow_error *error)
1493 {
1494         int remain = flow_size;
1495         size_t size = 0;
1496
1497         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1498                 int ret = 0;
1499
1500                 switch (pattern->type) {
1501                 case RTE_FLOW_ITEM_TYPE_VOID:
1502                         break;
1503                 case RTE_FLOW_ITEM_TYPE_ETH:
1504                         ret = mlx5_flow_item_eth(pattern, flow, remain);
1505                         break;
1506                 case RTE_FLOW_ITEM_TYPE_VLAN:
1507                         ret = mlx5_flow_item_vlan(pattern, flow, remain);
1508                         break;
1509                 case RTE_FLOW_ITEM_TYPE_IPV4:
1510                         ret = mlx5_flow_item_ipv4(pattern, flow, remain);
1511                         break;
1512                 case RTE_FLOW_ITEM_TYPE_IPV6:
1513                         ret = mlx5_flow_item_ipv6(pattern, flow, remain);
1514                         break;
1515                 case RTE_FLOW_ITEM_TYPE_UDP:
1516                         ret = mlx5_flow_item_udp(pattern, flow, remain);
1517                         break;
1518                 case RTE_FLOW_ITEM_TYPE_TCP:
1519                         ret = mlx5_flow_item_tcp(pattern, flow, remain);
1520                         break;
1521                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1522                         ret = mlx5_flow_item_vxlan(pattern, flow, remain);
1523                         break;
1524                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1525                         ret = mlx5_flow_item_vxlan_gpe(pattern, flow,
1526                                                        remain);
1527                         break;
1528                 case RTE_FLOW_ITEM_TYPE_GRE:
1529                         ret = mlx5_flow_item_gre(pattern, flow, remain);
1530                         break;
1531                 case RTE_FLOW_ITEM_TYPE_MPLS:
1532                         ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
1533                         break;
1534                 default:
1535                         return rte_flow_error_set(error, ENOTSUP,
1536                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1537                                                   pattern,
1538                                                   "item not supported");
1539                 }
1540                 if (ret < 0)
1541                         return ret;
1542                 if (remain > ret)
1543                         remain -= ret;
1544                 else
1545                         remain = 0;
1546                 size += ret;
1547         }
1548         if (!flow->layers) {
1549                 const struct rte_flow_item item = {
1550                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1551                 };
1552
1553                 return mlx5_flow_item_eth(&item, flow, flow_size);
1554         }
1555         return size;
1556 }
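/*
 * Usage sketch (hypothetical caller, not part of the driver): like the other
 * conversion helpers, this function can be called with a zero flow_size to
 * learn the space required, then again with a buffer of at least that size.
 *
 *     int size = mlx5_flow_items(pattern, flow, 0, &error);
 *
 *     if (size < 0)
 *             return size;
 *     (ensure the flow buffer can hold "size" more bytes, then)
 *     size = mlx5_flow_items(pattern, flow, size, &error);
 */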
1557
1558 /**
1559  * Convert the @p action into a Verbs specification after ensuring the NIC
1560  * will understand and process it correctly.
1561  * If the necessary size for the conversion is greater than the @p flow_size,
1562  * nothing is written in @p flow, the validation is still performed.
1563  *
1564  * @param[in, out] flow
1565  *   Pointer to flow structure.
1566  * @param[in] flow_size
1567  *   Size in bytes of the available space in @p flow, if too small, nothing is
1568  *   written.
1569  *
1570  * @return
1571  *   On success the number of bytes consumed/necessary, if the returned value
1572  *   is lesser or equal to @p flow_size, the @p action has fully been
1573  *   converted, otherwise another call with this returned memory size should
1574  *   be done.
1575  *   On error, a negative errno value is returned and rte_errno is set.
1576  */
1577 static int
1578 mlx5_flow_action_drop(struct rte_flow *flow, const size_t flow_size)
1579 {
1580         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1581         struct ibv_flow_spec_action_drop drop = {
1582                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1583                         .size = size,
1584         };
1585
1586         if (size <= flow_size)
1587                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1588         flow->fate |= MLX5_FLOW_FATE_DROP;
1589         return size;
1590 }
1591
1592 /**
1593  * Convert the @p action into @p flow after ensuring the NIC will understand
1594  * and process it correctly.
1595  *
1596  * @param[in] action
1597  *   Action configuration.
1598  * @param[in, out] flow
1599  *   Pointer to flow structure.
1600  *
1601  * @return
1602  *   0 on success, a negative errno value otherwise and rte_errno is set.
1603  */
1604 static int
1605 mlx5_flow_action_queue(const struct rte_flow_action *action,
1606                        struct rte_flow *flow)
1607 {
1608         const struct rte_flow_action_queue *queue = action->conf;
1609
1610         if (flow->queue)
1611                 (*flow->queue)[0] = queue->index;
1612         flow->rss.queue_num = 1;
1613         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1614         return 0;
1615 }
1616
1617 /**
1618  * Ensure the @p action will be understood and used correctly by the NIC.
1619  *
1620  * @param[in] action
1621  *   Action configuration.
1622  * @param[in, out] flow
1623  *   Pointer to the rte_flow structure.
1624  *
1625  * @return
1626  *   0 on success.
1627  */
1628 static int
1629 mlx5_flow_action_rss(const struct rte_flow_action *action,
1630                         struct rte_flow *flow)
1631 {
1632         const struct rte_flow_action_rss *rss = action->conf;
1633
1634         if (flow->queue)
1635                 memcpy((*flow->queue), rss->queue,
1636                        rss->queue_num * sizeof(uint16_t));
1637         flow->rss.queue_num = rss->queue_num;
1638         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1639         flow->rss.types = rss->types;
1640         flow->rss.level = rss->level;
1641         flow->fate |= MLX5_FLOW_FATE_RSS;
1642         return 0;
1643 }
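/*
 * Illustrative example (hypothetical values): the action configuration
 * copied by the function above.  Validation further down requires key_len
 * to be exactly MLX5_RSS_HASH_KEY_LEN and every listed queue to be
 * configured.
 *
 *     static const uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
 *     static const uint16_t rss_queues[] = { 0, 1, 2, 3 };
 *     const struct rte_flow_action_rss rss_conf = {
 *             .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
 *             .level = 1,
 *             .types = ETH_RSS_IP,
 *             .key_len = MLX5_RSS_HASH_KEY_LEN,
 *             .key = rss_key,
 *             .queue_num = RTE_DIM(rss_queues),
 *             .queue = rss_queues,
 *     };
 */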
1644
1645 /**
1646  * Convert the @p action into a Verbs specification after ensuring the NIC
1647  * will understand and process it correctly.
1648  * If the necessary size for the conversion is greater than the @p flow_size,
1649  * nothing is written in @p flow, the validation is still performed.
1650  *
1651  * @param[in, out] flow
1652  *   Pointer to flow structure.
1653  * @param[in] flow_size
1654  *   Size in bytes of the available space in @p flow, if too small, nothing is
1655  *   written.
1656  *
1657  * @return
1658  *   On success the number of bytes consumed/necessary, if the returned value
1659  *   is lesser or equal to @p flow_size, the @p action has fully been
1660  *   converted, otherwise another call with this returned memory size should
1661  *   be done.
1662  */
1663 static int
1664 mlx5_flow_action_flag(struct rte_flow *flow, const size_t flow_size)
1665 {
1666         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1667         struct ibv_flow_spec_action_tag tag = {
1668                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1669                 .size = size,
1670                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1671         };
1672         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1673
1674         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1675                 size = 0;
1676         else if (size <= flow_size && verbs)
1677                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1678         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1679         return size;
1680 }
1681
1682 /**
1683  * Update verbs specification to modify the flag to mark.
1684  *
1685  * @param[in, out] verbs
1686  *   Pointer to the mlx5_flow_verbs structure.
1687  * @param[in] mark_id
1688  *   Mark identifier to replace the flag.
1689  */
1690 static void
1691 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1692 {
1693         struct ibv_spec_header *hdr;
1694         int i;
1695
1696         if (!verbs)
1697                 return;
1698         /* Update Verbs specification. */
1699         hdr = (struct ibv_spec_header *)verbs->specs;
1700         if (!hdr)
1701                 return;
1702         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1703                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1704                         struct ibv_flow_spec_action_tag *t =
1705                                 (struct ibv_flow_spec_action_tag *)hdr;
1706
1707                         t->tag_id = mlx5_flow_mark_set(mark_id);
1708                 }
1709                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1710         }
1711 }
1712
1713 /**
1714  * Convert the @p action into @p flow (or update the already present Flag
1715  * Verbs specification) after ensuring the NIC will understand and
1716  * process it correctly.
1717  * If the necessary size for the conversion is greater than the @p flow_size,
1718  * nothing is written in @p flow, the validation is still performed.
1719  *
1720  * @param[in] action
1721  *   Action configuration.
1722  * @param[in, out] flow
1723  *   Pointer to flow structure.
1724  * @param[in] flow_size
1725  *   Size in bytes of the available space in @p flow, if too small, nothing is
1726  *   written.
1727  *
1728  * @return
1729  *   On success the number of bytes consumed/necessary, if the returned value
1730  *   is lesser or equal to @p flow_size, the @p action has fully been
1731  *   converted, otherwise another call with this returned memory size should
1732  *   be done.
1733  */
1734 static int
1735 mlx5_flow_action_mark(const struct rte_flow_action *action,
1736                       struct rte_flow *flow, const size_t flow_size)
1737 {
1738         const struct rte_flow_action_mark *mark = action->conf;
1739         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1740         struct ibv_flow_spec_action_tag tag = {
1741                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1742                 .size = size,
1743         };
1744         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1745
1746         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1747                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1748                 size = 0;
1749         } else if (size <= flow_size) {
1750                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1751                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1752         }
1753         flow->modifier |= MLX5_FLOW_MOD_MARK;
1754         return size;
1755 }
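/*
 * Illustrative example (hypothetical id): the MARK configuration consumed
 * above.  The id is encoded through mlx5_flow_mark_set() before being
 * written to the tag specification; when a FLAG action has already been
 * converted, the existing tag specification is rewritten in place and no
 * extra space is consumed.
 *
 *     const struct rte_flow_action_mark mark = { .id = 42 };
 *     const struct rte_flow_action action = {
 *             .type = RTE_FLOW_ACTION_TYPE_MARK,
 *             .conf = &mark,
 *     };
 */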
1756
1757 /**
1758  * Convert the @p action into a Verbs specification after ensuring the NIC
1759  * will understand and process it correctly.
1760  * If the necessary size for the conversion is greater than the @p flow_size,
1761  * nothing is written in @p flow, the validation is still performed.
1762  *
1763  * @param[in] action
1764  *   Action configuration.
1765  * @param[in, out] flow
1766  *   Pointer to flow structure.
1767  * @param[in] flow_size
1768  *   Size in bytes of the available space in @p flow, if too small, nothing is
1769  *   written.
1770  * @param[out] error
1771  *   Pointer to error structure.
1772  *
1773  * @return
1774  *   On success the number of bytes consumed/necessary, if the returned value
1775  *   is lesser or equal to @p flow_size, the @p action has fully been
1776  *   converted, otherwise another call with this returned memory size should
1777  *   be done.
1778  *   On error, a negative errno value is returned and rte_errno is set.
1779  */
1780 static int
1781 mlx5_flow_action_count(struct rte_eth_dev *dev,
1782                        const struct rte_flow_action *action,
1783                        struct rte_flow *flow,
1784                        const size_t flow_size __rte_unused,
1785                        struct rte_flow_error *error)
1786 {
1787         const struct rte_flow_action_count *count = action->conf;
1788 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1789         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1790         struct ibv_flow_spec_counter_action counter = {
1791                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1792                 .size = size,
1793         };
1794 #endif
1795
1796         if (!flow->counter) {
1797                 flow->counter = mlx5_flow_counter_new(dev, count->shared,
1798                                                       count->id);
1799                 if (!flow->counter)
1800                         return rte_flow_error_set(error, ENOTSUP,
1801                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1802                                                   action,
1803                                                   "cannot get counter"
1804                                                   " context.");
1805         }
1806         flow->modifier |= MLX5_FLOW_MOD_COUNT;
1807 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1808         counter.counter_set_handle = flow->counter->cs->handle;
1809         if (size <= flow_size)
1810                 mlx5_flow_spec_verbs_add(flow, &counter, size);
1811         return size;
1812 #endif
1813         return 0;
1814 }
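/*
 * Illustrative example (hypothetical values): a shared counter reused by
 * several rules; mlx5_flow_counter_new() is expected to return the same
 * counter context for an identical (shared, id) pair.
 *
 *     const struct rte_flow_action_count count_conf = {
 *             .shared = 1,
 *             .id = 7,
 *     };
 */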
1815
1816 /**
1817  * Convert the @p actions into @p flow after ensuring the NIC will understand
1818  * and process it correctly.
1819  * The conversion is performed action per action, each of them is written into
1820  * the @p flow if its size is lesser or equal to @p flow_size.
1821  * Validation and memory consumption computation are still performed until the
1822  * end of @p actions, unless an error is encountered.
1823  *
1824  * @param[in] dev
1825  *   Pointer to Ethernet device structure.
1826  * @param[in] actions
1827  *   Pointer to flow actions array.
1828  * @param[in, out] flow
1829  *   Pointer to the rte_flow structure.
1830  * @param[in] flow_size
1831  *   Size in bytes of the available space in @p flow, if too small some
1832  *   garbage may be present.
1833  * @param[out] error
1834  *   Pointer to error structure.
1835  *
1836  * @return
1837  *   On success the number of bytes consumed/necessary, if the returned value
1838  *   is lesser or equal to @p flow_size, the @p actions have fully been
1839  *   converted, otherwise another call with this returned memory size should
1840  *   be done.
1841  *   On error, a negative errno value is returned and rte_errno is set.
1842  */
1843 static int
1844 mlx5_flow_actions(struct rte_eth_dev *dev,
1845                   const struct rte_flow_action actions[],
1846                   struct rte_flow *flow, const size_t flow_size,
1847                   struct rte_flow_error *error)
1848 {
1849         size_t size = 0;
1850         int remain = flow_size;
1851         int ret = 0;
1852
1853         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1854                 switch (actions->type) {
1855                 case RTE_FLOW_ACTION_TYPE_VOID:
1856                         break;
1857                 case RTE_FLOW_ACTION_TYPE_FLAG:
1858                         ret = mlx5_flow_action_flag(flow, remain);
1859                         break;
1860                 case RTE_FLOW_ACTION_TYPE_MARK:
1861                         ret = mlx5_flow_action_mark(actions, flow, remain);
1862                         break;
1863                 case RTE_FLOW_ACTION_TYPE_DROP:
1864                         ret = mlx5_flow_action_drop(flow, remain);
1865                         break;
1866                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1867                         ret = mlx5_flow_action_queue(actions, flow);
1868                         break;
1869                 case RTE_FLOW_ACTION_TYPE_RSS:
1870                         ret = mlx5_flow_action_rss(actions, flow);
1871                         break;
1872                 case RTE_FLOW_ACTION_TYPE_COUNT:
1873                         ret = mlx5_flow_action_count(dev, actions, flow, remain,
1874                                                      error);
1875                         break;
1876                 default:
1877                         return rte_flow_error_set(error, ENOTSUP,
1878                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1879                                                   actions,
1880                                                   "action not supported");
1881                 }
1882                 if (ret < 0)
1883                         return ret;
1884                 if (remain > ret)
1885                         remain -= ret;
1886                 else
1887                         remain = 0;
1888                 size += ret;
1889         }
1890         if (!flow->fate)
1891                 return rte_flow_error_set(error, ENOTSUP,
1892                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1893                                           NULL,
1894                                           "no fate action found");
1895         return size;
1896 }
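/*
 * Usage sketch (hypothetical values): a minimal action list accepted by the
 * function above.  At least one fate action (DROP, QUEUE or RSS) must be
 * present, otherwise the conversion fails with "no fate action found".
 *
 *     const struct rte_flow_action_queue queue_conf = { .index = 0 };
 *     const struct rte_flow_action actions[] = {
 *             { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *             { .type = RTE_FLOW_ACTION_TYPE_END },
 *     };
 */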
1897
1898 /**
1899  * Validate a switch (transfer) flow rule and fill the flow structure accordingly.
1900  *
1901  * @param dev
1902  *   Pointer to Ethernet device.
1903  * @param[out] flow
1904  *   Pointer to flow structure.
1905  * @param flow_size
1906  *   Size of allocated space for @p flow.
1907  * @param[in] attr
1908  *   Flow rule attributes.
1909  * @param[in] pattern
1910  *   Pattern specification (list terminated by the END pattern item).
1911  * @param[in] actions
1912  *   Associated actions (list terminated by the END action).
1913  * @param[out] error
1914  *   Perform verbose error reporting if not NULL.
1915  *
1916  * @return
1917  *   A positive value representing the size of the flow object in bytes
1918  *   regardless of @p flow_size on success, a negative errno value otherwise
1919  *   and rte_errno is set.
1920  */
1921 static int
1922 mlx5_flow_merge_switch(struct rte_eth_dev *dev,
1923                        struct rte_flow *flow,
1924                        size_t flow_size,
1925                        const struct rte_flow_attr *attr,
1926                        const struct rte_flow_item pattern[],
1927                        const struct rte_flow_action actions[],
1928                        struct rte_flow_error *error)
1929 {
1930         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
1931         uint16_t port_id[!n + n];
1932         struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
1933         size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
1934         unsigned int i;
1935         unsigned int own = 0;
1936         int ret;
1937
1938         /* At least one port is needed when no switch domain is present. */
1939         if (!n) {
1940                 n = 1;
1941                 port_id[0] = dev->data->port_id;
1942         } else {
1943                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
1944         }
1945         for (i = 0; i != n; ++i) {
1946                 struct rte_eth_dev_info dev_info;
1947
1948                 rte_eth_dev_info_get(port_id[i], &dev_info);
1949                 if (port_id[i] == dev->data->port_id)
1950                         own = i;
1951                 ptoi[i].port_id = port_id[i];
1952                 ptoi[i].ifindex = dev_info.if_index;
1953         }
1954         /* Ensure first entry of ptoi[] is the current device. */
1955         if (own) {
1956                 ptoi[n] = ptoi[0];
1957                 ptoi[0] = ptoi[own];
1958                 ptoi[own] = ptoi[n];
1959         }
1960         /* An entry with zero ifindex terminates ptoi[]. */
1961         ptoi[n].port_id = 0;
1962         ptoi[n].ifindex = 0;
1963         if (flow_size < off)
1964                 flow_size = 0;
1965         ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
1966                                      flow_size ? flow_size - off : 0,
1967                                      ptoi, attr, pattern, actions, error);
1968         if (ret < 0)
1969                 return ret;
1970         if (flow_size) {
1971                 *flow = (struct rte_flow){
1972                         .attributes = *attr,
1973                         .nl_flow = (uint8_t *)flow + off,
1974                 };
1975                 /*
1976                  * Generate a reasonably unique handle based on the address
1977                  * of the target buffer.
1978                  *
1979                  * This is straightforward on 32-bit systems where the flow
1980                  * pointer can be used directly. Otherwise, its least
1981                  * significant part is taken after shifting it by the
1982                  * previous power of two of the pointed buffer size.
1983                  */
1984                 if (sizeof(flow) <= 4)
1985                         mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
1986                 else
1987                         mlx5_nl_flow_brand
1988                                 (flow->nl_flow,
1989                                  (uintptr_t)flow >>
1990                                  rte_log2_u32(rte_align32prevpow2(flow_size)));
1991         }
1992         return off + ret;
1993 }
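/*
 * Layout note (illustrative): on success the buffer produced by the function
 * above holds the rte_flow structure first, followed by the Netlink message
 * built by mlx5_nl_flow_transpose():
 *
 *     0 ........... struct rte_flow
 *     off ......... Netlink flow message (ret bytes)
 *     off + ret ... end of the buffer (the returned size)
 *
 * with off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t)).
 */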
1994
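/**
 * Select the RSS expansion graph root according to the pattern and the
 * requested RSS level.
 *
 * @param[in] pattern
 *   Flow pattern (list terminated by the END pattern item).
 * @param[in] rss_level
 *   RSS level requested by the RSS action.
 *
 * @return
 *   Graph root node index to pass to rte_flow_expand_rss().
 */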
1995 static unsigned int
1996 mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
1997 {
1998         const struct rte_flow_item *item;
1999         unsigned int has_vlan = 0;
2000
2001         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2002                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2003                         has_vlan = 1;
2004                         break;
2005                 }
2006         }
2007         if (has_vlan)
2008                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2009                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2010         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2011                                MLX5_EXPANSION_ROOT_OUTER;
2012 }
2013
2014 /**
2015  * Convert the @p attributes, @p pattern and @p actions into a flow for the NIC
2016  * after ensuring the NIC will understand and process it correctly.
2017  * The conversion is only performed item/action per item/action, each of
2018  * them is written into the @p flow if its size is lesser or equal to @p
2019  * flow_size.
2020  * Validation and memory consumption computation are still performed until the
2021  * end, unless an error is encountered.
2022  *
2023  * @param[in] dev
2024  *   Pointer to Ethernet device.
2025  * @param[in, out] flow
2026  *   Pointer to flow structure.
2027  * @param[in] flow_size
2028  *   Size in bytes of the available space in @p flow, if too small some
2029  *   garbage may be present.
2030  * @param[in] attributes
2031  *   Flow rule attributes.
2032  * @param[in] pattern
2033  *   Pattern specification (list terminated by the END pattern item).
2034  * @param[in] actions
2035  *   Associated actions (list terminated by the END action).
2036  * @param[out] error
2037  *   Perform verbose error reporting if not NULL.
2038  *
2039  * @return
2040  *   On success the number of bytes consumed/necessary, if the returned value
2041  *   is lesser or equal to @p flow_size, the flow has fully been converted and
2042  *   can be applied, otherwise another call with this returned memory size
2043  *   should be done.
2044  *   On error, a negative errno value is returned and rte_errno is set.
2045  */
2046 static int
2047 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2048                 const size_t flow_size,
2049                 const struct rte_flow_attr *attributes,
2050                 const struct rte_flow_item pattern[],
2051                 const struct rte_flow_action actions[],
2052                 struct rte_flow_error *error)
2053 {
2054         struct rte_flow local_flow = { .layers = 0, };
2055         size_t size = sizeof(*flow);
2056         union {
2057                 struct rte_flow_expand_rss buf;
2058                 uint8_t buffer[2048];
2059         } expand_buffer;
2060         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2061         struct mlx5_flow_verbs *original_verbs = NULL;
2062         size_t original_verbs_size = 0;
2063         uint32_t original_layers = 0;
2064         int expanded_pattern_idx = 0;
2065         int ret = 0;
2066         uint32_t i;
2067
2068         if (attributes->transfer)
2069                 return mlx5_flow_merge_switch(dev, flow, flow_size,
2070                                               attributes, pattern,
2071                                               actions, error);
2072         if (size > flow_size)
2073                 flow = &local_flow;
2074         ret = mlx5_flow_attributes(dev->data->dev_private, attributes, flow);
2075         if (ret < 0)
2076                 return ret;
2077         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2078         if (ret < 0)
2079                 return ret;
2080         if (local_flow.rss.types) {
2081                 unsigned int graph_root;
2082
2083                 graph_root = mlx5_find_graph_root(pattern,
2084                                                   local_flow.rss.level);
2085                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2086                                           pattern, local_flow.rss.types,
2087                                           mlx5_support_expansion,
2088                                           graph_root);
2089                 assert(ret > 0 &&
2090                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2091         } else {
2092                 buf->entries = 1;
2093                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2094         }
2095         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2096                                sizeof(void *));
2097         if (size <= flow_size)
2098                 flow->queue = (void *)(flow + 1);
2099         LIST_INIT(&flow->verbs);
2100         flow->layers = 0;
2101         flow->modifier = 0;
2102         flow->fate = 0;
2103         for (i = 0; i != buf->entries; ++i) {
2104                 size_t off = size;
2105                 size_t off2;
2106
2107                 flow->layers = original_layers;
2108                 size += sizeof(struct ibv_flow_attr) +
2109                         sizeof(struct mlx5_flow_verbs);
2110                 off2 = size;
2111                 if (size < flow_size) {
2112                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2113                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2114                         flow->cur_verbs->specs =
2115                                 (void *)(flow->cur_verbs->attr + 1);
2116                 }
2117                 /* First iteration convert the pattern into Verbs. */
2118                 if (i == 0) {
2119                         /* Actions don't need to be converted several times. */
2120                         ret = mlx5_flow_actions(dev, actions, flow,
2121                                                 (size < flow_size) ?
2122                                                 flow_size - size : 0,
2123                                                 error);
2124                         if (ret < 0)
2125                                 return ret;
2126                         size += ret;
2127                 } else {
2128                         /*
2129                          * Next iteration means the pattern has already been
2130                          * converted and an expansion is necessary to match
2131                          * the user RSS request.  For that, only the expanded
2132                          * items are converted; the common part shared with the
2133                          * user pattern is just copied into the next buffer
2134                          * zone.
2135                          */
2136                         size += original_verbs_size;
2137                         if (size < flow_size) {
2138                                 rte_memcpy(flow->cur_verbs->attr,
2139                                            original_verbs->attr,
2140                                            original_verbs_size +
2141                                            sizeof(struct ibv_flow_attr));
2142                                 flow->cur_verbs->size = original_verbs_size;
2143                         }
2144                 }
2145                 ret = mlx5_flow_items
2146                         ((const struct rte_flow_item *)
2147                          &buf->entry[i].pattern[expanded_pattern_idx],
2148                          flow,
2149                          (size < flow_size) ? flow_size - size : 0, error);
2150                 if (ret < 0)
2151                         return ret;
2152                 size += ret;
2153                 if (size <= flow_size) {
2154                         mlx5_flow_adjust_priority(dev, flow);
2155                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2156                 }
2157                 /*
2158                  * Keep a pointer to the first Verbs conversion and the layers
2159                  * it has encountered.
2160                  */
2161                 if (i == 0) {
2162                         original_verbs = flow->cur_verbs;
2163                         original_verbs_size = size - off2;
2164                         original_layers = flow->layers;
2165                         /*
2166                          * Move the index of the expanded pattern to the
2167                          * first item not addressed yet.
2168                          */
2169                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2170                                 expanded_pattern_idx++;
2171                         } else {
2172                                 const struct rte_flow_item *item = pattern;
2173
2174                                 for (item = pattern;
2175                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2176                                      ++item)
2177                                         expanded_pattern_idx++;
2178                         }
2179                 }
2180         }
2181         /* Restore the original layers in the flow. */
2182         flow->layers = original_layers;
2183         return size;
2184 }
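/*
 * Usage sketch (hypothetical caller, simplified error handling): the
 * conversion is meant to be called twice, first with a zero size to compute
 * the memory the flow needs, then with a buffer of at least that size.
 *
 *     struct rte_flow_error error;
 *     struct rte_flow *flow;
 *     int size;
 *
 *     size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, &error);
 *     if (size < 0)
 *             return NULL;
 *     flow = rte_calloc(__func__, 1, size, 0);
 *     if (!flow)
 *             return NULL;
 *     size = mlx5_flow_merge(dev, flow, size, attr, pattern, actions,
 *                            &error);
 */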
2185
2186 /**
2187  * Look up and set the tunnel ptype in the Rx queue data.  Only a single
2188  * ptype can be advertised; if several tunnel types are in use on this
2189  * queue, the tunnel ptype is cleared.
2190  *
2191  * @param rxq_ctrl
2192  *   Rx queue to update.
2193  */
2194 static void
2195 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2196 {
2197         unsigned int i;
2198         uint32_t tunnel_ptype = 0;
2199
2200         /* Look up the ptype to use. */
2201         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2202                 if (!rxq_ctrl->flow_tunnels_n[i])
2203                         continue;
2204                 if (!tunnel_ptype) {
2205                         tunnel_ptype = tunnels_info[i].ptype;
2206                 } else {
2207                         tunnel_ptype = 0;
2208                         break;
2209                 }
2210         }
2211         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2212 }
2213
2214 /**
2215  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2216  *
2217  * @param[in] dev
2218  *   Pointer to Ethernet device.
2219  * @param[in] flow
2220  *   Pointer to flow structure.
2221  */
2222 static void
2223 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2224 {
2225         struct priv *priv = dev->data->dev_private;
2226         const int mark = !!(flow->modifier &
2227                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2228         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2229         unsigned int i;
2230
2231         for (i = 0; i != flow->rss.queue_num; ++i) {
2232                 int idx = (*flow->queue)[i];
2233                 struct mlx5_rxq_ctrl *rxq_ctrl =
2234                         container_of((*priv->rxqs)[idx],
2235                                      struct mlx5_rxq_ctrl, rxq);
2236
2237                 if (mark) {
2238                         rxq_ctrl->rxq.mark = 1;
2239                         rxq_ctrl->flow_mark_n++;
2240                 }
2241                 if (tunnel) {
2242                         unsigned int j;
2243
2244                         /* Increase the counter matching the flow. */
2245                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2246                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2247                                     tunnels_info[j].tunnel) {
2248                                         rxq_ctrl->flow_tunnels_n[j]++;
2249                                         break;
2250                                 }
2251                         }
2252                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2253                 }
2254         }
2255 }
2256
2257 /**
2258  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2259  * @p flow if no other flow uses it with the same kind of request.
2260  *
2261  * @param dev
2262  *   Pointer to Ethernet device.
2263  * @param[in] flow
2264  *   Pointer to the flow.
2265  */
2266 static void
2267 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2268 {
2269         struct priv *priv = dev->data->dev_private;
2270         const int mark = !!(flow->modifier &
2271                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2272         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2273         unsigned int i;
2274
2275         assert(dev->data->dev_started);
2276         for (i = 0; i != flow->rss.queue_num; ++i) {
2277                 int idx = (*flow->queue)[i];
2278                 struct mlx5_rxq_ctrl *rxq_ctrl =
2279                         container_of((*priv->rxqs)[idx],
2280                                      struct mlx5_rxq_ctrl, rxq);
2281
2282                 if (mark) {
2283                         rxq_ctrl->flow_mark_n--;
2284                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2285                 }
2286                 if (tunnel) {
2287                         unsigned int j;
2288
2289                         /* Decrease the counter matching the flow. */
2290                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2291                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2292                                     tunnels_info[j].tunnel) {
2293                                         rxq_ctrl->flow_tunnels_n[j]--;
2294                                         break;
2295                                 }
2296                         }
2297                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2298                 }
2299         }
2300 }
2301
2302 /**
2303  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2304  *
2305  * @param dev
2306  *   Pointer to Ethernet device.
2307  */
2308 static void
2309 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2310 {
2311         struct priv *priv = dev->data->dev_private;
2312         unsigned int i;
2313
2314         for (i = 0; i != priv->rxqs_n; ++i) {
2315                 struct mlx5_rxq_ctrl *rxq_ctrl;
2316                 unsigned int j;
2317
2318                 if (!(*priv->rxqs)[i])
2319                         continue;
2320                 rxq_ctrl = container_of((*priv->rxqs)[i],
2321                                         struct mlx5_rxq_ctrl, rxq);
2322                 rxq_ctrl->flow_mark_n = 0;
2323                 rxq_ctrl->rxq.mark = 0;
2324                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2325                         rxq_ctrl->flow_tunnels_n[j] = 0;
2326                 rxq_ctrl->rxq.tunnel = 0;
2327         }
2328 }
2329
2330 /*
2331  * Validate the flag action.
2332  *
2333  * @param[in] action_flags
2334  *   Bit-fields that holds the actions detected until now.
2335  * @param[out] error
2336  *   Pointer to error structure.
2337  *
2338  * @return
2339  *   0 on success, a negative errno value otherwise and rte_errno is set.
2340  */
2341 static int
2342 mlx5_flow_validate_action_flag(uint64_t action_flags,
2343                                struct rte_flow_error *error)
2344 {
2346         if (action_flags & MLX5_FLOW_ACTION_DROP)
2347                 return rte_flow_error_set(error, EINVAL,
2348                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2349                                           "can't drop and flag in same flow");
2350         if (action_flags & MLX5_FLOW_ACTION_MARK)
2351                 return rte_flow_error_set(error, EINVAL,
2352                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2353                                           "can't mark and flag in same flow");
2354         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2355                 return rte_flow_error_set(error, EINVAL,
2356                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2357                                           "can't have 2 flag"
2358                                           " actions in same flow");
2359         return 0;
2360 }
2361
2362 /*
2363  * Validate the mark action.
2364  *
2365  * @param[in] action
2366  *   Pointer to the mark action.
2367  * @param[in] action_flags
2368  *   Bit-fields that holds the actions detected until now.
2369  * @param[out] error
2370  *   Pointer to error structure.
2371  *
2372  * @return
2373  *   0 on success, a negative errno value otherwise and rte_errno is set.
2374  */
2375 static int
2376 mlx5_flow_validate_action_mark(const struct rte_flow_action *action,
2377                                uint64_t action_flags,
2378                                struct rte_flow_error *error)
2379 {
2380         const struct rte_flow_action_mark *mark = action->conf;
2381
2382         if (!mark)
2383                 return rte_flow_error_set(error, EINVAL,
2384                                           RTE_FLOW_ERROR_TYPE_ACTION,
2385                                           action,
2386                                           "configuration cannot be null");
2387         if (mark->id >= MLX5_FLOW_MARK_MAX)
2388                 return rte_flow_error_set(error, EINVAL,
2389                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2390                                           &mark->id,
2391                                           "mark id must be in 0 <= id < "
2392                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2393         if (action_flags & MLX5_FLOW_ACTION_DROP)
2394                 return rte_flow_error_set(error, EINVAL,
2395                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2396                                           "can't drop and mark in same flow");
2397         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2398                 return rte_flow_error_set(error, EINVAL,
2399                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2400                                           "can't flag and mark in same flow");
2401         if (action_flags & MLX5_FLOW_ACTION_MARK)
2402                 return rte_flow_error_set(error, EINVAL,
2403                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2404                                           "can't have 2 mark actions in same"
2405                                           " flow");
2406         return 0;
2407 }
2408
2409 /*
2410  * Validate the drop action.
2411  *
2412  * @param[in] action_flags
2413  *   Bit-fields that holds the actions detected until now.
2414  * @param[out] error
2415  *   Pointer to error structure.
2416  *
2417  * @return
2418  *   0 on success, a negative errno value otherwise and rte_errno is set.
2419  */
2420 static int
2421 mlx5_flow_validate_action_drop(uint64_t action_flags,
2422                                struct rte_flow_error *error)
2423 {
2424         if (action_flags & MLX5_FLOW_ACTION_FLAG)
2425                 return rte_flow_error_set(error, EINVAL,
2426                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2427                                           "can't drop and flag in same flow");
2428         if (action_flags & MLX5_FLOW_ACTION_MARK)
2429                 return rte_flow_error_set(error, EINVAL,
2430                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2431                                           "can't drop and mark in same flow");
2432         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2433                 return rte_flow_error_set(error, EINVAL,
2434                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2435                                           "can't have 2 fate actions in"
2436                                           " same flow");
2437         return 0;
2438 }
2439
2440 /*
2441  * Validate the queue action.
2442  *
2443  * @param[in] action
2444  *   Pointer to the queue action.
2445  * @param[in] action_flags
2446  *   Bit-fields that holds the actions detected until now.
2447  * @param[in] dev
2448  *   Pointer to the Ethernet device structure.
2449  * @param[out] error
2450  *   Pointer to error structure.
2451  *
2452  * @return
2453  *   0 on success, a negative errno value otherwise and rte_errno is set.
2454  */
2455 static int
2456 mlx5_flow_validate_action_queue(const struct rte_flow_action *action,
2457                                 uint64_t action_flags,
2458                                 struct rte_eth_dev *dev,
2459                                 struct rte_flow_error *error)
2460 {
2461         struct priv *priv = dev->data->dev_private;
2462         const struct rte_flow_action_queue *queue = action->conf;
2463
2464         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2465                 return rte_flow_error_set(error, EINVAL,
2466                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2467                                           "can't have 2 fate actions in"
2468                                           " same flow");
2469         if (queue->index >= priv->rxqs_n)
2470                 return rte_flow_error_set(error, EINVAL,
2471                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2472                                           &queue->index,
2473                                           "queue index out of range");
2474         if (!(*priv->rxqs)[queue->index])
2475                 return rte_flow_error_set(error, EINVAL,
2476                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2477                                           &queue->index,
2478                                           "queue is not configured");
2479         return 0;
2480 }
2481
2482 /*
2483  * Validate the rss action.
2484  *
2485  * @param[in] action
2486  *   Pointer to the RSS action.
2487  * @param[in] action_flags
2488  *   Bit-fields that holds the actions detected until now.
2489  * @param[in] dev
2490  *   Pointer to the Ethernet device structure.
2491  * @param[out] error
2492  *   Pointer to error structure.
2493  *
2494  * @return
2495  *   0 on success, a negative errno value otherwise and rte_errno is set.
2496  */
2497 static int
2498 mlx5_flow_validate_action_rss(const struct rte_flow_action *action,
2499                               uint64_t action_flags,
2500                               struct rte_eth_dev *dev,
2501                               struct rte_flow_error *error)
2502 {
2503         struct priv *priv = dev->data->dev_private;
2504         const struct rte_flow_action_rss *rss = action->conf;
2505         unsigned int i;
2506
2507         if (action_flags & MLX5_FLOW_FATE_ACTIONS)
2508                 return rte_flow_error_set(error, EINVAL,
2509                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2510                                           "can't have 2 fate actions"
2511                                           " in same flow");
2512         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2513             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2514                 return rte_flow_error_set(error, ENOTSUP,
2515                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2516                                           &rss->func,
2517                                           "RSS hash function not supported");
2518 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2519         if (rss->level > 2)
2520 #else
2521         if (rss->level > 1)
2522 #endif
2523                 return rte_flow_error_set(error, ENOTSUP,
2524                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2525                                           &rss->level,
2526                                           "tunnel RSS is not supported");
2527         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2528                 return rte_flow_error_set(error, ENOTSUP,
2529                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2530                                           &rss->key_len,
2531                                           "RSS hash key too small");
2532         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2533                 return rte_flow_error_set(error, ENOTSUP,
2534                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2535                                           &rss->key_len,
2536                                           "RSS hash key too large");
2537         if (rss->queue_num > priv->config.ind_table_max_size)
2538                 return rte_flow_error_set(error, ENOTSUP,
2539                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2540                                           &rss->queue_num,
2541                                           "number of queues too large");
2542         if (rss->types & MLX5_RSS_HF_MASK)
2543                 return rte_flow_error_set(error, ENOTSUP,
2544                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2545                                           &rss->types,
2546                                           "some RSS protocols are not"
2547                                           " supported");
2548         for (i = 0; i != rss->queue_num; ++i) {
2549                 if (!(*priv->rxqs)[rss->queue[i]])
2550                         return rte_flow_error_set
2551                                 (error, EINVAL, RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2552                                  &rss->queue[i], "queue is not configured");
2553         }
2554         return 0;
2555 }
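/*
 * Summary note (illustrative): an RSS action passes the check above only
 * when the hash function is DEFAULT or TOEPLITZ, the RSS level stays within
 * what the Verbs library supports, key_len equals MLX5_RSS_HASH_KEY_LEN,
 * queue_num does not exceed the indirection table size, no bit of
 * MLX5_RSS_HF_MASK is requested in types and every listed queue is
 * configured.
 */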
2556
2557 /*
2558  * Validate the count action.
2559  *
2560  * @param[in] dev
2561  *   Pointer to the Ethernet device structure.
2562  * @param[out] error
2563  *   Pointer to error structure.
2564  *
2565  * @return
2566  *   0 on success, a negative errno value otherwise and rte_errno is set.
2567  */
2568 static int
2569 mlx5_flow_validate_action_count(struct rte_eth_dev *dev,
2570                                 struct rte_flow_error *error)
2571 {
2572         struct priv *priv = dev->data->dev_private;
2573
2574         if (!priv->config.flow_counter_en)
2575                 return rte_flow_error_set(error, ENOTSUP,
2576                                           RTE_FLOW_ERROR_TYPE_ACTION, NULL,
2577                                           "flow counters are not supported.");
2578         return 0;
2579 }
2580
2581 /**
2582  * Verify the @p attributes will be correctly understood by the NIC.
2584  *
2585  * @param[in] dev
2586  *   Pointer to the Ethernet device structure.
2587  * @param[in] attributes
2588  *   Pointer to flow attributes
2589  * @param[out] error
2590  *   Pointer to error structure.
2591  *
2592  * @return
2593  *   0 on success, a negative errno value otherwise and rte_errno is set.
2594  */
2595 static int
2596 mlx5_flow_validate_attributes(struct rte_eth_dev *dev,
2597                               const struct rte_flow_attr *attributes,
2598                               struct rte_flow_error *error)
2599 {
2600         struct priv *priv = dev->data->dev_private;
2601         uint32_t priority_max = priv->config.flow_prio - 1;
2602
2603         if (attributes->group)
2604                 return rte_flow_error_set(error, ENOTSUP,
2605                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
2606                                           NULL, "groups are not supported");
2607         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
2608             attributes->priority >= priority_max)
2609                 return rte_flow_error_set(error, ENOTSUP,
2610                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
2611                                           NULL, "priority out of range");
2612         if (attributes->egress)
2613                 return rte_flow_error_set(error, ENOTSUP,
2614                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS, NULL,
2615                                           "egress is not supported");
2616         if (attributes->transfer)
2617                 return rte_flow_error_set(error, ENOTSUP,
2618                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
2619                                           NULL, "transfer is not supported");
2620         if (!attributes->ingress)
2621                 return rte_flow_error_set(error, EINVAL,
2622                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
2623                                           NULL,
2624                                           "ingress attribute is mandatory");
2625         return 0;
2626 }
2627
2628 /**
2629  * Validate Ethernet item.
2630  *
2631  * @param[in] item
2632  *   Item specification.
2633  * @param[in] item_flags
2634  *   Bit-fields that holds the items detected until now.
2635  * @param[out] error
2636  *   Pointer to error structure.
2637  *
2638  * @return
2639  *   0 on success, a negative errno value otherwise and rte_errno is set.
2640  */
2641 static int
2642 mlx5_flow_validate_item_eth(const struct rte_flow_item *item,
2643                             uint64_t item_flags,
2644                             struct rte_flow_error *error)
2645 {
2646         const struct rte_flow_item_eth *mask = item->mask;
2647         const struct rte_flow_item_eth nic_mask = {
2648                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2649                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
2650                 .type = RTE_BE16(0xffff),
2651         };
2652         int ret;
2653         int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2654
2655         if (item_flags & MLX5_FLOW_LAYER_OUTER_L2)
2656                 return rte_flow_error_set(error, ENOTSUP,
2657                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2658                                           "3 levels of l2 are not supported");
2659         if ((item_flags & MLX5_FLOW_LAYER_INNER_L2) && !tunnel)
2660                 return rte_flow_error_set(error, ENOTSUP,
2661                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2662                                           "2 L2 without tunnel are not supported");
2663         if (!mask)
2664                 mask = &rte_flow_item_eth_mask;
2665         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2666                                         (const uint8_t *)&nic_mask,
2667                                         sizeof(struct rte_flow_item_eth),
2668                                         error);
2669         return ret;
2670 }
2671
2672 /**
2673  * Validate VLAN item.
2674  *
2675  * @param[in] item
2676  *   Item specification.
2677  * @param[in] item_flags
2678  *   Bit-fields that hold the items detected until now.
2679  * @param[out] error
2680  *   Pointer to error structure.
2681  *
2682  * @return
2683  *   0 on success, a negative errno value otherwise and rte_errno is set.
2684  */
2685 static int
2686 mlx5_flow_validate_item_vlan(const struct rte_flow_item *item,
2687                              uint64_t item_flags,
2688                              struct rte_flow_error *error)
2689 {
2690         const struct rte_flow_item_vlan *spec = item->spec;
2691         const struct rte_flow_item_vlan *mask = item->mask;
2692         const struct rte_flow_item_vlan nic_mask = {
2693                 .tci = RTE_BE16(0x0fff),
2694                 .inner_type = RTE_BE16(0xffff),
2695         };
2696         uint16_t vlan_tag = 0;
2697         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2698         int ret;
2699         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
2700                                         MLX5_FLOW_LAYER_INNER_L4) :
2701                                        (MLX5_FLOW_LAYER_OUTER_L3 |
2702                                         MLX5_FLOW_LAYER_OUTER_L4);
2703         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
2704                                         MLX5_FLOW_LAYER_OUTER_VLAN;
2705
2706         if (item_flags & vlanm)
2707                 return rte_flow_error_set(error, EINVAL,
2708                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2709                                           "VLAN layer already configured");
2710         else if ((item_flags & l34m) != 0)
2711                 return rte_flow_error_set(error, EINVAL,
2712                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2713                                           "L2 layer cannot follow L3/L4 layer");
2714         if (!mask)
2715                 mask = &rte_flow_item_vlan_mask;
2716         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2717                                         (const uint8_t *)&nic_mask,
2718                                         sizeof(struct rte_flow_item_vlan),
2719                                         error);
2720         if (ret)
2721                 return ret;
2722         if (spec) {
2723                 vlan_tag = spec->tci;
2724                 vlan_tag &= mask->tci;
2725         }
2726         /*
2727          * From verbs perspective an empty VLAN is equivalent
2728          * to a packet without VLAN layer.
2729          */
2730         if (!vlan_tag)
2731                 return rte_flow_error_set(error, EINVAL,
2732                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
2733                                           item->spec,
2734                                           "VLAN cannot be empty");
2735         return 0;
2736 }
2737
2738 /**
2739  * Validate IPV4 item.
2740  *
2741  * @param[in] item
2742  *   Item specification.
2743  * @param[in] item_flags
2744  *   Bit-fields that hold the items detected until now.
2745  * @param[out] error
2746  *   Pointer to error structure.
2747  *
2748  * @return
2749  *   0 on success, a negative errno value otherwise and rte_errno is set.
2750  */
2751 static int
2752 mlx5_flow_validate_item_ipv4(const struct rte_flow_item *item,
2753                              uint64_t item_flags,
2754                              struct rte_flow_error *error)
2755 {
2756         const struct rte_flow_item_ipv4 *mask = item->mask;
2757         const struct rte_flow_item_ipv4 nic_mask = {
2758                 .hdr = {
2759                         .src_addr = RTE_BE32(0xffffffff),
2760                         .dst_addr = RTE_BE32(0xffffffff),
2761                         .type_of_service = 0xff,
2762                         .next_proto_id = 0xff,
2763                 },
2764         };
2765         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2766         int ret;
2767
2768         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2769                                    MLX5_FLOW_LAYER_OUTER_L3))
2770                 return rte_flow_error_set(error, ENOTSUP,
2771                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2772                                           "multiple L3 layers not supported");
2773         else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2774                                         MLX5_FLOW_LAYER_OUTER_L4))
2775                 return rte_flow_error_set(error, EINVAL,
2776                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2777                                           "L3 cannot follow an L4 layer.");
2778         if (!mask)
2779                 mask = &rte_flow_item_ipv4_mask;
2780         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2781                                         (const uint8_t *)&nic_mask,
2782                                         sizeof(struct rte_flow_item_ipv4),
2783                                         error);
2784         if (ret < 0)
2785                 return ret;
2786         return 0;
2787 }
2788
2789 /**
2790  * Validate IPV6 item.
2791  *
2792  * @param[in] item
2793  *   Item specification.
2794  * @param[in] item_flags
2795  *   Bit-fields that hold the items detected until now.
2796  * @param[out] error
2797  *   Pointer to error structure.
2798  *
2799  * @return
2800  *   0 on success, a negative errno value otherwise and rte_errno is set.
2801  */
2802 static int
2803 mlx5_flow_validate_item_ipv6(const struct rte_flow_item *item,
2804                              uint64_t item_flags,
2805                              struct rte_flow_error *error)
2806 {
2807         const struct rte_flow_item_ipv6 *mask = item->mask;
2808         const struct rte_flow_item_ipv6 nic_mask = {
2809                 .hdr = {
2810                         .src_addr =
2811                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2812                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2813                         .dst_addr =
2814                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
2815                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
2816                         .vtc_flow = RTE_BE32(0xffffffff),
2817                         .proto = 0xff,
2818                         .hop_limits = 0xff,
2819                 },
2820         };
2821         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2822         int ret;
2823
2824         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2825                                    MLX5_FLOW_LAYER_OUTER_L3))
2826                 return rte_flow_error_set(error, ENOTSUP,
2827                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2828                                           "multiple L3 layers not supported");
2829         else if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2830                                         MLX5_FLOW_LAYER_OUTER_L4))
2831                 return rte_flow_error_set(error, EINVAL,
2832                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2833                                           "L3 cannot follow an L4 layer.");
2834         /*
2835          * IPv6 is not recognised by the NIC inside a GRE tunnel.
2836          * Such support has to be refused explicitly here because the
2837          * rule would otherwise be accepted without ever matching.  Issue
2838          * reproduced with Mellanox OFED 4.3-3.0.2.1 and 4.4-1.0.0.0.
2839          */
2840         if (tunnel && item_flags & MLX5_FLOW_LAYER_GRE)
2841                 return rte_flow_error_set(error, ENOTSUP,
2842                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2843                                           "IPv6 inside a GRE tunnel is"
2844                                           " not recognised.");
2845         if (!mask)
2846                 mask = &rte_flow_item_ipv6_mask;
2847         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
2848                                         (const uint8_t *)&nic_mask,
2849                                         sizeof(struct rte_flow_item_ipv6),
2850                                         error);
2851         if (ret < 0)
2852                 return ret;
2853         return 0;
2854 }
2855
2856 /**
2857  * Validate UDP item.
2858  *
2859  * @param[in] item
2860  *   Item specification.
2861  * @param[in] item_flags
2862  *   Bit-fields that hold the items detected until now.
2863  * @param[in] target_protocol
2864  *   The next protocol in the previous item.
2865  * @param[out] error
2866  *   Pointer to error structure.
2867  *
2868  * @return
2869  *   0 on success, a negative errno value otherwise and rte_errno is set.
2870  */
2871 static int
2872 mlx5_flow_validate_item_udp(const struct rte_flow_item *item,
2873                             uint64_t item_flags,
2874                             uint8_t target_protocol,
2875                             struct rte_flow_error *error)
2876 {
2877         const struct rte_flow_item_udp *mask = item->mask;
2878         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2879         int ret;
2880
2881         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_UDP)
2882                 return rte_flow_error_set(error, EINVAL,
2883                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2884                                           "protocol filtering not compatible"
2885                                           " with UDP layer");
2886         if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2887                                      MLX5_FLOW_LAYER_OUTER_L3)))
2888                 return rte_flow_error_set(error, EINVAL,
2889                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2890                                           "L3 is mandatory to filter on L4");
2891         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2892                                    MLX5_FLOW_LAYER_OUTER_L4))
2893                 return rte_flow_error_set(error, EINVAL,
2894                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2895                                           "L4 layer is already present");
2896         if (!mask)
2897                 mask = &rte_flow_item_udp_mask;
2898         ret = mlx5_flow_item_acceptable
2899                 (item, (const uint8_t *)mask,
2900                  (const uint8_t *)&rte_flow_item_udp_mask,
2901                  sizeof(struct rte_flow_item_udp), error);
2902         if (ret < 0)
2903                 return ret;
2904         return 0;
2905 }
2906
2907 /**
2908  * Validate TCP item.
2909  *
2910  * @param[in] item
2911  *   Item specification.
2912  * @param[in] item_flags
2913  *   Bit-fields that hold the items detected until now.
2914  * @param[in] target_protocol
2915  *   The next protocol in the previous item.
2916  * @param[out] error
2917  *   Pointer to error structure.
2918  *
2919  * @return
2920  *   0 on success, a negative errno value otherwise and rte_errno is set.
2921  */
2922 static int
2923 mlx5_flow_validate_item_tcp(const struct rte_flow_item *item,
2924                             uint64_t item_flags,
2925                             uint8_t target_protocol,
2926                             struct rte_flow_error *error)
2927 {
2928         const struct rte_flow_item_tcp *mask = item->mask;
2929         const int tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
2930         int ret;
2931
2932         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_TCP)
2933                 return rte_flow_error_set(error, EINVAL,
2934                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2935                                           "protocol filtering not compatible"
2936                                           " with TCP layer");
2937         if (!(item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
2938                                      MLX5_FLOW_LAYER_OUTER_L3)))
2939                 return rte_flow_error_set(error, EINVAL,
2940                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2941                                           "L3 is mandatory to filter on L4");
2942         if (item_flags & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
2943                                    MLX5_FLOW_LAYER_OUTER_L4))
2944                 return rte_flow_error_set(error, EINVAL,
2945                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2946                                           "L4 layer is already present");
2947         if (!mask)
2948                 mask = &rte_flow_item_tcp_mask;
2949         ret = mlx5_flow_item_acceptable
2950                 (item, (const uint8_t *)mask,
2951                  (const uint8_t *)&rte_flow_item_tcp_mask,
2952                  sizeof(struct rte_flow_item_tcp), error);
2953         if (ret < 0)
2954                 return ret;
2955         return 0;
2956 }
2957
2958 /**
2959  * Validate VXLAN item.
2960  *
2961  * @param[in] item
2962  *   Item specification.
2963  * @param[in] item_flags
2964  *   Bit-fields that hold the items detected until now.
2967  * @param[out] error
2968  *   Pointer to error structure.
2969  *
2970  * @return
2971  *   0 on success, a negative errno value otherwise and rte_errno is set.
2972  */
2973 static int
2974 mlx5_flow_validate_item_vxlan(const struct rte_flow_item *item,
2975                               uint64_t item_flags,
2976                               struct rte_flow_error *error)
2977 {
2978         const struct rte_flow_item_vxlan *spec = item->spec;
2979         const struct rte_flow_item_vxlan *mask = item->mask;
2980         int ret;
2981         union vni {
2982                 uint32_t vlan_id;
2983                 uint8_t vni[4];
2984         } id = { .vlan_id = 0, };
2985         uint32_t vlan_id = 0;
2986
2987
2988         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
2989                 return rte_flow_error_set(error, ENOTSUP,
2990                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2991                                           "a tunnel is already present");
2992         /*
2993          * Verify only UDPv4 is present as defined in
2994          * https://tools.ietf.org/html/rfc7348
2995          */
2996         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
2997                 return rte_flow_error_set(error, EINVAL,
2998                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
2999                                           "no outer UDP layer found");
3000         if (!mask)
3001                 mask = &rte_flow_item_vxlan_mask;
3002         ret = mlx5_flow_item_acceptable
3003                 (item, (const uint8_t *)mask,
3004                  (const uint8_t *)&rte_flow_item_vxlan_mask,
3005                  sizeof(struct rte_flow_item_vxlan),
3006                  error);
3007         if (ret < 0)
3008                 return ret;
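        /*
         * The VNI is a 24-bit field: spec and mask are copied into bytes
         * 1..3 of a 32-bit scratch word (the union member is named
         * "vlan_id" but carries the VNI here) so that they can be combined
         * with a single 32-bit AND; a zero (or unspecified) VNI is rejected
         * below.
         */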
3009         if (spec) {
3010                 memcpy(&id.vni[1], spec->vni, 3);
3011                 vlan_id = id.vlan_id;
3012                 memcpy(&id.vni[1], mask->vni, 3);
3013                 vlan_id &= id.vlan_id;
3014         }
3015         /*
3016          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
3017          * only this layer is defined in the Verbs specification, it is
3018          * interpreted as a wildcard and all packets will match this
3019          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
3020          * udp), all packets matching the preceding layers will also
3021          * match this rule.  To avoid such a situation, VNI 0 is
3022          * currently refused.
3023          */
3024         if (!vlan_id)
3025                 return rte_flow_error_set(error, ENOTSUP,
3026                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3027                                           "VXLAN vni cannot be 0");
3028         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3029                 return rte_flow_error_set(error, ENOTSUP,
3030                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3031                                           "VXLAN tunnel must be fully defined");
3032         return 0;
3033 }
3034
3035 /**
3036  * Validate VXLAN_GPE item.
3037  *
3038  * @param[in] item
3039  *   Item specification.
3040  * @param[in] item_flags
3041  *   Bit-fields that hold the items detected until now.
3042  * @param[in] dev
3043  *   Pointer to the Ethernet device structure.
3046  * @param[out] error
3047  *   Pointer to error structure.
3048  *
3049  * @return
3050  *   0 on success, a negative errno value otherwise and rte_errno is set.
3051  */
3052 static int
3053 mlx5_flow_validate_item_vxlan_gpe(const struct rte_flow_item *item,
3054                                   uint64_t item_flags,
3055                                   struct rte_eth_dev *dev,
3056                                   struct rte_flow_error *error)
3057 {
3058         struct priv *priv = dev->data->dev_private;
3059         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
3060         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
3061         int ret;
3062         union vni {
3063                 uint32_t vlan_id;
3064                 uint8_t vni[4];
3065         } id = { .vlan_id = 0, };
3066         uint32_t vlan_id = 0;
3067
3068         if (!priv->config.l3_vxlan_en)
3069                 return rte_flow_error_set(error, ENOTSUP,
3070                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3071                                           "L3 VXLAN is not enabled by device"
3072                                           " parameter and/or not configured in"
3073                                           " firmware");
3074         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3075                 return rte_flow_error_set(error, ENOTSUP,
3076                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3077                                           "a tunnel is already present");
3078         /*
3079          * Verify only UDPv4 is present as defined in
3080          * https://tools.ietf.org/html/rfc7348
3081          */
3082         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L4_UDP))
3083                 return rte_flow_error_set(error, EINVAL,
3084                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3085                                           "no outer UDP layer found");
3086         if (!mask)
3087                 mask = &rte_flow_item_vxlan_gpe_mask;
3088         ret = mlx5_flow_item_acceptable
3089                 (item, (const uint8_t *)mask,
3090                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
3091                  sizeof(struct rte_flow_item_vxlan_gpe),
3092                  error);
3093         if (ret < 0)
3094                 return ret;
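        /*
         * As with VXLAN above, the 24-bit VNI from spec and mask is copied
         * into a 32-bit scratch word and combined; a zero (or unspecified)
         * VNI is rejected below.
         */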
3095         if (spec) {
3096                 if (spec->protocol)
3097                         return rte_flow_error_set(error, ENOTSUP,
3098                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3099                                                   item,
3100                                                   "VxLAN-GPE protocol"
3101                                                   " not supported");
3102                 memcpy(&id.vni[1], spec->vni, 3);
3103                 vlan_id = id.vlan_id;
3104                 memcpy(&id.vni[1], mask->vni, 3);
3105                 vlan_id &= id.vlan_id;
3106         }
3107         /*
3108          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
3109          * layer is defined in the Verbs specification, it is interpreted as a
3110          * wildcard and all packets will match this rule; if it follows a full
3111          * stack layer (e.g. eth / ipv4 / udp), all packets matching the
3112          * preceding layers will also match this rule.  To avoid such a
3113          * situation, VNI 0 is currently refused.
3114          */
3115         if (!vlan_id)
3116                 return rte_flow_error_set(error, ENOTSUP,
3117                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3118                                           "VXLAN-GPE vni cannot be 0");
3119         if (!(item_flags & MLX5_FLOW_LAYER_OUTER))
3120                 return rte_flow_error_set(error, ENOTSUP,
3121                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3122                                           "VXLAN-GPE tunnel must be fully"
3123                                           " defined");
3124         return 0;
3125 }
3126
3127 /**
3128  * Validate GRE item.
3129  *
3130  * @param[in] item
3131  *   Item specification.
3132  * @param[in] item_flags
3133  *   Bit flags to mark detected items.
3134  * @param[in] target_protocol
3135  *   The next protocol in the previous item.
3136  * @param[out] error
3137  *   Pointer to error structure.
3138  *
3139  * @return
3140  *   0 on success, a negative errno value otherwise and rte_errno is set.
3141  */
3142 static int
3143 mlx5_flow_validate_item_gre(const struct rte_flow_item *item,
3144                             uint64_t item_flags,
3145                             uint8_t target_protocol,
3146                             struct rte_flow_error *error)
3147 {
3148         const struct rte_flow_item_gre *spec __rte_unused = item->spec;
3149         const struct rte_flow_item_gre *mask = item->mask;
3150         int ret;
3151
3152         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_GRE)
3153                 return rte_flow_error_set(error, EINVAL,
3154                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3155                                           "protocol filtering not compatible"
3156                                           " with this GRE layer");
3157         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3158                 return rte_flow_error_set(error, ENOTSUP,
3159                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3160                                           "a tunnel is already present");
3161         if (!(item_flags & MLX5_FLOW_LAYER_OUTER_L3))
3162                 return rte_flow_error_set(error, ENOTSUP,
3163                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3164                                           "L3 Layer is missing");
3165         if (!mask)
3166                 mask = &rte_flow_item_gre_mask;
3167         ret = mlx5_flow_item_acceptable
3168                 (item, (const uint8_t *)mask,
3169                  (const uint8_t *)&rte_flow_item_gre_mask,
3170                  sizeof(struct rte_flow_item_gre), error);
3171         if (ret < 0)
3172                 return ret;
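        /*
         * Without MPLS support in rdma-core the GRE protocol field cannot be
         * matched by the device, so a specification that filters on it is
         * rejected instead of being installed with that field ignored.
         */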
3173 #ifndef HAVE_IBV_DEVICE_MPLS_SUPPORT
3174         if (spec && (spec->protocol & mask->protocol))
3175                 return rte_flow_error_set(error, ENOTSUP,
3176                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3177                                           "without MPLS support the"
3178                                           " specification cannot be used for"
3179                                           " filtering");
3180 #endif
3181         return 0;
3182 }
3183
3184 /**
3185  * Validate MPLS item.
3186  *
3187  * @param[in] item
3188  *   Item specification.
3189  * @param[in] item_flags
3190  *   Bit-fields that hold the items detected until now.
3191  * @param[in] target_protocol
3192  *   The next protocol in the previous item.
3193  * @param[out] error
3194  *   Pointer to error structure.
3195  *
3196  * @return
3197  *   0 on success, a negative errno value otherwise and rte_errno is set.
3198  */
3199 static int
3200 mlx5_flow_validate_item_mpls(const struct rte_flow_item *item __rte_unused,
3201                              uint64_t item_flags __rte_unused,
3202                              uint8_t target_protocol __rte_unused,
3203                              struct rte_flow_error *error)
3204 {
3205 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3206         const struct rte_flow_item_mpls *mask = item->mask;
3207         int ret;
3208
3209         if (target_protocol != 0xff && target_protocol != MLX5_IP_PROTOCOL_MPLS)
3210                 return rte_flow_error_set(error, EINVAL,
3211                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3212                                           "protocol filtering not compatible"
3213                                           " with MPLS layer");
3214         if (item_flags & MLX5_FLOW_LAYER_TUNNEL)
3215                 return rte_flow_error_set(error, ENOTSUP,
3216                                           RTE_FLOW_ERROR_TYPE_ITEM, item,
3217                                           "a tunnel is already"
3218                                           " present");
3219         if (!mask)
3220                 mask = &rte_flow_item_mpls_mask;
3221         ret = mlx5_flow_item_acceptable
3222                 (item, (const uint8_t *)mask,
3223                  (const uint8_t *)&rte_flow_item_mpls_mask,
3224                  sizeof(struct rte_flow_item_mpls), error);
3225         if (ret < 0)
3226                 return ret;
3227         return 0;
3228 #endif
3229         return rte_flow_error_set(error, ENOTSUP,
3230                                   RTE_FLOW_ERROR_TYPE_ITEM, item,
3231                                   "MPLS is not supported by Verbs, please"
3232                                   " update.");
3233 }
3234
3235 /**
3236  * Internal validation function.
3237  *
3238  * @param[in] dev
3239  *   Pointer to the Ethernet device structure.
3240  * @param[in] attr
3241  *   Pointer to the flow attributes.
3242  * @param[in] items
3243  *   Pointer to the list of items.
3244  * @param[in] actions
3245  *   Pointer to the list of actions.
3246  * @param[out] error
3247  *   Pointer to the error structure.
3248  *
3249  * @return
3250  *   0 on success, a negative errno value otherwise and rte_errno is set.
3251  */
3252 static int mlx5_flow_verbs_validate(struct rte_eth_dev *dev,
3253                                     const struct rte_flow_attr *attr,
3254                                     const struct rte_flow_item items[],
3255                                     const struct rte_flow_action actions[],
3256                                     struct rte_flow_error *error)
3257 {
3258         int ret;
3259         uint32_t action_flags = 0;
3260         uint32_t item_flags = 0;
3261         int tunnel = 0;
3262         uint8_t next_protocol = 0xff;
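        /*
         * next_protocol starts as 0xff, which the item validators treat as
         * "no protocol constraint"; it is narrowed once an IPv4/IPv6 item
         * provides an explicit next_proto_id/proto value.
         */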
3263
3264         if (items == NULL)
3265                 return -1;
3266         ret = mlx5_flow_validate_attributes(dev, attr, error);
3267         if (ret < 0)
3268                 return ret;
3269         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3270                 int ret = 0;
3271                 switch (items->type) {
3272                 case RTE_FLOW_ITEM_TYPE_VOID:
3273                         break;
3274                 case RTE_FLOW_ITEM_TYPE_ETH:
3275                         ret = mlx5_flow_validate_item_eth(items, item_flags,
3276                                                           error);
3277                         if (ret < 0)
3278                                 return ret;
3279                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
3280                                                MLX5_FLOW_LAYER_OUTER_L2;
3281                         break;
3282                 case RTE_FLOW_ITEM_TYPE_VLAN:
3283                         ret = mlx5_flow_validate_item_vlan(items, item_flags,
3284                                                            error);
3285                         if (ret < 0)
3286                                 return ret;
3287                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
3288                                                MLX5_FLOW_LAYER_OUTER_VLAN;
3289                         break;
3290                 case RTE_FLOW_ITEM_TYPE_IPV4:
3291                         ret = mlx5_flow_validate_item_ipv4(items, item_flags,
3292                                                            error);
3293                         if (ret < 0)
3294                                 return ret;
3295                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
3296                                                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3297                         if (items->mask != NULL &&
3298                             ((const struct rte_flow_item_ipv4 *)
3299                              items->mask)->hdr.next_proto_id)
3300                                 next_protocol =
3301                                         ((const struct rte_flow_item_ipv4 *)
3302                                          (items->spec))->hdr.next_proto_id;
3303                         break;
3304                 case RTE_FLOW_ITEM_TYPE_IPV6:
3305                         ret = mlx5_flow_validate_item_ipv6(items, item_flags,
3306                                                            error);
3307                         if (ret < 0)
3308                                 return ret;
3309                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
3310                                                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3311                         if (items->mask != NULL &&
3312                             ((const struct rte_flow_item_ipv6 *)
3313                              items->mask)->hdr.proto)
3314                                 next_protocol =
3315                                         ((const struct rte_flow_item_ipv6 *)
3316                                          items->spec)->hdr.proto;
3317                         break;
3318                 case RTE_FLOW_ITEM_TYPE_UDP:
3319                         ret = mlx5_flow_validate_item_udp(items, item_flags,
3320                                                           next_protocol,
3321                                                           error);
3322                         if (ret < 0)
3323                                 return ret;
3324                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
3325                                                MLX5_FLOW_LAYER_OUTER_L4_UDP;
3326                         break;
3327                 case RTE_FLOW_ITEM_TYPE_TCP:
3328                         ret = mlx5_flow_validate_item_tcp(items, item_flags,
3329                                                           next_protocol, error);
3330                         if (ret < 0)
3331                                 return ret;
3332                         item_flags |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
3333                                                MLX5_FLOW_LAYER_OUTER_L4_TCP;
3334                         break;
3335                 case RTE_FLOW_ITEM_TYPE_VXLAN:
3336                         ret = mlx5_flow_validate_item_vxlan(items, item_flags,
3337                                                             error);
3338                         if (ret < 0)
3339                                 return ret;
3340                         item_flags |= MLX5_FLOW_LAYER_VXLAN;
3341                         break;
3342                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
3343                         ret = mlx5_flow_validate_item_vxlan_gpe(items,
3344                                                                 item_flags,
3345                                                                 dev, error);
3346                         if (ret < 0)
3347                                 return ret;
3348                         item_flags |= MLX5_FLOW_LAYER_VXLAN_GPE;
3349                         break;
3350                 case RTE_FLOW_ITEM_TYPE_GRE:
3351                         ret = mlx5_flow_validate_item_gre(items, item_flags,
3352                                                           next_protocol, error);
3353                         if (ret < 0)
3354                                 return ret;
3355                         item_flags |= MLX5_FLOW_LAYER_GRE;
3356                         break;
3357                 case RTE_FLOW_ITEM_TYPE_MPLS:
3358                         ret = mlx5_flow_validate_item_mpls(items, item_flags,
3359                                                            next_protocol,
3360                                                            error);
3361                         if (ret < 0)
3362                                 return ret;
3363                         if (next_protocol != 0xff &&
3364                             next_protocol != MLX5_IP_PROTOCOL_MPLS)
3365                                 return rte_flow_error_set
3366                                         (error, ENOTSUP,
3367                                          RTE_FLOW_ERROR_TYPE_ITEM, items,
3368                                          "protocol filtering not compatible"
3369                                          " with MPLS layer");
3370                         item_flags |= MLX5_FLOW_LAYER_MPLS;
3371                         break;
3372                 default:
3373                         return rte_flow_error_set(error, ENOTSUP,
3374                                                   RTE_FLOW_ERROR_TYPE_ITEM,
3375                                                   NULL,
3376                                                   "item not supported");
3377                 }
3378         }
3379         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3380                 tunnel = !!(item_flags & MLX5_FLOW_LAYER_TUNNEL);
3381                 switch (actions->type) {
3382                 case RTE_FLOW_ACTION_TYPE_VOID:
3383                         break;
3384                 case RTE_FLOW_ACTION_TYPE_FLAG:
3385                         ret = mlx5_flow_validate_action_flag(action_flags,
3386                                                              error);
3387                         if (ret < 0)
3388                                 return ret;
3389                         action_flags |= MLX5_FLOW_ACTION_FLAG;
3390                         break;
3391                 case RTE_FLOW_ACTION_TYPE_MARK:
3392                         ret = mlx5_flow_validate_action_mark(actions,
3393                                                              action_flags,
3394                                                              error);
3395                         if (ret < 0)
3396                                 return ret;
3397                         action_flags |= MLX5_FLOW_ACTION_MARK;
3398                         break;
3399                 case RTE_FLOW_ACTION_TYPE_DROP:
3400                         ret = mlx5_flow_validate_action_drop(action_flags,
3401                                                              error);
3402                         if (ret < 0)
3403                                 return ret;
3404                         action_flags |= MLX5_FLOW_ACTION_DROP;
3405                         break;
3406                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3407                         ret = mlx5_flow_validate_action_queue(actions,
3408                                                               action_flags, dev,
3409                                                               error);
3410                         if (ret < 0)
3411                                 return ret;
3412                         action_flags |= MLX5_FLOW_ACTION_QUEUE;
3413                         break;
3414                 case RTE_FLOW_ACTION_TYPE_RSS:
3415                         ret = mlx5_flow_validate_action_rss(actions,
3416                                                             action_flags, dev,
3417                                                             error);
3418                         if (ret < 0)
3419                                 return ret;
3420                         action_flags |= MLX5_FLOW_ACTION_RSS;
3421                         break;
3422                 case RTE_FLOW_ACTION_TYPE_COUNT:
3423                         ret = mlx5_flow_validate_action_count(dev, error);
3424                         if (ret < 0)
3425                                 return ret;
3426                         action_flags |= MLX5_FLOW_ACTION_COUNT;
3427                         break;
3428                 default:
3429                         return rte_flow_error_set(error, ENOTSUP,
3430                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3431                                                   actions,
3432                                                   "action not supported");
3433                 }
3434         }
3435         return 0;
3436 }
3437
3438 /**
3439  * Validate a flow supported by the NIC.
3440  *
3441  * @see rte_flow_validate()
3442  * @see rte_flow_ops
3443  */
3444 int
3445 mlx5_flow_validate(struct rte_eth_dev *dev,
3446                    const struct rte_flow_attr *attr,
3447                    const struct rte_flow_item items[],
3448                    const struct rte_flow_action actions[],
3449                    struct rte_flow_error *error)
3450 {
3451         int ret;
3452
3453         ret = mlx5_flow_verbs_validate(dev, attr, items, actions, error);
3454         if (ret < 0)
3455                 return ret;
3456         return 0;
3457 }
3458
3459 /**
3460  * Calculate the size of memory needed for the action part of the Verbs flow.
3461  * In addition, return a bit-field with all the actions detected, to avoid
3462  * another iteration over the actions.
3463  *
3464  * @param[in] actions
3465  *   Pointer to the list of actions.
3466  * @param[out] action_flags
3467  *   Pointer to the detected actions.
3468  *
3469  * @return
3470  *   The size of the memory needed for all actions.
3471  */
3472 static int
3473 mlx5_flow_verbs_get_actions_and_size(const struct rte_flow_action actions[],
3474                                      uint64_t *action_flags)
3475 {
3476         int size = 0;
3477         uint64_t detected_actions = 0;
3478
3479         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3480                 switch (actions->type) {
3481                 case RTE_FLOW_ACTION_TYPE_VOID:
3482                         break;
3483                 case RTE_FLOW_ACTION_TYPE_FLAG:
3484                         size += sizeof(struct ibv_flow_spec_action_tag);
3485                         detected_actions |= MLX5_FLOW_ACTION_FLAG;
3486                         break;
3487                 case RTE_FLOW_ACTION_TYPE_MARK:
3488                         size += sizeof(struct ibv_flow_spec_action_tag);
3489                         detected_actions |= MLX5_FLOW_ACTION_MARK;
3490                         break;
3491                 case RTE_FLOW_ACTION_TYPE_DROP:
3492                         size += sizeof(struct ibv_flow_spec_action_drop);
3493                         detected_actions |= MLX5_FLOW_ACTION_DROP;
3494                         break;
3495                 case RTE_FLOW_ACTION_TYPE_QUEUE:
3496                         detected_actions |= MLX5_FLOW_ACTION_QUEUE;
3497                         break;
3498                 case RTE_FLOW_ACTION_TYPE_RSS:
3499                         detected_actions |= MLX5_FLOW_ACTION_RSS;
3500                         break;
3501                 case RTE_FLOW_ACTION_TYPE_COUNT:
3502 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3503                         size += sizeof(struct ibv_flow_spec_counter_action);
3504 #endif
3505                         detected_actions |= MLX5_FLOW_ACTION_COUNT;
3506                         break;
3507                 default:
3508                         break;
3509                 }
3510         }
3511         *action_flags = detected_actions;
3512         return size;
3513 }
3514
3515 /**
3516  * Calculate the size of memory needed for the item part of the Verbs flow.
3517  * In addition, return a bit-field with all the items detected, to avoid
3518  * another iteration over the items.
3519  *
3520  * @param[in] items
3521  *   Pointer to the list of items.
3522  * @param[in, out] item_flags
3523  *   Pointer to the detected items.
3524  *
3525  * @return
3526  *   The size of the memory needed for all items.
3527  */
3528 static int
3529 mlx5_flow_verbs_get_items_and_size(const struct rte_flow_item items[],
3530                                    uint64_t *item_flags)
3531 {
3532         int size = 0;
3533         uint64_t detected_items = 0;
3534         const int tunnel = !!(*item_flags & MLX5_FLOW_LAYER_TUNNEL);
3535
3536         for (; items->type != RTE_FLOW_ITEM_TYPE_END; items++) {
3537                 switch (items->type) {
3538                 case RTE_FLOW_ITEM_TYPE_VOID:
3539                         break;
3540                 case RTE_FLOW_ITEM_TYPE_ETH:
3541                         size += sizeof(struct ibv_flow_spec_eth);
3542                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
3543                                         MLX5_FLOW_LAYER_OUTER_L2;
3544                         break;
3545                 case RTE_FLOW_ITEM_TYPE_VLAN:
3546                         size += sizeof(struct ibv_flow_spec_eth);
3547                         detected_items |= tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
3548                                         MLX5_FLOW_LAYER_OUTER_VLAN;
3549                         break;
3550                 case RTE_FLOW_ITEM_TYPE_IPV4:
3551                         size += sizeof(struct ibv_flow_spec_ipv4_ext);
3552                         detected_items |= tunnel ?
3553                                         MLX5_FLOW_LAYER_INNER_L3_IPV4 :
3554                                         MLX5_FLOW_LAYER_OUTER_L3_IPV4;
3555                         break;
3556                 case RTE_FLOW_ITEM_TYPE_IPV6:
3557                         size += sizeof(struct ibv_flow_spec_ipv6);
3558                         detected_items |= tunnel ?
3559                                 MLX5_FLOW_LAYER_INNER_L3_IPV6 :
3560                                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
3561                         break;
3562                 case RTE_FLOW_ITEM_TYPE_UDP:
3563                         size += sizeof(struct ibv_flow_spec_tcp_udp);
3564                         detected_items |= tunnel ?
3565                                         MLX5_FLOW_LAYER_INNER_L4_UDP :
3566                                         MLX5_FLOW_LAYER_OUTER_L4_UDP;
3567                         break;
3568                 case RTE_FLOW_ITEM_TYPE_TCP:
3569                         size += sizeof(struct ibv_flow_spec_tcp_udp);
3570                         detected_items |= tunnel ?
3571                                         MLX5_FLOW_LAYER_INNER_L4_TCP :
3572                                         MLX5_FLOW_LAYER_OUTER_L4_TCP;
3573                         break;
3574                 case RTE_FLOW_ITEM_TYPE_VXLAN:
3575                         size += sizeof(struct ibv_flow_spec_tunnel);
3576                         detected_items |= MLX5_FLOW_LAYER_VXLAN;
3577                         break;
3578                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
3579                         size += sizeof(struct ibv_flow_spec_tunnel);
3580                         detected_items |= MLX5_FLOW_LAYER_VXLAN_GPE;
3581                         break;
3582                 case RTE_FLOW_ITEM_TYPE_GRE:
3583 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3584                         size += sizeof(struct ibv_flow_spec_gre);
3585                         detected_items |= MLX5_FLOW_LAYER_GRE;
3586 #else
3587                         size += sizeof(struct ibv_flow_spec_tunnel);
3588                         detected_items |= MLX5_FLOW_LAYER_TUNNEL;
3589 #endif
3590                         break;
3591                 case RTE_FLOW_ITEM_TYPE_MPLS:
3592 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
3593                         size += sizeof(struct ibv_flow_spec_mpls);
3594                         detected_items |= MLX5_FLOW_LAYER_MPLS;
3595 #endif
3596                         break;
3597                 default:
3598                         break;
3599                 }
3600         }
3601         *item_flags = detected_items;
3602         return size;
3603 }
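/*
 * The sizes returned by the two helpers above are added to
 * sizeof(struct ibv_flow_attr) in mlx5_flow_verbs_prepare() below to compute
 * the size of the single buffer allocated for the device flow.
 */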
3604
3605 /**
3606  * Get RSS action from the action list.
3607  *
3608  * @param[in] actions
3609  *   Pointer to the list of actions.
3610  *
3611  * @return
3612  *   Pointer to the RSS action if it exists, NULL otherwise.
3613  */
3614 static const struct rte_flow_action_rss *
3615 mlx5_flow_get_rss_action(const struct rte_flow_action actions[])
3616 {
3617         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3618                 switch (actions->type) {
3619                 case RTE_FLOW_ACTION_TYPE_RSS:
3620                         return (const struct rte_flow_action_rss *)
3621                                actions->conf;
3622                 default:
3623                         break;
3624                 }
3625         }
3626         return NULL;
3627 }
3628
3629 /**
3630  * Internal preparation function. Allocate mlx5_flow with the required size.
3631  * The required size is calculated based on the actions and items. This function
3632  * also returns the detected actions and items for later use.
3633  *
3634  * @param[in] attr
3635  *   Pointer to the flow attributes.
3636  * @param[in] items
3637  *   Pointer to the list of items.
3638  * @param[in] actions
3639  *   Pointer to the list of actions.
3640  * @param[out] item_flags
3641  *   Pointer to bit mask of all items detected.
3642  * @param[out] action_flags
3643  *   Pointer to bit mask of all actions detected.
3644  * @param[out] error
3645  *   Pointer to the error structure.
3646  *
3647  * @return
3648  *   Pointer to mlx5_flow object on success, otherwise NULL and rte_errno
3649  *   is set.
3650  */
3651 static struct mlx5_flow *
3652 mlx5_flow_verbs_prepare(const struct rte_flow_attr *attr __rte_unused,
3653                         const struct rte_flow_item items[],
3654                         const struct rte_flow_action actions[],
3655                         uint64_t *item_flags,
3656                         uint64_t *action_flags,
3657                         struct rte_flow_error *error)
3658 {
3659         uint32_t size = sizeof(struct ibv_flow_attr);
3660         struct mlx5_flow *flow;
3661
3662         size += mlx5_flow_verbs_get_actions_and_size(actions, action_flags);
3663         size += mlx5_flow_verbs_get_items_and_size(items, item_flags);
3664         flow = rte_calloc(__func__, 1, size, 0);
3665         if (!flow) {
3666                 rte_flow_error_set(error, ENOMEM,
3667                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3668                                    NULL,
3669                                    "not enough memory to create flow");
3670                 return NULL;
3671         }
3672         return flow;
3673 }
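/*
 * mlx5_flow_verbs_prepare() is called once per expanded RSS pattern from
 * mlx5_flow_list_create() below; each returned mlx5_flow is linked into the
 * rte_flow's dev_flows list before mlx5_flow_merge() and mlx5_flow_apply()
 * are invoked.
 */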
3674
3675 /**
3676  * Remove the flow.
3677  *
3678  * @param[in] dev
3679  *   Pointer to the Ethernet device structure.
3680  * @param[in, out] flow
3681  *   Pointer to flow structure.
3682  */
3683 static void
3684 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
3685 {
3686         struct priv *priv = dev->data->dev_private;
3687         struct mlx5_flow_verbs *verbs;
3688
3689         if (flow->nl_flow && priv->mnl_socket)
3690                 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
3691         LIST_FOREACH(verbs, &flow->verbs, next) {
3692                 if (verbs->flow) {
3693                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
3694                         verbs->flow = NULL;
3695                 }
3696                 if (verbs->hrxq) {
3697                         if (flow->fate & MLX5_FLOW_FATE_DROP)
3698                                 mlx5_hrxq_drop_release(dev);
3699                         else
3700                                 mlx5_hrxq_release(dev, verbs->hrxq);
3701                         verbs->hrxq = NULL;
3702                 }
3703         }
3704         if (flow->counter) {
3705                 mlx5_flow_counter_release(flow->counter);
3706                 flow->counter = NULL;
3707         }
3708 }
3709
3710 /**
3711  * Apply the flow.
3712  *
3713  * @param[in] dev
3714  *   Pointer to the Ethernet device structure.
3715  * @param[in, out] flow
3716  *   Pointer to flow structure.
3717  * @param[out] error
3718  *   Pointer to error structure.
3719  *
3720  * @return
3721  *   0 on success, a negative errno value otherwise and rte_errno is set.
3722  */
3723 static int
3724 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
3725                 struct rte_flow_error *error)
3726 {
3727         struct priv *priv = dev->data->dev_private;
3728         struct mlx5_flow_verbs *verbs;
3729         int err;
3730
3731         LIST_FOREACH(verbs, &flow->verbs, next) {
3732                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
3733                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
3734                         if (!verbs->hrxq) {
3735                                 rte_flow_error_set
3736                                         (error, errno,
3737                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3738                                          NULL,
3739                                          "cannot get drop hash queue");
3740                                 goto error;
3741                         }
3742                 } else {
3743                         struct mlx5_hrxq *hrxq;
3744
3745                         hrxq = mlx5_hrxq_get(dev, flow->key,
3746                                              MLX5_RSS_HASH_KEY_LEN,
3747                                              verbs->hash_fields,
3748                                              (*flow->queue),
3749                                              flow->rss.queue_num);
3750                         if (!hrxq)
3751                                 hrxq = mlx5_hrxq_new(dev, flow->key,
3752                                                      MLX5_RSS_HASH_KEY_LEN,
3753                                                      verbs->hash_fields,
3754                                                      (*flow->queue),
3755                                                      flow->rss.queue_num,
3756                                                      !!(flow->layers &
3757                                                       MLX5_FLOW_LAYER_TUNNEL));
3758                         if (!hrxq) {
3759                                 rte_flow_error_set
3760                                         (error, rte_errno,
3761                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3762                                          NULL,
3763                                          "cannot get hash queue");
3764                                 goto error;
3765                         }
3766                         verbs->hrxq = hrxq;
3767                 }
3768                 verbs->flow =
3769                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
3770                 if (!verbs->flow) {
3771                         rte_flow_error_set(error, errno,
3772                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3773                                            NULL,
3774                                            "hardware refuses to create flow");
3775                         goto error;
3776                 }
3777         }
3778         if (flow->nl_flow &&
3779             priv->mnl_socket &&
3780             mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
3781                 goto error;
3782         return 0;
3783 error:
3784         err = rte_errno; /* Save rte_errno before cleanup. */
3785         LIST_FOREACH(verbs, &flow->verbs, next) {
3786                 if (verbs->hrxq) {
3787                         if (flow->fate & MLX5_FLOW_FATE_DROP)
3788                                 mlx5_hrxq_drop_release(dev);
3789                         else
3790                                 mlx5_hrxq_release(dev, verbs->hrxq);
3791                         verbs->hrxq = NULL;
3792                 }
3793         }
3794         rte_errno = err; /* Restore rte_errno. */
3795         return -rte_errno;
3796 }
3797
3798 /**
3799  * Create a flow and add it to @p list.
3800  *
3801  * @param dev
3802  *   Pointer to Ethernet device.
3803  * @param list
3804  *   Pointer to a TAILQ flow list.
3805  * @param[in] attr
3806  *   Flow rule attributes.
3807  * @param[in] items
3808  *   Pattern specification (list terminated by the END pattern item).
3809  * @param[in] actions
3810  *   Associated actions (list terminated by the END action).
3811  * @param[out] error
3812  *   Perform verbose error reporting if not NULL.
3813  *
3814  * @return
3815  *   A flow on success, NULL otherwise and rte_errno is set.
3816  */
3817 static struct rte_flow *
3818 mlx5_flow_list_create(struct rte_eth_dev *dev,
3819                       struct mlx5_flows *list,
3820                       const struct rte_flow_attr *attr,
3821                       const struct rte_flow_item items[],
3822                       const struct rte_flow_action actions[],
3823                       struct rte_flow_error *error)
3824 {
3825         struct rte_flow *flow = NULL;
3826         struct mlx5_flow *dev_flow;
3827         size_t size = 0;
3828         uint64_t action_flags = 0;
3829         uint64_t item_flags = 0;
3830         const struct rte_flow_action_rss *rss;
3831         union {
3832                 struct rte_flow_expand_rss buf;
3833                 uint8_t buffer[2048];
3834         } expand_buffer;
3835         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
3836         int ret;
3837         uint32_t i;
3838
3839         ret = mlx5_flow_validate(dev, attr, items, actions, error);
3840         if (ret < 0)
3841                 return NULL;
3842         flow = rte_calloc(__func__, 1, sizeof(*flow), 0);
3843         LIST_INIT(&flow->dev_flows);
3844         rss = mlx5_flow_get_rss_action(actions);
3845         if (rss && rss->types) {
3846                 unsigned int graph_root;
3847
3848                 graph_root = mlx5_find_graph_root(items, rss->level);
3849                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
3850                                           items, rss->types,
3851                                           mlx5_support_expansion,
3852                                           graph_root);
3853                 assert(ret > 0 &&
3854                        (unsigned int)ret < sizeof(expand_buffer.buffer));
3855         } else {
3856                 buf->entries = 1;
3857                 buf->entry[0].pattern = (void *)(uintptr_t)items;
3858         }
3859         for (i = 0; i < buf->entries; ++i) {
3860                 dev_flow = mlx5_flow_verbs_prepare(attr, buf->entry[i].pattern,
3861                                                    actions, &item_flags,
3862                                                    &action_flags, error);
3863                 dev_flow->flow = flow;
3864                 LIST_INSERT_HEAD(&flow->dev_flows, dev_flow, next);
3865         }
3866         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3867         if (ret < 0)
3868                 return NULL;
3869         size = ret;
3870         flow = rte_calloc(__func__, 1, size, 0);
3871         if (!flow) {
3872                 rte_flow_error_set(error, ENOMEM,
3873                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3874                                    NULL,
3875                                    "not enough memory to create flow");
3876                 return NULL;
3877         }
3878         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3879         if (ret < 0) {
3880                 rte_free(flow);
3881                 return NULL;
3882         }
3883         assert((size_t)ret == size);
3884         if (dev->data->dev_started) {
3885                 ret = mlx5_flow_apply(dev, flow, error);
3886                 if (ret < 0) {
3887                         ret = rte_errno; /* Save rte_errno before cleanup. */
3888                         if (flow) {
3889                                 mlx5_flow_remove(dev, flow);
3890                                 rte_free(flow);
3891                         }
3892                         rte_errno = ret; /* Restore rte_errno. */
3893                         return NULL;
3894                 }
3895         }
3896         TAILQ_INSERT_TAIL(list, flow, next);
3897         mlx5_flow_rxq_flags_set(dev, flow);
3898         return flow;
3899 }
3900
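/*
 * Illustration (editorial sketch, not part of the driver): when the RSS
 * action requests hash types that go beyond the supplied pattern,
 * rte_flow_expand_rss() in mlx5_flow_list_create() turns one pattern into
 * several, and one device flow is prepared per expanded entry.  Assuming a
 * rule such as
 *
 *   pattern: ETH / IPV4 / END
 *   action : RSS { .types = ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP }
 *
 * the expansion buffer may end up holding entries equivalent to
 *
 *   ETH / IPV4 / END
 *   ETH / IPV4 / UDP / END
 *   ETH / IPV4 / TCP / END
 *
 * each of which is handed to mlx5_flow_verbs_prepare() in the loop above.
 */
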
3901 /**
3902  * Create a flow.
3903  *
3904  * @see rte_flow_create()
3905  * @see rte_flow_ops
3906  */
3907 struct rte_flow *
3908 mlx5_flow_create(struct rte_eth_dev *dev,
3909                  const struct rte_flow_attr *attr,
3910                  const struct rte_flow_item items[],
3911                  const struct rte_flow_action actions[],
3912                  struct rte_flow_error *error)
3913 {
3914         return mlx5_flow_list_create
3915                 (dev, &((struct priv *)dev->data->dev_private)->flows,
3916                  attr, items, actions, error);
3917 }
3918
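/*
 * Minimal application-side sketch (illustrative only; port_id and the queue
 * index are placeholders) of how this entry point is reached through the
 * generic rte_flow API:
 *
 *   struct rte_flow_error err;
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 1 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                           actions, &err);
 *
 * rte_flow_create() dispatches to mlx5_flow_create() through mlx5_flow_ops.
 */
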
3919 /**
3920  * Destroy a flow in a list.
3921  *
3922  * @param dev
3923  *   Pointer to Ethernet device.
3924  * @param list
3925  *   Pointer to a TAILQ flow list.
3926  * @param[in] flow
3927  *   Flow to destroy.
3928  */
3929 static void
3930 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
3931                        struct rte_flow *flow)
3932 {
3933         mlx5_flow_remove(dev, flow);
3934         TAILQ_REMOVE(list, flow, next);
3935         /*
3936          * Update RX queue flags only if port is started, otherwise it is
3937          * already clean.
3938          */
3939         if (dev->data->dev_started)
3940                 mlx5_flow_rxq_flags_trim(dev, flow);
3941         rte_free(flow);
3942 }
3943
3944 /**
3945  * Destroy all flows.
3946  *
3947  * @param dev
3948  *   Pointer to Ethernet device.
3949  * @param list
3950  *   Pointer to a TAILQ flow list.
3951  */
3952 void
3953 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
3954 {
3955         while (!TAILQ_EMPTY(list)) {
3956                 struct rte_flow *flow;
3957
3958                 flow = TAILQ_FIRST(list);
3959                 mlx5_flow_list_destroy(dev, list, flow);
3960         }
3961 }
3962
3963 /**
3964  * Remove all flows.
3965  *
3966  * @param dev
3967  *   Pointer to Ethernet device.
3968  * @param list
3969  *   Pointer to a TAILQ flow list.
3970  */
3971 void
3972 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
3973 {
3974         struct rte_flow *flow;
3975
3976         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
3977                 mlx5_flow_remove(dev, flow);
3978         mlx5_flow_rxq_flags_clear(dev);
3979 }
3980
3981 /**
3982  * Add all flows.
3983  *
3984  * @param dev
3985  *   Pointer to Ethernet device.
3986  * @param list
3987  *   Pointer to a TAILQ flow list.
3988  *
3989  * @return
3990  *   0 on success, a negative errno value otherwise and rte_errno is set.
3991  */
3992 int
3993 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
3994 {
3995         struct rte_flow *flow;
3996         struct rte_flow_error error;
3997         int ret = 0;
3998
3999         TAILQ_FOREACH(flow, list, next) {
4000                 ret = mlx5_flow_apply(dev, flow, &error);
4001                 if (ret < 0)
4002                         goto error;
4003                 mlx5_flow_rxq_flags_set(dev, flow);
4004         }
4005         return 0;
4006 error:
4007         ret = rte_errno; /* Save rte_errno before cleanup. */
4008         mlx5_flow_stop(dev, list);
4009         rte_errno = ret; /* Restore rte_errno. */
4010         return -rte_errno;
4011 }
4012
4013 /**
4014  * Verify the flow list is empty.
4015  *
4016  * @param dev
4017  *   Pointer to Ethernet device.
4018  *
4019  * @return the number of flows not released.
4020  */
4021 int
4022 mlx5_flow_verify(struct rte_eth_dev *dev)
4023 {
4024         struct priv *priv = dev->data->dev_private;
4025         struct rte_flow *flow;
4026         int ret = 0;
4027
4028         TAILQ_FOREACH(flow, &priv->flows, next) {
4029                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
4030                         dev->data->port_id, (void *)flow);
4031                 ++ret;
4032         }
4033         return ret;
4034 }
4035
4036 /**
4037  * Enable a control flow configured from the control plane.
4038  *
4039  * @param dev
4040  *   Pointer to Ethernet device.
4041  * @param eth_spec
4042  *   An Ethernet flow spec to apply.
4043  * @param eth_mask
4044  *   An Ethernet flow mask to apply.
4045  * @param vlan_spec
4046  *   A VLAN flow spec to apply.
4047  * @param vlan_mask
4048  *   A VLAN flow mask to apply.
4049  *
4050  * @return
4051  *   0 on success, a negative errno value otherwise and rte_errno is set.
4052  */
4053 int
4054 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
4055                     struct rte_flow_item_eth *eth_spec,
4056                     struct rte_flow_item_eth *eth_mask,
4057                     struct rte_flow_item_vlan *vlan_spec,
4058                     struct rte_flow_item_vlan *vlan_mask)
4059 {
4060         struct priv *priv = dev->data->dev_private;
4061         const struct rte_flow_attr attr = {
4062                 .ingress = 1,
4063                 .priority = MLX5_FLOW_PRIO_RSVD,
4064         };
4065         struct rte_flow_item items[] = {
4066                 {
4067                         .type = RTE_FLOW_ITEM_TYPE_ETH,
4068                         .spec = eth_spec,
4069                         .last = NULL,
4070                         .mask = eth_mask,
4071                 },
4072                 {
4073                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
4074                                               RTE_FLOW_ITEM_TYPE_END,
4075                         .spec = vlan_spec,
4076                         .last = NULL,
4077                         .mask = vlan_mask,
4078                 },
4079                 {
4080                         .type = RTE_FLOW_ITEM_TYPE_END,
4081                 },
4082         };
4083         uint16_t queue[priv->reta_idx_n];
4084         struct rte_flow_action_rss action_rss = {
4085                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
4086                 .level = 0,
4087                 .types = priv->rss_conf.rss_hf,
4088                 .key_len = priv->rss_conf.rss_key_len,
4089                 .queue_num = priv->reta_idx_n,
4090                 .key = priv->rss_conf.rss_key,
4091                 .queue = queue,
4092         };
4093         struct rte_flow_action actions[] = {
4094                 {
4095                         .type = RTE_FLOW_ACTION_TYPE_RSS,
4096                         .conf = &action_rss,
4097                 },
4098                 {
4099                         .type = RTE_FLOW_ACTION_TYPE_END,
4100                 },
4101         };
4102         struct rte_flow *flow;
4103         struct rte_flow_error error;
4104         unsigned int i;
4105
4106         if (!priv->reta_idx_n) {
4107                 rte_errno = EINVAL;
4108                 return -rte_errno;
4109         }
4110         for (i = 0; i != priv->reta_idx_n; ++i)
4111                 queue[i] = (*priv->reta_idx)[i];
4112         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
4113                                      actions, &error);
4114         if (!flow)
4115                 return -rte_errno;
4116         return 0;
4117 }
4118
4119 /**
4120  * Enable a control flow configured from the control plane.
4121  *
4122  * @param dev
4123  *   Pointer to Ethernet device.
4124  * @param eth_spec
4125  *   An Ethernet flow spec to apply.
4126  * @param eth_mask
4127  *   An Ethernet flow mask to apply.
4128  *
4129  * @return
4130  *   0 on success, a negative errno value otherwise and rte_errno is set.
4131  */
4132 int
4133 mlx5_ctrl_flow(struct rte_eth_dev *dev,
4134                struct rte_flow_item_eth *eth_spec,
4135                struct rte_flow_item_eth *eth_mask)
4136 {
4137         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
4138 }
4139
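/*
 * Typical caller sketch (hedged; the actual control rules are installed from
 * the traffic enable path): a broadcast control flow can be requested by
 * matching only on the broadcast destination MAC address.
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *           return -rte_errno;
 */
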
4140 /**
4141  * Destroy a flow.
4142  *
4143  * @see rte_flow_destroy()
4144  * @see rte_flow_ops
4145  */
4146 int
4147 mlx5_flow_destroy(struct rte_eth_dev *dev,
4148                   struct rte_flow *flow,
4149                   struct rte_flow_error *error __rte_unused)
4150 {
4151         struct priv *priv = dev->data->dev_private;
4152
4153         mlx5_flow_list_destroy(dev, &priv->flows, flow);
4154         return 0;
4155 }
4156
4157 /**
4158  * Destroy all flows.
4159  *
4160  * @see rte_flow_flush()
4161  * @see rte_flow_ops
4162  */
4163 int
4164 mlx5_flow_flush(struct rte_eth_dev *dev,
4165                 struct rte_flow_error *error __rte_unused)
4166 {
4167         struct priv *priv = dev->data->dev_private;
4168
4169         mlx5_flow_list_flush(dev, &priv->flows);
4170         return 0;
4171 }
4172
4173 /**
4174  * Isolated mode.
4175  *
4176  * @see rte_flow_isolate()
4177  * @see rte_flow_ops
4178  */
4179 int
4180 mlx5_flow_isolate(struct rte_eth_dev *dev,
4181                   int enable,
4182                   struct rte_flow_error *error)
4183 {
4184         struct priv *priv = dev->data->dev_private;
4185
4186         if (dev->data->dev_started) {
4187                 rte_flow_error_set(error, EBUSY,
4188                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4189                                    NULL,
4190                                    "port must be stopped first");
4191                 return -rte_errno;
4192         }
4193         priv->isolated = !!enable;
4194         if (enable)
4195                 dev->dev_ops = &mlx5_dev_ops_isolate;
4196         else
4197                 dev->dev_ops = &mlx5_dev_ops;
4198         return 0;
4199 }
4200
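/*
 * Application-side sketch (illustrative, port_id is a placeholder): isolated
 * mode must be requested while the port is stopped, typically right after
 * rte_eth_dev_configure() and before rte_eth_dev_start().
 *
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_isolate(port_id, 1, &err))
 *           printf("isolation refused: %s\n",
 *                  err.message ? err.message : "unknown");
 */
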
4201 /**
4202  * Query flow counter.
4203  *
4204  * @param flow
4205  *   Pointer to the flow.
4206  *
4207  * @return
4208  *   0 on success, a negative errno value otherwise and rte_errno is set.
4209  */
4210 static int
4211 mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
4212                       void *data __rte_unused,
4213                       struct rte_flow_error *error)
4214 {
4215 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
4216         if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
4217                 struct rte_flow_query_count *qc = data;
4218                 uint64_t counters[2] = {0, 0};
4219                 struct ibv_query_counter_set_attr query_cs_attr = {
4220                         .cs = flow->counter->cs,
4221                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
4222                 };
4223                 struct ibv_counter_set_data query_out = {
4224                         .out = counters,
4225                         .outlen = 2 * sizeof(uint64_t),
4226                 };
4227                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
4228                                                        &query_out);
4229
4230                 if (err)
4231                         return rte_flow_error_set
4232                                 (error, err,
4233                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4234                                  NULL,
4235                                  "cannot read counter");
4236                 qc->hits_set = 1;
4237                 qc->bytes_set = 1;
4238                 qc->hits = counters[0] - flow->counter->hits;
4239                 qc->bytes = counters[1] - flow->counter->bytes;
4240                 if (qc->reset) {
4241                         flow->counter->hits = counters[0];
4242                         flow->counter->bytes = counters[1];
4243                 }
4244                 return 0;
4245         }
4246         return rte_flow_error_set(error, ENOTSUP,
4247                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4248                                   NULL,
4249                                   "flow does not have counter");
4250 #endif
4251         return rte_flow_error_set(error, ENOTSUP,
4252                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
4253                                   NULL,
4254                                   "counters are not available");
4255 }
4256
4257 /**
4258  * Query a flow.
4259  *
4260  * @see rte_flow_query()
4261  * @see rte_flow_ops
4262  */
4263 int
4264 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
4265                 struct rte_flow *flow,
4266                 const struct rte_flow_action *actions,
4267                 void *data,
4268                 struct rte_flow_error *error)
4269 {
4270         int ret = 0;
4271
4272         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
4273                 switch (actions->type) {
4274                 case RTE_FLOW_ACTION_TYPE_VOID:
4275                         break;
4276                 case RTE_FLOW_ACTION_TYPE_COUNT:
4277                         ret = mlx5_flow_query_count(flow, data, error);
4278                         break;
4279                 default:
4280                         return rte_flow_error_set(error, ENOTSUP,
4281                                                   RTE_FLOW_ERROR_TYPE_ACTION,
4282                                                   actions,
4283                                                   "action not supported");
4284                 }
4285                 if (ret < 0)
4286                         return ret;
4287         }
4288         return 0;
4289 }
4290
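/*
 * Caller sketch (illustrative; port_id and flow are placeholders): reading
 * the counter of a flow created with a COUNT action.  The END-terminated
 * array mirrors how this function walks the action list.
 *
 *   struct rte_flow_query_count stats = { .reset = 1 };
 *   struct rte_flow_error err;
 *   const struct rte_flow_action query_actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 *   if (!rte_flow_query(port_id, flow, query_actions, &stats, &err) &&
 *       stats.hits_set)
 *           printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
 *                  stats.hits, stats.bytes);
 */
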
4291 /**
4292  * Convert a flow director filter to a generic flow.
4293  *
4294  * @param dev
4295  *   Pointer to Ethernet device.
4296  * @param fdir_filter
4297  *   Flow director filter to add.
4298  * @param attributes
4299  *   Generic flow parameters structure.
4300  *
4301  * @return
4302  *   0 on success, a negative errno value otherwise and rte_errno is set.
4303  */
4304 static int
4305 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
4306                          const struct rte_eth_fdir_filter *fdir_filter,
4307                          struct mlx5_fdir *attributes)
4308 {
4309         struct priv *priv = dev->data->dev_private;
4310         const struct rte_eth_fdir_input *input = &fdir_filter->input;
4311         const struct rte_eth_fdir_masks *mask =
4312                 &dev->data->dev_conf.fdir_conf.mask;
4313
4314         /* Validate queue number. */
4315         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
4316                 DRV_LOG(ERR, "port %u invalid queue number %d",
4317                         dev->data->port_id, fdir_filter->action.rx_queue);
4318                 rte_errno = EINVAL;
4319                 return -rte_errno;
4320         }
4321         attributes->attr.ingress = 1;
4322         attributes->items[0] = (struct rte_flow_item) {
4323                 .type = RTE_FLOW_ITEM_TYPE_ETH,
4324                 .spec = &attributes->l2,
4325                 .mask = &attributes->l2_mask,
4326         };
4327         switch (fdir_filter->action.behavior) {
4328         case RTE_ETH_FDIR_ACCEPT:
4329                 attributes->actions[0] = (struct rte_flow_action){
4330                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
4331                         .conf = &attributes->queue,
4332                 };
4333                 break;
4334         case RTE_ETH_FDIR_REJECT:
4335                 attributes->actions[0] = (struct rte_flow_action){
4336                         .type = RTE_FLOW_ACTION_TYPE_DROP,
4337                 };
4338                 break;
4339         default:
4340                 DRV_LOG(ERR, "port %u invalid behavior %d",
4341                         dev->data->port_id,
4342                         fdir_filter->action.behavior);
4343                 rte_errno = ENOTSUP;
4344                 return -rte_errno;
4345         }
4346         attributes->queue.index = fdir_filter->action.rx_queue;
4347         /* Handle L3. */
4348         switch (fdir_filter->input.flow_type) {
4349         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4350         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4351         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4352                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
4353                         .src_addr = input->flow.ip4_flow.src_ip,
4354                         .dst_addr = input->flow.ip4_flow.dst_ip,
4355                         .time_to_live = input->flow.ip4_flow.ttl,
4356                         .type_of_service = input->flow.ip4_flow.tos,
4357                 };
4358                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
4359                         .src_addr = mask->ipv4_mask.src_ip,
4360                         .dst_addr = mask->ipv4_mask.dst_ip,
4361                         .time_to_live = mask->ipv4_mask.ttl,
4362                         .type_of_service = mask->ipv4_mask.tos,
4363                         .next_proto_id = mask->ipv4_mask.proto,
4364                 };
4365                 attributes->items[1] = (struct rte_flow_item){
4366                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
4367                         .spec = &attributes->l3,
4368                         .mask = &attributes->l3_mask,
4369                 };
4370                 break;
4371         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4372         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4373         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4374                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
4375                         .hop_limits = input->flow.ipv6_flow.hop_limits,
4376                         .proto = input->flow.ipv6_flow.proto,
4377                 };
4378
4379                 memcpy(attributes->l3.ipv6.hdr.src_addr,
4380                        input->flow.ipv6_flow.src_ip,
4381                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
4382                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
4383                        input->flow.ipv6_flow.dst_ip,
4384                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
4385                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
4386                        mask->ipv6_mask.src_ip,
4387                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
4388                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
4389                        mask->ipv6_mask.dst_ip,
4390                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
4391                 attributes->items[1] = (struct rte_flow_item){
4392                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
4393                         .spec = &attributes->l3,
4394                         .mask = &attributes->l3_mask,
4395                 };
4396                 break;
4397         default:
4398                 DRV_LOG(ERR, "port %u invalid flow type %d",
4399                         dev->data->port_id, fdir_filter->input.flow_type);
4400                 rte_errno = ENOTSUP;
4401                 return -rte_errno;
4402         }
4403         /* Handle L4. */
4404         switch (fdir_filter->input.flow_type) {
4405         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
4406                 attributes->l4.udp.hdr = (struct udp_hdr){
4407                         .src_port = input->flow.udp4_flow.src_port,
4408                         .dst_port = input->flow.udp4_flow.dst_port,
4409                 };
4410                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
4411                         .src_port = mask->src_port_mask,
4412                         .dst_port = mask->dst_port_mask,
4413                 };
4414                 attributes->items[2] = (struct rte_flow_item){
4415                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4416                         .spec = &attributes->l4,
4417                         .mask = &attributes->l4_mask,
4418                 };
4419                 break;
4420         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
4421                 attributes->l4.tcp.hdr = (struct tcp_hdr){
4422                         .src_port = input->flow.tcp4_flow.src_port,
4423                         .dst_port = input->flow.tcp4_flow.dst_port,
4424                 };
4425                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
4426                         .src_port = mask->src_port_mask,
4427                         .dst_port = mask->dst_port_mask,
4428                 };
4429                 attributes->items[2] = (struct rte_flow_item){
4430                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4431                         .spec = &attributes->l4,
4432                         .mask = &attributes->l4_mask,
4433                 };
4434                 break;
4435         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
4436                 attributes->l4.udp.hdr = (struct udp_hdr){
4437                         .src_port = input->flow.udp6_flow.src_port,
4438                         .dst_port = input->flow.udp6_flow.dst_port,
4439                 };
4440                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
4441                         .src_port = mask->src_port_mask,
4442                         .dst_port = mask->dst_port_mask,
4443                 };
4444                 attributes->items[2] = (struct rte_flow_item){
4445                         .type = RTE_FLOW_ITEM_TYPE_UDP,
4446                         .spec = &attributes->l4,
4447                         .mask = &attributes->l4_mask,
4448                 };
4449                 break;
4450         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
4451                 attributes->l4.tcp.hdr = (struct tcp_hdr){
4452                         .src_port = input->flow.tcp6_flow.src_port,
4453                         .dst_port = input->flow.tcp6_flow.dst_port,
4454                 };
4455                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
4456                         .src_port = mask->src_port_mask,
4457                         .dst_port = mask->dst_port_mask,
4458                 };
4459                 attributes->items[2] = (struct rte_flow_item){
4460                         .type = RTE_FLOW_ITEM_TYPE_TCP,
4461                         .spec = &attributes->l4,
4462                         .mask = &attributes->l4_mask,
4463                 };
4464                 break;
4465         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
4466         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
4467                 break;
4468         default:
4469                 DRV_LOG(ERR, "port %u invalid flow type %d",
4470                         dev->data->port_id, fdir_filter->input.flow_type);
4471                 rte_errno = ENOTSUP;
4472                 return -rte_errno;
4473         }
4474         return 0;
4475 }
4476
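/*
 * Example input handled by the conversion above (editorial sketch; field
 * values are placeholders and dst_ip_be stands for a big-endian IPv4
 * address): a flow director filter steering IPv4/UDP traffic to Rx queue 1
 * becomes an ETH / IPV4 / UDP pattern with a QUEUE action.
 *
 *   struct rte_eth_fdir_filter f = {
 *           .input = {
 *                   .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *                   .flow.udp4_flow = {
 *                           .ip.dst_ip = dst_ip_be,
 *                           .dst_port = rte_cpu_to_be_16(4789),
 *                   },
 *           },
 *           .action = {
 *                   .behavior = RTE_ETH_FDIR_ACCEPT,
 *                   .rx_queue = 1,
 *           },
 *   };
 */
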
4477 /**
4478  * Add new flow director filter and store it in list.
4479  *
4480  * @param dev
4481  *   Pointer to Ethernet device.
4482  * @param fdir_filter
4483  *   Flow director filter to add.
4484  *
4485  * @return
4486  *   0 on success, a negative errno value otherwise and rte_errno is set.
4487  */
4488 static int
4489 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
4490                      const struct rte_eth_fdir_filter *fdir_filter)
4491 {
4492         struct priv *priv = dev->data->dev_private;
4493         struct mlx5_fdir attributes = {
4494                 .attr.group = 0,
4495                 .l2_mask = {
4496                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
4497                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
4498                         .type = 0,
4499                 },
4500         };
4501         struct rte_flow_error error;
4502         struct rte_flow *flow;
4503         int ret;
4504
4505         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
4506         if (ret)
4507                 return ret;
4508         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
4509                                      attributes.items, attributes.actions,
4510                                      &error);
4511         if (flow) {
4512                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
4513                         (void *)flow);
4514                 return 0;
4515         }
4516         return -rte_errno;
4517 }
4518
4519 /**
4520  * Delete specific filter.
4521  *
4522  * @param dev
4523  *   Pointer to Ethernet device.
4524  * @param fdir_filter
4525  *   Filter to be deleted.
4526  *
4527  * @return
4528  *   0 on success, a negative errno value otherwise and rte_errno is set.
4529  */
4530 static int
4531 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
4532                         const struct rte_eth_fdir_filter *fdir_filter
4533                         __rte_unused)
4534 {
4535         rte_errno = ENOTSUP;
4536         return -rte_errno;
4537 }
4538
4539 /**
4540  * Update queue for specific filter.
4541  *
4542  * @param dev
4543  *   Pointer to Ethernet device.
4544  * @param fdir_filter
4545  *   Filter to be updated.
4546  *
4547  * @return
4548  *   0 on success, a negative errno value otherwise and rte_errno is set.
4549  */
4550 static int
4551 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
4552                         const struct rte_eth_fdir_filter *fdir_filter)
4553 {
4554         int ret;
4555
4556         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
4557         if (ret)
4558                 return ret;
4559         return mlx5_fdir_filter_add(dev, fdir_filter);
4560 }
4561
4562 /**
4563  * Flush all filters.
4564  *
4565  * @param dev
4566  *   Pointer to Ethernet device.
4567  */
4568 static void
4569 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
4570 {
4571         struct priv *priv = dev->data->dev_private;
4572
4573         mlx5_flow_list_flush(dev, &priv->flows);
4574 }
4575
4576 /**
4577  * Get flow director information.
4578  *
4579  * @param dev
4580  *   Pointer to Ethernet device.
4581  * @param[out] fdir_info
4582  *   Resulting flow director information.
4583  */
4584 static void
4585 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
4586 {
4587         struct rte_eth_fdir_masks *mask =
4588                 &dev->data->dev_conf.fdir_conf.mask;
4589
4590         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
4591         fdir_info->guarant_spc = 0;
4592         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
4593         fdir_info->max_flexpayload = 0;
4594         fdir_info->flow_types_mask[0] = 0;
4595         fdir_info->flex_payload_unit = 0;
4596         fdir_info->max_flex_payload_segment_num = 0;
4597         fdir_info->flex_payload_limit = 0;
4598         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
4599 }
4600
4601 /**
4602  * Deal with flow director operations.
4603  *
4604  * @param dev
4605  *   Pointer to Ethernet device.
4606  * @param filter_op
4607  *   Operation to perform.
4608  * @param arg
4609  *   Pointer to operation-specific structure.
4610  *
4611  * @return
4612  *   0 on success, a negative errno value otherwise and rte_errno is set.
4613  */
4614 static int
4615 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
4616                     void *arg)
4617 {
4618         enum rte_fdir_mode fdir_mode =
4619                 dev->data->dev_conf.fdir_conf.mode;
4620
4621         if (filter_op == RTE_ETH_FILTER_NOP)
4622                 return 0;
4623         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
4624             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
4625                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
4626                         dev->data->port_id, fdir_mode);
4627                 rte_errno = EINVAL;
4628                 return -rte_errno;
4629         }
4630         switch (filter_op) {
4631         case RTE_ETH_FILTER_ADD:
4632                 return mlx5_fdir_filter_add(dev, arg);
4633         case RTE_ETH_FILTER_UPDATE:
4634                 return mlx5_fdir_filter_update(dev, arg);
4635         case RTE_ETH_FILTER_DELETE:
4636                 return mlx5_fdir_filter_delete(dev, arg);
4637         case RTE_ETH_FILTER_FLUSH:
4638                 mlx5_fdir_filter_flush(dev);
4639                 break;
4640         case RTE_ETH_FILTER_INFO:
4641                 mlx5_fdir_info_get(dev, arg);
4642                 break;
4643         default:
4644                 DRV_LOG(DEBUG, "port %u unknown operation %u",
4645                         dev->data->port_id, filter_op);
4646                 rte_errno = EINVAL;
4647                 return -rte_errno;
4648         }
4649         return 0;
4650 }
4651
4652 /**
4653  * Manage filter operations.
4654  *
4655  * @param dev
4656  *   Pointer to Ethernet device structure.
4657  * @param filter_type
4658  *   Filter type.
4659  * @param filter_op
4660  *   Operation to perform.
4661  * @param arg
4662  *   Pointer to operation-specific structure.
4663  *
4664  * @return
4665  *   0 on success, a negative errno value otherwise and rte_errno is set.
4666  */
4667 int
4668 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
4669                      enum rte_filter_type filter_type,
4670                      enum rte_filter_op filter_op,
4671                      void *arg)
4672 {
4673         switch (filter_type) {
4674         case RTE_ETH_FILTER_GENERIC:
4675                 if (filter_op != RTE_ETH_FILTER_GET) {
4676                         rte_errno = EINVAL;
4677                         return -rte_errno;
4678                 }
4679                 *(const void **)arg = &mlx5_flow_ops;
4680                 return 0;
4681         case RTE_ETH_FILTER_FDIR:
4682                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
4683         default:
4684                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
4685                         dev->data->port_id, filter_type);
4686                 rte_errno = ENOTSUP;
4687                 return -rte_errno;
4688         }
4689         return 0;
4690 }
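/*
 * Illustrative sketch of the RTE_ETH_FILTER_GENERIC path above; this is how
 * the rte_flow layer retrieves the driver callbacks (port_id is a
 * placeholder).
 *
 *   const struct rte_flow_ops *ops = NULL;
 *
 *   if (!rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                                RTE_ETH_FILTER_GET, &ops))
 *
 * On success, ops points to mlx5_flow_ops.
 */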