/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner Layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Pattern tunnel Layer bits. */
#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)

/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
        (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
        (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
         MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel Masks. */
#define MLX5_FLOW_LAYER_TUNNEL \
        (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
         MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)

/* Inner Masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
        (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
        (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
         MLX5_FLOW_LAYER_INNER_L4)

/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)
#define MLX5_FLOW_MOD_COUNT (1u << 2)

/* Possible L3 layer protocols to filter on. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17
#define MLX5_IP_PROTOCOL_GRE 47
#define MLX5_IP_PROTOCOL_MPLS 147

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
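/*
 * A flow requesting MLX5_FLOW_PRIO_RSVD is remapped to the highest
 * priority supported by the device, see mlx5_flow_attributes() below.
 */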

enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
        MLX5_EXPANSION_ROOT_OUTER,
        MLX5_EXPANSION_ROOT_ETH_VLAN,
        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_ETH,
        MLX5_EXPANSION_OUTER_ETH_VLAN,
        MLX5_EXPANSION_OUTER_VLAN,
        MLX5_EXPANSION_OUTER_IPV4,
        MLX5_EXPANSION_OUTER_IPV4_UDP,
        MLX5_EXPANSION_OUTER_IPV4_TCP,
        MLX5_EXPANSION_OUTER_IPV6,
        MLX5_EXPANSION_OUTER_IPV6_UDP,
        MLX5_EXPANSION_OUTER_IPV6_TCP,
        MLX5_EXPANSION_VXLAN,
        MLX5_EXPANSION_VXLAN_GPE,
        MLX5_EXPANSION_GRE,
        MLX5_EXPANSION_MPLS,
        MLX5_EXPANSION_ETH,
        MLX5_EXPANSION_ETH_VLAN,
        MLX5_EXPANSION_VLAN,
        MLX5_EXPANSION_IPV4,
        MLX5_EXPANSION_IPV4_UDP,
        MLX5_EXPANSION_IPV4_TCP,
        MLX5_EXPANSION_IPV6,
        MLX5_EXPANSION_IPV6_UDP,
        MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
        [MLX5_EXPANSION_ROOT] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
                                                 MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_OUTER_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6,
                                                 MLX5_EXPANSION_MPLS),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_OUTER_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV4_UDP,
                         MLX5_EXPANSION_OUTER_IPV4_TCP,
                         MLX5_EXPANSION_GRE),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_OUTER_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV6_UDP,
                         MLX5_EXPANSION_OUTER_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
        [MLX5_EXPANSION_VXLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        },
        [MLX5_EXPANSION_VXLAN_GPE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
        },
        [MLX5_EXPANSION_GRE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
                .type = RTE_FLOW_ITEM_TYPE_GRE,
        },
        [MLX5_EXPANSION_MPLS] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
        },
        [MLX5_EXPANSION_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_ETH_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VLAN),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_VLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
        },
        [MLX5_EXPANSION_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
                                                 MLX5_EXPANSION_IPV4_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_IPV4_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
                                                 MLX5_EXPANSION_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_IPV6_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
};
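
/*
 * Note: this table drives RSS flow expansion: when the RSS types
 * requested by a flow are not fully covered by its last pattern item,
 * the pattern is expanded along the .next links above (e.g. an ETH
 * pattern requesting ETH_RSS_UDP expands into ETH/IPV4/UDP and
 * ETH/IPV6/UDP sub-flows).
 */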

/** Verbs flow structure. */
struct mlx5_flow_verbs {
        LIST_ENTRY(mlx5_flow_verbs) next;
        unsigned int size; /**< Size of the attribute. */
        struct {
                struct ibv_flow_attr *attr;
                /**< Pointer to the Verbs flow attribute. */
                uint8_t *specs; /**< Pointer to the specifications. */
        };
        struct ibv_flow *flow; /**< Verbs flow pointer. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
        uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Counters information. */
struct mlx5_flow_counter {
        LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
        uint32_t shared:1; /**< Share counter ID with other flow rules. */
        uint32_t ref_cnt:31; /**< Reference counter. */
        uint32_t id; /**< Counter ID. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/* Flow structure. */
struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct rte_flow_attr attributes; /**< User flow attribute. */
        uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
        uint32_t layers;
        /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
        uint32_t modifier;
        /**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
        uint32_t fate;
        /**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
        uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
        LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
        struct mlx5_flow_verbs *cur_verbs;
        /**< Current Verbs flow structure being filled. */
        struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
        struct rte_flow_action_rss rss; /**< RSS context. */
        uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
        uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
        void *nl_flow; /**< Netlink flow buffer if relevant. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
};
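
/*
 * The operations above are presumably handed back to applications by
 * this driver's filter control callback when RTE_ETH_FILTER_GENERIC is
 * requested (the callback itself is outside this excerpt).
 */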

/* Structure used to convert an FDIR request into a generic flow rule. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

/*
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to get correct
 * matching on the NIC (firmware dependent), L4 must have the highest
 * priority, followed by L3 and finally L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
        { 9, 10, 11 }, { 12, 13, 14 },
};
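
/*
 * Example: with 8 Verbs priorities (priority_map_3), a flow created at
 * user priority 1 whose sub-priority is MLX5_PRIORITY_MAP_L3 is remapped
 * to Verbs priority priority_map_3[1][MLX5_PRIORITY_MAP_L3], i.e. 3.
 */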

/* Tunnel information. */
struct mlx5_flow_tunnel_info {
        uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
        uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GRE,
                .ptype = RTE_PTYPE_TUNNEL_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
        },
};
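
/*
 * Note: the entries above are assumed to be scanned in order with a
 * match on the first entry whose tunnel bits are all present, which is
 * presumably why the MPLS-over-UDP entry precedes the plain MPLS one
 * (the lookup code is outside this excerpt).
 */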

/**
 * Discover the maximum number of flow priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
        struct {
                struct ibv_flow_attr attr;
                struct ibv_flow_spec_eth eth;
                struct ibv_flow_spec_action_drop drop;
        } flow_attr = {
                .attr = {
                        .num_of_specs = 2,
                },
                .eth = {
                        .type = IBV_FLOW_SPEC_ETH,
                        .size = sizeof(struct ibv_flow_spec_eth),
                },
                .drop = {
                        .size = sizeof(struct ibv_flow_spec_action_drop),
                        .type = IBV_FLOW_SPEC_ACTION_DROP,
                },
        };
        struct ibv_flow *flow;
        struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
        uint16_t vprio[] = { 8, 16 };
        int i;
        int priority = 0;

        if (!drop) {
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        for (i = 0; i != RTE_DIM(vprio); i++) {
                flow_attr.attr.priority = vprio[i] - 1;
                flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
                if (!flow)
                        break;
                claim_zero(mlx5_glue->destroy_flow(flow));
                priority = vprio[i];
        }
        switch (priority) {
        case 8:
                priority = RTE_DIM(priority_map_3);
                break;
        case 16:
                priority = RTE_DIM(priority_map_5);
                break;
        default:
                rte_errno = ENOTSUP;
                DRV_LOG(ERR,
                        "port %u verbs maximum priority: %d expected 8/16",
                        dev->data->port_id, vprio[i]);
                /* Release the drop queue on the error path as well. */
                mlx5_hrxq_drop_release(dev);
                return -rte_errno;
        }
        mlx5_hrxq_drop_release(dev);
        DRV_LOG(INFO, "port %u flow maximum priority: %d",
                dev->data->port_id, priority);
        return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct priv *priv = dev->data->dev_private;
        uint32_t priority = flow->attributes.priority;
        uint32_t subpriority = flow->cur_verbs->attr->priority;

        switch (priv->config.flow_prio) {
        case RTE_DIM(priority_map_3):
                priority = priority_map_3[priority][subpriority];
                break;
        case RTE_DIM(priority_map_5):
                priority = priority_map_5[priority][subpriority];
                break;
        }
        flow->cur_verbs->attr->priority = priority;
}

/**
 * Get a flow counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   A pointer to the counter, NULL otherwise and rte_errno is set.
 */
static struct mlx5_flow_counter *
mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
        struct priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter *cnt;

        LIST_FOREACH(cnt, &priv->flow_counters, next) {
                if (!cnt->shared || cnt->shared != shared)
                        continue;
                if (cnt->id != id)
                        continue;
                cnt->ref_cnt++;
                return cnt;
        }
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT

        struct mlx5_flow_counter tmpl = {
                .shared = shared,
                .id = id,
                .cs = mlx5_glue->create_counter_set
                        (priv->ctx,
                         &(struct ibv_counter_set_init_attr){
                                 .counter_set_id = id,
                         }),
                .hits = 0,
                .bytes = 0,
        };

        if (!tmpl.cs) {
                rte_errno = errno;
                return NULL;
        }
        cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
        if (!cnt) {
                rte_errno = ENOMEM;
                return NULL;
        }
        *cnt = tmpl;
        LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
        return cnt;
#endif
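        /* Fall back when counter set support is compiled out. */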
        rte_errno = ENOTSUP;
        return NULL;
}

/**
 * Release a flow counter.
 *
 * @param[in] counter
 *   Pointer to the counter handler.
 */
static void
mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
{
        if (--counter->ref_cnt == 0) {
                claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
                LIST_REMOVE(counter, next);
                rte_free(counter);
        }
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
                     const struct rte_flow_attr *attributes,
                     struct rte_flow *flow,
                     struct rte_flow_error *error)
{
        uint32_t priority_max =
                ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;

        if (attributes->group)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL,
                                          "groups are not supported");
        if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
            attributes->priority >= priority_max)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                          NULL,
                                          "priority out of range");
        if (attributes->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                          NULL,
                                          "egress is not supported");
        if (attributes->transfer)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                                          NULL,
                                          "transfer is not supported");
        if (!attributes->ingress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                          NULL,
                                          "ingress attribute is mandatory");
        flow->attributes = *attributes;
        if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
                flow->attributes.priority = priority_max;
        return 0;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering supported fields by the NIC to compare with user mask.
 * @param[in] size
 *   Size of the bit-masks in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
                          struct rte_flow_error *error)
{
        unsigned int i;

        assert(nic_mask);
        for (i = 0; i < size; ++i)
                if ((nic_mask[i] | mask[i]) != nic_mask[i])
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "mask enables non supported"
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "mask/last without a spec is not"
                                          " supported");
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                int ret;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
                        last[i] = ((const uint8_t *)item->last)[i] & mask[i];
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "range is not supported");
        }
        return 0;
}

/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Pointer to the specification to copy.
 * @param[in] size
 *   Size in bytes of the specification to copy.
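 *
 * When the specification buffer has not been allocated yet
 * (verbs->specs == NULL), nothing is copied and only the required size is
 * accumulated in verbs->size.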
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
        struct mlx5_flow_verbs *verbs = flow->cur_verbs;

        if (verbs->specs) {
                void *dst;

                dst = (void *)(verbs->specs + verbs->size);
                memcpy(dst, src, size);
                ++verbs->attr->num_of_specs;
        }
        verbs->size += size;
}

/**
 * Adjust verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
                                  int tunnel __rte_unused,
                                  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
        if (flow->rss.level == 2 && !tunnel)
                hash_fields = 0;
        else if (flow->rss.level < 2 && tunnel)
                hash_fields = 0;
#endif
        if (!(flow->rss.types & layer_types))
                hash_fields = 0;
        flow->cur_verbs->hash_fields |= hash_fields;
}
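
/*
 * Example: for an outer IPv4 item, mlx5_flow_item_ipv4() below passes the
 * ETH_RSS_IPV4* layer types together with IBV_RX_HASH_SRC_IPV4 |
 * IBV_RX_HASH_DST_IPV4; the hash fields are kept only when the flow's RSS
 * types actually request that layer.
 */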

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has been fully
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
                   const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        const struct rte_flow_item_eth nic_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                .type = RTE_BE16(0xffff),
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        const unsigned int size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                            MLX5_FLOW_LAYER_OUTER_L2))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L2 layers already configured");
        if (!mask)
                mask = &rte_flow_item_eth_mask;
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_eth),
                                        error);
        if (ret)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                MLX5_FLOW_LAYER_OUTER_L2;
        if (size > flow_size)
                return size;
        if (spec) {
                unsigned int i;

                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
        mlx5_flow_spec_verbs_add(flow, &eth, size);
        return size;
}

/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
                           struct ibv_flow_spec_eth *eth)
{
        unsigned int i;
        const enum ibv_flow_spec_type search = eth->type;
        struct ibv_spec_header *hdr = (struct ibv_spec_header *)
                ((uint8_t *)attr + sizeof(struct ibv_flow_attr));

        for (i = 0; i != attr->num_of_specs; ++i) {
                if (hdr->type == search) {
                        struct ibv_flow_spec_eth *e =
                                (struct ibv_flow_spec_eth *)hdr;

                        e->val.vlan_tag = eth->val.vlan_tag;
                        e->mask.vlan_tag = eth->mask.vlan_tag;
                        e->val.ether_type = eth->val.ether_type;
                        e->mask.ether_type = eth->mask.ether_type;
                        break;
                }
                hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
        }
}

/**
 * Convert the @p item into a Verbs specification in @p flow (or update the
 * Ethernet Verbs specification already present) after ensuring the NIC will
 * understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has been fully
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        const struct rte_flow_item_vlan nic_mask = {
                .tci = RTE_BE16(0x0fff),
                .inner_type = RTE_BE16(0xffff),
        };
        unsigned int size = sizeof(struct ibv_flow_spec_eth);
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct ibv_flow_spec_eth eth = {
                .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;
        const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
                                        MLX5_FLOW_LAYER_INNER_L4) :
                (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
        const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
                MLX5_FLOW_LAYER_OUTER_VLAN;
        const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                MLX5_FLOW_LAYER_OUTER_L2;

        if (flow->layers & vlanm)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "VLAN layer already configured");
        else if ((flow->layers & l34m) != 0)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L2 layer cannot follow L3/L4 layer");
        if (!mask)
                mask = &rte_flow_item_vlan_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_vlan), error);
        if (ret)
                return ret;
        if (spec) {
                eth.val.vlan_tag = spec->tci;
                eth.mask.vlan_tag = mask->tci;
                eth.val.vlan_tag &= eth.mask.vlan_tag;
                eth.val.ether_type = spec->inner_type;
                eth.mask.ether_type = mask->inner_type;
                eth.val.ether_type &= eth.mask.ether_type;
        }
        /*
         * From verbs perspective an empty VLAN is equivalent
         * to a packet without VLAN layer.
         */
        if (!eth.mask.vlan_tag)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
                                          item->spec,
                                          "VLAN cannot be empty");
        if (!(flow->layers & l2m)) {
                if (size <= flow_size) {
                        flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
                        mlx5_flow_spec_verbs_add(flow, &eth, size);
                }
        } else {
                if (flow->cur_verbs)
                        mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
                                                   &eth);
                size = 0; /* Only an update is done in eth specification. */
        }
        flow->layers |= tunnel ?
                (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
                (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has been fully
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        const struct rte_flow_item_ipv4 nic_mask = {
                .hdr = {
                        .src_addr = RTE_BE32(0xffffffff),
                        .dst_addr = RTE_BE32(0xffffffff),
                        .type_of_service = 0xff,
                        .next_proto_id = 0xff,
                },
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
        struct ibv_flow_spec_ipv4_ext ipv4 = {
                .type = IBV_FLOW_SPEC_IPV4_EXT |
                        (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                            MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "multiple L3 layers not supported");
        else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                                 MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 cannot follow an L4 layer.");
        if (!mask)
                mask = &rte_flow_item_ipv4_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_ipv4), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
        if (spec) {
                ipv4.val = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = spec->hdr.src_addr,
                        .dst_ip = spec->hdr.dst_addr,
                        .proto = spec->hdr.next_proto_id,
                        .tos = spec->hdr.type_of_service,
                };
                ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = mask->hdr.src_addr,
                        .dst_ip = mask->hdr.dst_addr,
                        .proto = mask->hdr.next_proto_id,
                        .tos = mask->hdr.type_of_service,
                };
                /* Remove unwanted bits from values. */
                ipv4.val.src_ip &= ipv4.mask.src_ip;
                ipv4.val.dst_ip &= ipv4.mask.dst_ip;
                ipv4.val.proto &= ipv4.mask.proto;
                ipv4.val.tos &= ipv4.mask.tos;
        }
        flow->l3_protocol_en = !!ipv4.mask.proto;
        flow->l3_protocol = ipv4.val.proto;
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust
                        (flow, tunnel,
                         (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                          ETH_RSS_NONFRAG_IPV4_TCP |
                          ETH_RSS_NONFRAG_IPV4_UDP |
                          ETH_RSS_NONFRAG_IPV4_OTHER),
                         (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
                mlx5_flow_spec_verbs_add(flow, &ipv4, size);
        }
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has been fully
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        const struct rte_flow_item_ipv6 nic_mask = {
                .hdr = {
                        .src_addr =
                                "\xff\xff\xff\xff\xff\xff\xff\xff"
                                "\xff\xff\xff\xff\xff\xff\xff\xff",
                        .dst_addr =
                                "\xff\xff\xff\xff\xff\xff\xff\xff"
                                "\xff\xff\xff\xff\xff\xff\xff\xff",
                        .vtc_flow = RTE_BE32(0xffffffff),
                        .proto = 0xff,
                        .hop_limits = 0xff,
                },
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
        struct ibv_flow_spec_ipv6 ipv6 = {
                .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                            MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "multiple L3 layers not supported");
        else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                                 MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 cannot follow an L4 layer.");
        /*
         * IPv6 is not recognised by the NIC inside a GRE tunnel.
         * Such support has to be disabled here, as the rule would
         * otherwise be accepted by the NIC without matching IPv6
         * traffic.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
         * Mellanox OFED 4.4-1.0.0.0.
         */
        if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "IPv6 inside a GRE tunnel is"
                                          " not recognised.");
        if (!mask)
                mask = &rte_flow_item_ipv6_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_ipv6), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
        if (spec) {
                unsigned int i;
                uint32_t vtc_flow_val;
                uint32_t vtc_flow_mask;

                memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
                       RTE_DIM(ipv6.val.src_ip));
                memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
                       RTE_DIM(ipv6.val.dst_ip));
                memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
                       RTE_DIM(ipv6.mask.src_ip));
                memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
                       RTE_DIM(ipv6.mask.dst_ip));
                vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
                vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
                ipv6.val.flow_label =
                        rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
                                         IPV6_HDR_FL_SHIFT);
                ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
                                         IPV6_HDR_TC_SHIFT;
                ipv6.val.next_hdr = spec->hdr.proto;
                ipv6.val.hop_limit = spec->hdr.hop_limits;
                ipv6.mask.flow_label =
                        rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
                                         IPV6_HDR_FL_SHIFT);
                ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
                                          IPV6_HDR_TC_SHIFT;
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
                /* Remove unwanted bits from values. */
                for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
                        ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
                        ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
                }
                ipv6.val.flow_label &= ipv6.mask.flow_label;
                ipv6.val.traffic_class &= ipv6.mask.traffic_class;
                ipv6.val.next_hdr &= ipv6.mask.next_hdr;
                ipv6.val.hop_limit &= ipv6.mask.hop_limit;
        }
        flow->l3_protocol_en = !!ipv6.mask.next_hdr;
        flow->l3_protocol = ipv6.val.next_hdr;
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust
                        (flow, tunnel,
                         (ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                          ETH_RSS_NONFRAG_IPV6_TCP | ETH_RSS_NONFRAG_IPV6_UDP |
                          ETH_RSS_NONFRAG_IPV6_OTHER | ETH_RSS_IPV6_EX |
                          ETH_RSS_IPV6_TCP_EX | ETH_RSS_IPV6_UDP_EX),
                         (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
                mlx5_flow_spec_verbs_add(flow, &ipv6, size);
        }
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has been fully
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
                   const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp udp = {
                .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "protocol filtering not compatible"
                                          " with UDP layer");
        if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                              MLX5_FLOW_LAYER_OUTER_L3)))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 is mandatory to filter"
                                          " on L4");
        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                            MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L4 layer is already"
                                          " present");
        if (!mask)
                mask = &rte_flow_item_udp_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_udp_mask,
                 sizeof(struct rte_flow_item_udp), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
                MLX5_FLOW_LAYER_OUTER_L4_UDP;
        if (spec) {
                udp.val.dst_port = spec->hdr.dst_port;
                udp.val.src_port = spec->hdr.src_port;
                udp.mask.dst_port = mask->hdr.dst_port;
                udp.mask.src_port = mask->hdr.src_port;
                /* Remove unwanted bits from values. */
                udp.val.src_port &= udp.mask.src_port;
                udp.val.dst_port &= udp.mask.dst_port;
        }
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
                                                  (IBV_RX_HASH_SRC_PORT_UDP |
                                                   IBV_RX_HASH_DST_PORT_UDP));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
                mlx5_flow_spec_verbs_add(flow, &udp, size);
        }
        return size;
}
1288
1289 /**
1290  * Convert the @p item into a Verbs specification after ensuring the NIC
1291  * will understand and process it correctly.
1292  * If the necessary size for the conversion is greater than @p flow_size,
1293  * nothing is written in @p flow, but the validation is still performed.
1294  *
1295  * @param[in] item
1296  *   Item specification.
1297  * @param[in, out] flow
1298  *   Pointer to flow structure.
1299  * @param[in] flow_size
1300  *   Size in bytes of the available space in @p flow, if too small, nothing is
1301  *   written.
1302  * @param[out] error
1303  *   Pointer to error structure.
1304  *
1305  * @return
1306  *   On success, the number of bytes consumed/necessary; if the returned value
1307  *   is less than or equal to @p flow_size, the @p item has been fully
1308  *   converted, otherwise the function must be called again with the returned size.
1309  *   On error, a negative errno value is returned and rte_errno is set.
1310  */
1311 static int
1312 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1313                    const size_t flow_size, struct rte_flow_error *error)
1314 {
1315         const struct rte_flow_item_tcp *spec = item->spec;
1316         const struct rte_flow_item_tcp *mask = item->mask;
1317         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1318         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1319         struct ibv_flow_spec_tcp_udp tcp = {
1320                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1321                 .size = size,
1322         };
1323         int ret;
1324
1325         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1326                 return rte_flow_error_set(error, ENOTSUP,
1327                                           RTE_FLOW_ERROR_TYPE_ITEM,
1328                                           item,
1329                                           "protocol filtering not compatible"
1330                                           " with TCP layer");
1331         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1332                               MLX5_FLOW_LAYER_OUTER_L3)))
1333                 return rte_flow_error_set(error, ENOTSUP,
1334                                           RTE_FLOW_ERROR_TYPE_ITEM,
1335                                           item,
1336                                           "L3 is mandatory to filter on L4");
1337         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1338                             MLX5_FLOW_LAYER_OUTER_L4))
1339                 return rte_flow_error_set(error, ENOTSUP,
1340                                           RTE_FLOW_ERROR_TYPE_ITEM,
1341                                           item,
1342                                           "L4 layer is already present");
1343         if (!mask)
1344                 mask = &rte_flow_item_tcp_mask;
1345         ret = mlx5_flow_item_acceptable
1346                 (item, (const uint8_t *)mask,
1347                  (const uint8_t *)&rte_flow_item_tcp_mask,
1348                  sizeof(struct rte_flow_item_tcp), error);
1349         if (ret < 0)
1350                 return ret;
1351         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1352                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1353         if (spec) {
1354                 tcp.val.dst_port = spec->hdr.dst_port;
1355                 tcp.val.src_port = spec->hdr.src_port;
1356                 tcp.mask.dst_port = mask->hdr.dst_port;
1357                 tcp.mask.src_port = mask->hdr.src_port;
1358                 /* Remove unwanted bits from values. */
1359                 tcp.val.src_port &= tcp.mask.src_port;
1360                 tcp.val.dst_port &= tcp.mask.dst_port;
1361         }
1362         if (size <= flow_size) {
1363                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1364                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1365                                                    IBV_RX_HASH_DST_PORT_TCP));
1366                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1367                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1368         }
1369         return size;
1370 }
1371
1372 /**
1373  * Convert the @p item into a Verbs specification after ensuring the NIC
1374  * will understand and process it correctly.
1375  * If the necessary size for the conversion is greater than @p flow_size,
1376  * nothing is written in @p flow, but the validation is still performed.
1377  *
1378  * @param[in] item
1379  *   Item specification.
1380  * @param[in, out] flow
1381  *   Pointer to flow structure.
1382  * @param[in] flow_size
1383  *   Size in bytes of the available space in @p flow, if too small, nothing is
1384  *   written.
1385  * @param[out] error
1386  *   Pointer to error structure.
1387  *
1388  * @return
1389  *   On success, the number of bytes consumed/necessary; if the returned value
1390  *   is less than or equal to @p flow_size, the @p item has been fully
1391  *   converted, otherwise the function must be called again with the returned size.
1392  *   On error, a negative errno value is returned and rte_errno is set.
1393  */
1394 static int
1395 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1396                      const size_t flow_size, struct rte_flow_error *error)
1397 {
1398         const struct rte_flow_item_vxlan *spec = item->spec;
1399         const struct rte_flow_item_vxlan *mask = item->mask;
1400         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1401         struct ibv_flow_spec_tunnel vxlan = {
1402                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1403                 .size = size,
1404         };
1405         int ret;
1406         union vni {
1407                 uint32_t vlan_id;
1408                 uint8_t vni[4];
1409         } id = { .vlan_id = 0, };
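        /*
         * The 24-bit VNI is copied below into bytes 1..3 of the zeroed
         * 32-bit vlan_id field, leaving byte 0 clear, which preserves the
         * network byte order layout expected for the Verbs tunnel_id.
         */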
1410
1411         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1412                 return rte_flow_error_set(error, ENOTSUP,
1413                                           RTE_FLOW_ERROR_TYPE_ITEM,
1414                                           item,
1415                                           "a tunnel is already present");
1416         /*
1417          * Verify an outer UDP layer is present, as required by
1418          * https://tools.ietf.org/html/rfc7348
1419          */
1420         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1421                 return rte_flow_error_set(error, ENOTSUP,
1422                                           RTE_FLOW_ERROR_TYPE_ITEM,
1423                                           item,
1424                                           "no outer UDP layer found");
1425         if (!mask)
1426                 mask = &rte_flow_item_vxlan_mask;
1427         ret = mlx5_flow_item_acceptable
1428                 (item, (const uint8_t *)mask,
1429                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1430                  sizeof(struct rte_flow_item_vxlan), error);
1431         if (ret < 0)
1432                 return ret;
1433         if (spec) {
1434                 memcpy(&id.vni[1], spec->vni, 3);
1435                 vxlan.val.tunnel_id = id.vlan_id;
1436                 memcpy(&id.vni[1], mask->vni, 3);
1437                 vxlan.mask.tunnel_id = id.vlan_id;
1438                 /* Remove unwanted bits from values. */
1439                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1440         }
1441         /*
1442          * A tunnel id of 0 is equivalent to not adding a VXLAN layer:
1443          * if only this layer is defined in the Verbs specification, it
1444          * is interpreted as a wildcard and all packets will match this
1445          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
1446          * udp), all packets matching the preceding layers will also
1447          * match this rule. To avoid such a situation, VNI 0 is
1448          * currently refused.
1449          */
1450         if (!vxlan.val.tunnel_id)
1451                 return rte_flow_error_set(error, EINVAL,
1452                                           RTE_FLOW_ERROR_TYPE_ITEM,
1453                                           item,
1454                                           "VXLAN vni cannot be 0");
1455         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1456                 return rte_flow_error_set(error, EINVAL,
1457                                           RTE_FLOW_ERROR_TYPE_ITEM,
1458                                           item,
1459                                           "VXLAN tunnel must be fully defined");
1460         if (size <= flow_size) {
1461                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1462                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1463         }
1464         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1465         return size;
1466 }
1467
1468 /**
1469  * Convert the @p item into a Verbs specification after ensuring the NIC
1470  * will understand and process it correctly.
1471  * If the necessary size for the conversion is greater than @p flow_size,
1472  * nothing is written in @p flow, but the validation is still performed.
1473  *
1474  * @param dev
1475  *   Pointer to Ethernet device.
1476  * @param[in] item
1477  *   Item specification.
1478  * @param[in, out] flow
1479  *   Pointer to flow structure.
1480  * @param[in] flow_size
1481  *   Size in bytes of the available space in @p flow, if too small, nothing is
1482  *   written.
1483  * @param[out] error
1484  *   Pointer to error structure.
1485  *
1486  * @return
1487  *   On success, the number of bytes consumed/necessary; if the returned value
1488  *   is less than or equal to @p flow_size, the @p item has been fully
1489  *   converted, otherwise the function must be called again with the returned size.
1490  *   On error, a negative errno value is returned and rte_errno is set.
1491  */
1492 static int
1493 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
1494                          const struct rte_flow_item *item,
1495                          struct rte_flow *flow, const size_t flow_size,
1496                          struct rte_flow_error *error)
1497 {
1498         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1499         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1500         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1501         struct ibv_flow_spec_tunnel vxlan_gpe = {
1502                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1503                 .size = size,
1504         };
1505         int ret;
1506         union vni {
1507                 uint32_t vlan_id;
1508                 uint8_t vni[4];
1509         } id = { .vlan_id = 0, };
1510
1511         if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
1512                 return rte_flow_error_set(error, ENOTSUP,
1513                                           RTE_FLOW_ERROR_TYPE_ITEM,
1514                                           item,
1515                                           "L3 VXLAN is not enabled by device"
1516                                           " parameter and/or not configured in"
1517                                           " firmware");
1518         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1519                 return rte_flow_error_set(error, ENOTSUP,
1520                                           RTE_FLOW_ERROR_TYPE_ITEM,
1521                                           item,
1522                                           "a tunnel is already present");
1523         /*
1524          * Verify an outer UDP layer is present, as required by VXLAN
1525          * (https://tools.ietf.org/html/rfc7348) and its GPE extension.
1526          */
1527         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1528                 return rte_flow_error_set(error, ENOTSUP,
1529                                           RTE_FLOW_ERROR_TYPE_ITEM,
1530                                           item,
1531                                           "no outer UDP layer found");
1532         if (!mask)
1533                 mask = &rte_flow_item_vxlan_gpe_mask;
1534         ret = mlx5_flow_item_acceptable
1535                 (item, (const uint8_t *)mask,
1536                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1537                  sizeof(struct rte_flow_item_vxlan_gpe), error);
1538         if (ret < 0)
1539                 return ret;
1540         if (spec) {
1541                 memcpy(&id.vni[1], spec->vni, 3);
1542                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1543                 memcpy(&id.vni[1], mask->vni, 3);
1544                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1545                 if (spec->protocol)
1546                         return rte_flow_error_set
1547                                 (error, EINVAL,
1548                                  RTE_FLOW_ERROR_TYPE_ITEM,
1549                                  item,
1550                                  "VXLAN-GPE protocol not supported");
1551                 /* Remove unwanted bits from values. */
1552                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1553         }
1554         /*
1555          * A tunnel id of 0 is equivalent to not adding a VXLAN layer: if only
1556          * this layer is defined in the Verbs specification, it is interpreted
1557          * as a wildcard and all packets will match this rule; if it follows a
1558          * full stack layer (e.g. eth / ipv4 / udp), all packets matching the
1559          * preceding layers will also match this rule. To avoid such a
1560          * situation, VNI 0 is currently refused.
1561          */
1562         if (!vxlan_gpe.val.tunnel_id)
1563                 return rte_flow_error_set(error, EINVAL,
1564                                           RTE_FLOW_ERROR_TYPE_ITEM,
1565                                           item,
1566                                           "VXLAN-GPE vni cannot be 0");
1567         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1568                 return rte_flow_error_set(error, EINVAL,
1569                                           RTE_FLOW_ERROR_TYPE_ITEM,
1570                                           item,
1571                                           "VXLAN-GPE tunnel must be fully"
1572                                           " defined");
1573         if (size <= flow_size) {
1574                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1575                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1576         }
1577         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1578         return size;
1579 }
1580
1581 /**
1582  * Update the protocol in Verbs IPv4/IPv6 spec.
1583  *
1584  * @param[in, out] attr
1585  *   Pointer to Verbs attributes structure.
1586  * @param[in] search
1587  *   Specification type to search in order to update the IP protocol.
1588  * @param[in] protocol
1589  *   Protocol value to set if none is present in the specification.
1590  */
1591 static void
1592 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1593                                       enum ibv_flow_spec_type search,
1594                                       uint8_t protocol)
1595 {
1596         unsigned int i;
1597         struct ibv_spec_header *hdr;
1598
1599         if (!attr)
1600                 return;
1601         hdr = (struct ibv_spec_header *)((uint8_t *)attr + sizeof(*attr));
1602         for (i = 0; i != attr->num_of_specs; ++i) {
1603                 if (hdr->type == search) {
1604                         union {
1605                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1606                                 struct ibv_flow_spec_ipv6 *ipv6;
1607                         } ip;
1608
1609                         switch (search) {
1610                         case IBV_FLOW_SPEC_IPV4_EXT:
1611                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1612                                 if (!ip.ipv4->val.proto) {
1613                                         ip.ipv4->val.proto = protocol;
1614                                         ip.ipv4->mask.proto = 0xff;
1615                                 }
1616                                 break;
1617                         case IBV_FLOW_SPEC_IPV6:
1618                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1619                                 if (!ip.ipv6->val.next_hdr) {
1620                                         ip.ipv6->val.next_hdr = protocol;
1621                                         ip.ipv6->mask.next_hdr = 0xff;
1622                                 }
1623                                 break;
1624                         default:
1625                                 break;
1626                         }
1627                         break;
1628                 }
1629                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1630         }
1631 }
1632
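/**
 * Worked example, illustrative only: with a pattern such as
 * eth / ipv4 / gre, the IPv4 Verbs specification is emitted before the GRE
 * item is reached, usually with a zero protocol. mlx5_flow_item_gre() then
 * patches it in place through the helper above:
 *
 * @code
 * // Hypothetical state while converting the pattern above:
 * // before: the IPv4 spec has val.proto == 0 and mask.proto == 0.
 * mlx5_flow_item_gre_ip_protocol_update(verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
 *                                       MLX5_IP_PROTOCOL_GRE);
 * // after: val.proto == 47 (GRE) and mask.proto == 0xff. A protocol
 * // explicitly set by the application is left untouched.
 * @endcode
 */
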
1633 /**
1634  * Convert the @p item into a Verbs specification after ensuring the NIC
1635  * will understand and process it correctly.
1636  * It will also update the previous L3 layer with the protocol value matching
1637  * the GRE.
1638  * If the necessary size for the conversion is greater than @p flow_size,
1639  * nothing is written in @p flow, but the validation is still performed.
1640  *
1641  * @param dev
1642  *   Pointer to Ethernet device.
1643  * @param[in] item
1644  *   Item specification.
1645  * @param[in, out] flow
1646  *   Pointer to flow structure.
1647  * @param[in] flow_size
1648  *   Size in bytes of the available space in @p flow, if too small, nothing is
1649  *   written.
1650  * @param[out] error
1651  *   Pointer to error structure.
1652  *
1653  * @return
1654  *   On success, the number of bytes consumed/necessary; if the returned value
1655  *   is less than or equal to @p flow_size, the @p item has been fully
1656  *   converted, otherwise the function must be called again with the returned size.
1657  *   On error, a negative errno value is returned and rte_errno is set.
1658  */
1659 static int
1660 mlx5_flow_item_gre(const struct rte_flow_item *item,
1661                    struct rte_flow *flow, const size_t flow_size,
1662                    struct rte_flow_error *error)
1663 {
1664         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1665         const struct rte_flow_item_gre *spec = item->spec;
1666         const struct rte_flow_item_gre *mask = item->mask;
1667 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1668         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1669         struct ibv_flow_spec_gre tunnel = {
1670                 .type = IBV_FLOW_SPEC_GRE,
1671                 .size = size,
1672         };
1673 #else
1674         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1675         struct ibv_flow_spec_tunnel tunnel = {
1676                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1677                 .size = size,
1678         };
1679 #endif
1680         int ret;
1681
1682         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
1683                 return rte_flow_error_set(error, ENOTSUP,
1684                                           RTE_FLOW_ERROR_TYPE_ITEM,
1685                                           item,
1686                                           "protocol filtering not compatible"
1687                                           " with this GRE layer");
1688         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1689                 return rte_flow_error_set(error, ENOTSUP,
1690                                           RTE_FLOW_ERROR_TYPE_ITEM,
1691                                           item,
1692                                           "a tunnel is already present");
1693         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
1694                 return rte_flow_error_set(error, ENOTSUP,
1695                                           RTE_FLOW_ERROR_TYPE_ITEM,
1696                                           item,
1697                                           "L3 Layer is missing");
1698         if (!mask)
1699                 mask = &rte_flow_item_gre_mask;
1700         ret = mlx5_flow_item_acceptable
1701                 (item, (const uint8_t *)mask,
1702                  (const uint8_t *)&rte_flow_item_gre_mask,
1703                  sizeof(struct rte_flow_item_gre), error);
1704         if (ret < 0)
1705                 return ret;
1706 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1707         if (spec) {
1708                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1709                 tunnel.val.protocol = spec->protocol;
1710                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1711                 tunnel.mask.protocol = mask->protocol;
1712                 /* Remove unwanted bits from values. */
1713                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1714                 tunnel.val.protocol &= tunnel.mask.protocol;
1715                 tunnel.val.key &= tunnel.mask.key;
1716         }
1717 #else
1718         if (spec && (spec->protocol & mask->protocol))
1719                 return rte_flow_error_set(error, ENOTSUP,
1720                                           RTE_FLOW_ERROR_TYPE_ITEM,
1721                                           item,
1722                                           "without MPLS support the"
1723                                           " specification cannot be used for"
1724                                           " filtering");
1725 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1726         if (size <= flow_size) {
1727                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1728                         mlx5_flow_item_gre_ip_protocol_update
1729                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1730                                  MLX5_IP_PROTOCOL_GRE);
1731                 else
1732                         mlx5_flow_item_gre_ip_protocol_update
1733                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1734                                  MLX5_IP_PROTOCOL_GRE);
1735                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1736                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1737         }
1738         flow->layers |= MLX5_FLOW_LAYER_GRE;
1739         return size;
1740 }
1741
1742 /**
1743  * Convert the @p item into a Verbs specification after ensuring the NIC
1744  * will understand and process it correctly.
1745  * If the necessary size for the conversion is greater than @p flow_size,
1746  * nothing is written in @p flow, but the validation is still performed.
1747  *
1748  * @param[in] item
1749  *   Item specification.
1750  * @param[in, out] flow
1751  *   Pointer to flow structure.
1752  * @param[in] flow_size
1753  *   Size in bytes of the available space in @p flow, if too small, nothing is
1754  *   written.
1755  * @param[out] error
1756  *   Pointer to error structure.
1757  *
1758  * @return
1759  *   On success, the number of bytes consumed/necessary; if the returned value
1760  *   is less than or equal to @p flow_size, the @p item has been fully
1761  *   converted, otherwise the function must be called again with the returned size.
1762  *   On error, a negative errno value is returned and rte_errno is set.
1763  */
1764 static int
1765 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
1766                     struct rte_flow *flow __rte_unused,
1767                     const size_t flow_size __rte_unused,
1768                     struct rte_flow_error *error)
1769 {
1770 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1771         const struct rte_flow_item_mpls *spec = item->spec;
1772         const struct rte_flow_item_mpls *mask = item->mask;
1773         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1774         struct ibv_flow_spec_mpls mpls = {
1775                 .type = IBV_FLOW_SPEC_MPLS,
1776                 .size = size,
1777         };
1778         int ret;
1779
1780         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
1781                 return rte_flow_error_set(error, ENOTSUP,
1782                                           RTE_FLOW_ERROR_TYPE_ITEM,
1783                                           item,
1784                                           "protocol filtering not compatible"
1785                                           " with MPLS layer");
1786         /* Multi-tunnel isn't allowed but MPLS over GRE is an exception. */
1787         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL &&
1788             (flow->layers & MLX5_FLOW_LAYER_GRE) != MLX5_FLOW_LAYER_GRE)
1789                 return rte_flow_error_set(error, ENOTSUP,
1790                                           RTE_FLOW_ERROR_TYPE_ITEM,
1791                                           item,
1792                                           "a tunnel is already"
1793                                           " present");
1794         if (!mask)
1795                 mask = &rte_flow_item_mpls_mask;
1796         ret = mlx5_flow_item_acceptable
1797                 (item, (const uint8_t *)mask,
1798                  (const uint8_t *)&rte_flow_item_mpls_mask,
1799                  sizeof(struct rte_flow_item_mpls), error);
1800         if (ret < 0)
1801                 return ret;
1802         if (spec) {
1803                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1804                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1805                 /* Remove unwanted bits from values. */
1806                 mpls.val.label &= mpls.mask.label;
1807         }
1808         if (size <= flow_size) {
1809                 mlx5_flow_spec_verbs_add(flow, &mpls, size);
1810                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1811         }
1812         flow->layers |= MLX5_FLOW_LAYER_MPLS;
1813         return size;
1814 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1815         return rte_flow_error_set(error, ENOTSUP,
1816                                   RTE_FLOW_ERROR_TYPE_ITEM,
1817                                   item,
1818                                   "MPLS is not supported by Verbs,"
1819                                   " please update the library.");
1820 }
1821
1822 /**
1823  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1824  * will understand and process it correctly.
1825  * The conversion is performed item per item; each of them is written into
1826  * @p flow if its size is less than or equal to @p flow_size.
1827  * Validation and memory consumption computation are still performed until the
1828  * end of @p pattern, unless an error is encountered.
1829  *
1830  * @param[in] pattern
1831  *   Flow pattern.
1832  * @param[in, out] flow
1833  *   Pointer to the rte_flow structure.
1834  * @param[in] flow_size
1835  *   Size in bytes of the available space in @p flow; if too small, some
1836  *   garbage may be present.
1837  * @param[out] error
1838  *   Pointer to error structure.
1839  *
1840  * @return
1841  *   On success, the number of bytes consumed/necessary; if the returned value
1842  *   is less than or equal to @p flow_size, the @p pattern has been fully
1843  *   converted, otherwise the function must be called again with the
1844  *   returned size.
1845  *   On error, a negative errno value is returned and rte_errno is set.
1846  */
1847 static int
1848 mlx5_flow_items(struct rte_eth_dev *dev,
1849                 const struct rte_flow_item pattern[],
1850                 struct rte_flow *flow, const size_t flow_size,
1851                 struct rte_flow_error *error)
1852 {
1853         int remain = flow_size;
1854         size_t size = 0;
1855
1856         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1857                 int ret = 0;
1858
1859                 switch (pattern->type) {
1860                 case RTE_FLOW_ITEM_TYPE_VOID:
1861                         break;
1862                 case RTE_FLOW_ITEM_TYPE_ETH:
1863                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1864                         break;
1865                 case RTE_FLOW_ITEM_TYPE_VLAN:
1866                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1867                         break;
1868                 case RTE_FLOW_ITEM_TYPE_IPV4:
1869                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1870                         break;
1871                 case RTE_FLOW_ITEM_TYPE_IPV6:
1872                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1873                         break;
1874                 case RTE_FLOW_ITEM_TYPE_UDP:
1875                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1876                         break;
1877                 case RTE_FLOW_ITEM_TYPE_TCP:
1878                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1879                         break;
1880                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1881                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1882                                                    error);
1883                         break;
1884                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1885                         ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
1886                                                        remain, error);
1887                         break;
1888                 case RTE_FLOW_ITEM_TYPE_GRE:
1889                         ret = mlx5_flow_item_gre(pattern, flow, remain, error);
1890                         break;
1891                 case RTE_FLOW_ITEM_TYPE_MPLS:
1892                         ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
1893                         break;
1894                 default:
1895                         return rte_flow_error_set(error, ENOTSUP,
1896                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1897                                                   pattern,
1898                                                   "item not supported");
1899                 }
1900                 if (ret < 0)
1901                         return ret;
1902                 if (remain > ret)
1903                         remain -= ret;
1904                 else
1905                         remain = 0;
1906                 size += ret;
1907         }
1908         if (!flow->layers) {
1909                 const struct rte_flow_item item = {
1910                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1911                 };
1912
1913                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1914         }
1915         return size;
1916 }
1917
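/**
 * Illustrative sketch, not compiled into the driver: a pattern accepted by
 * mlx5_flow_items() and the layer ordering it enforces. The dev, flow,
 * flow_size and error variables are hypothetical.
 *
 * @code
 * struct rte_flow_item pattern[] = {
 *         { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *         { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *         { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *         { .type = RTE_FLOW_ITEM_TYPE_VXLAN,
 *           .spec = &(struct rte_flow_item_vxlan){ .vni = { 0, 0, 42 } } },
 *         { .type = RTE_FLOW_ITEM_TYPE_END },
 * };
 * int size = mlx5_flow_items(dev, pattern, flow, flow_size, &error);
 * // VXLAN would be rejected without the preceding outer UDP item, and a
 * // VNI of 0 is refused (see mlx5_flow_item_vxlan()).
 * @endcode
 */
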
1918 /**
1919  * Convert the @p action into a Verbs specification after ensuring the NIC
1920  * will understand and process it correctly.
1921  * If the necessary size for the conversion is greater than @p flow_size,
1922  * nothing is written in @p flow, but the validation is still performed.
1923  *
1924  * @param[in] action
1925  *   Action configuration.
1926  * @param[in, out] flow
1927  *   Pointer to flow structure.
1928  * @param[in] flow_size
1929  *   Size in bytes of the available space in @p flow, if too small, nothing is
1930  *   written.
1931  * @param[out] error
1932  *   Pointer to error structure.
1933  *
1934  * @return
1935  *   On success, the number of bytes consumed/necessary; if the returned value
1936  *   is less than or equal to @p flow_size, the @p action has been fully
1937  *   converted, otherwise the function must be called again with the
1938  *   returned size.
1939  *   On error, a negative errno value is returned and rte_errno is set.
1940  */
1941 static int
1942 mlx5_flow_action_drop(const struct rte_flow_action *action,
1943                       struct rte_flow *flow, const size_t flow_size,
1944                       struct rte_flow_error *error)
1945 {
1946         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1947         struct ibv_flow_spec_action_drop drop = {
1948                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1949                         .size = size,
1950         };
1951
1952         if (flow->fate)
1953                 return rte_flow_error_set(error, ENOTSUP,
1954                                           RTE_FLOW_ERROR_TYPE_ACTION,
1955                                           action,
1956                                           "multiple fate actions are not"
1957                                           " supported");
1958         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1959                 return rte_flow_error_set(error, ENOTSUP,
1960                                           RTE_FLOW_ERROR_TYPE_ACTION,
1961                                           action,
1962                                           "drop is not compatible with"
1963                                           " flag/mark action");
1964         if (size <= flow_size)
1965                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1966         flow->fate |= MLX5_FLOW_FATE_DROP;
1967         return size;
1968 }
1969
1970 /**
1971  * Convert the @p action into @p flow after ensuring the NIC will understand
1972  * and process it correctly.
1973  *
1974  * @param[in] dev
1975  *   Pointer to Ethernet device structure.
1976  * @param[in] action
1977  *   Action configuration.
1978  * @param[in, out] flow
1979  *   Pointer to flow structure.
1980  * @param[out] error
1981  *   Pointer to error structure.
1982  *
1983  * @return
1984  *   0 on success, a negative errno value otherwise and rte_errno is set.
1985  */
1986 static int
1987 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1988                        const struct rte_flow_action *action,
1989                        struct rte_flow *flow,
1990                        struct rte_flow_error *error)
1991 {
1992         struct priv *priv = dev->data->dev_private;
1993         const struct rte_flow_action_queue *queue = action->conf;
1994
1995         if (flow->fate)
1996                 return rte_flow_error_set(error, ENOTSUP,
1997                                           RTE_FLOW_ERROR_TYPE_ACTION,
1998                                           action,
1999                                           "multiple fate actions are not"
2000                                           " supported");
2001         if (queue->index >= priv->rxqs_n)
2002                 return rte_flow_error_set(error, EINVAL,
2003                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2004                                           &queue->index,
2005                                           "queue index out of range");
2006         if (!(*priv->rxqs)[queue->index])
2007                 return rte_flow_error_set(error, EINVAL,
2008                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2009                                           &queue->index,
2010                                           "queue is not configured");
2011         if (flow->queue)
2012                 (*flow->queue)[0] = queue->index;
2013         flow->rss.queue_num = 1;
2014         flow->fate |= MLX5_FLOW_FATE_QUEUE;
2015         return 0;
2016 }
2017
2018 /**
2019  * Ensure the @p action will be understood and used correctly by the NIC.
2020  *
2021  * @param dev
2022  *   Pointer to Ethernet device structure.
2023  * @param[in] action
2024  *   Action configuration.
2025  * @param[in, out] flow
2026  *   Pointer to the rte_flow structure.
2027  * @param[out] error
2028  *   Pointer to error structure.
2029  *
2030  * @return
2031  *   On success, the @p flow->queue array and @p flow->rss are filled and valid.
2032  *   On error, a negative errno value is returned and rte_errno is set.
2033  */
2034 static int
2035 mlx5_flow_action_rss(struct rte_eth_dev *dev,
2036                      const struct rte_flow_action *action,
2037                      struct rte_flow *flow,
2038                      struct rte_flow_error *error)
2039 {
2040         struct priv *priv = dev->data->dev_private;
2041         const struct rte_flow_action_rss *rss = action->conf;
2042         unsigned int i;
2043
2044         if (flow->fate)
2045                 return rte_flow_error_set(error, ENOTSUP,
2046                                           RTE_FLOW_ERROR_TYPE_ACTION,
2047                                           action,
2048                                           "multiple fate actions are not"
2049                                           " supported");
2050         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2051             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2052                 return rte_flow_error_set(error, ENOTSUP,
2053                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2054                                           &rss->func,
2055                                           "RSS hash function not supported");
2056 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2057         if (rss->level > 2)
2058 #else
2059         if (rss->level > 1)
2060 #endif
2061                 return rte_flow_error_set(error, ENOTSUP,
2062                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2063                                           &rss->level,
2064                                           "tunnel RSS is not supported");
2065         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2066                 return rte_flow_error_set(error, ENOTSUP,
2067                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2068                                           &rss->key_len,
2069                                           "RSS hash key too small");
2070         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2071                 return rte_flow_error_set(error, ENOTSUP,
2072                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2073                                           &rss->key_len,
2074                                           "RSS hash key too large");
2075         if (!rss->queue_num)
2076                 return rte_flow_error_set(error, ENOTSUP,
2077                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2078                                           rss,
2079                                           "no queues were provided for RSS");
2080         if (rss->queue_num > priv->config.ind_table_max_size)
2081                 return rte_flow_error_set(error, ENOTSUP,
2082                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2083                                           &rss->queue_num,
2084                                           "number of queues too large");
2085         if (rss->types & MLX5_RSS_HF_MASK)
2086                 return rte_flow_error_set(error, ENOTSUP,
2087                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2088                                           &rss->types,
2089                                           "some RSS protocols are not"
2090                                           " supported");
2091         for (i = 0; i != rss->queue_num; ++i) {
2092                 if (rss->queue[i] >= priv->rxqs_n)
2093                         return rte_flow_error_set
2094                                 (error, EINVAL,
2095                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2096                                  rss,
2097                                  "queue index out of range");
2098                 if (!(*priv->rxqs)[rss->queue[i]])
2099                         return rte_flow_error_set
2100                                 (error, EINVAL,
2101                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2102                                  &rss->queue[i],
2103                                  "queue is not configured");
2104         }
2105         if (flow->queue)
2106                 memcpy((*flow->queue), rss->queue,
2107                        rss->queue_num * sizeof(uint16_t));
2108         flow->rss.queue_num = rss->queue_num;
2109         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
2110         flow->rss.types = rss->types;
2111         flow->rss.level = rss->level;
2112         flow->fate |= MLX5_FLOW_FATE_RSS;
2113         return 0;
2114 }
2115
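/**
 * Illustrative sketch, not compiled into the driver: an RSS action
 * configuration that passes the checks above. The queues and key arrays
 * are hypothetical; the key must be exactly MLX5_RSS_HASH_KEY_LEN bytes.
 *
 * @code
 * uint16_t queues[] = { 0, 1, 2, 3 }; // all configured on the port
 * uint8_t key[MLX5_RSS_HASH_KEY_LEN] = { 0 }; // use a real hash key
 * struct rte_flow_action_rss conf = {
 *         .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *         .level = 1, // hash on outer packet fields only
 *         .types = ETH_RSS_IP | ETH_RSS_UDP,
 *         .key_len = MLX5_RSS_HASH_KEY_LEN,
 *         .key = key,
 *         .queue_num = RTE_DIM(queues),
 *         .queue = queues,
 * };
 * @endcode
 */
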
2116 /**
2117  * Convert the @p action into a Verbs specification after ensuring the NIC
2118  * will understand and process it correctly.
2119  * If the necessary size for the conversion is greater than @p flow_size,
2120  * nothing is written in @p flow, but the validation is still performed.
2121  *
2122  * @param[in] action
2123  *   Action configuration.
2124  * @param[in, out] flow
2125  *   Pointer to flow structure.
2126  * @param[in] flow_size
2127  *   Size in bytes of the available space in @p flow, if too small, nothing is
2128  *   written.
2129  * @param[out] error
2130  *   Pointer to error structure.
2131  *
2132  * @return
2133  *   On success, the number of bytes consumed/necessary; if the returned value
2134  *   is less than or equal to @p flow_size, the @p action has been fully
2135  *   converted, otherwise the function must be called again with the
2136  *   returned size.
2137  *   On error, a negative errno value is returned and rte_errno is set.
2138  */
2139 static int
2140 mlx5_flow_action_flag(const struct rte_flow_action *action,
2141                       struct rte_flow *flow, const size_t flow_size,
2142                       struct rte_flow_error *error)
2143 {
2144         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2145         struct ibv_flow_spec_action_tag tag = {
2146                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2147                 .size = size,
2148                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
2149         };
2150         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2151
2152         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
2153                 return rte_flow_error_set(error, ENOTSUP,
2154                                           RTE_FLOW_ERROR_TYPE_ACTION,
2155                                           action,
2156                                           "flag action already present");
2157         if (flow->fate & MLX5_FLOW_FATE_DROP)
2158                 return rte_flow_error_set(error, ENOTSUP,
2159                                           RTE_FLOW_ERROR_TYPE_ACTION,
2160                                           action,
2161                                           "flag is not compatible with drop"
2162                                           " action");
2163         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2164                 size = 0;
2165         else if (size <= flow_size && verbs)
2166                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2167         flow->modifier |= MLX5_FLOW_MOD_FLAG;
2168         return size;
2169 }
2170
2171 /**
2172  * Update the Verbs specification to turn the flag tag into a mark tag.
2173  *
2174  * @param[in, out] verbs
2175  *   Pointer to the mlx5_flow_verbs structure.
2176  * @param[in] mark_id
2177  *   Mark identifier to replace the flag.
2178  */
2179 static void
2180 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
2181 {
2182         struct ibv_spec_header *hdr;
2183         int i;
2184
2185         if (!verbs)
2186                 return;
2187         /* Update Verbs specification. */
2188         hdr = (struct ibv_spec_header *)verbs->specs;
2189         if (!hdr)
2190                 return;
2191         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
2192                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
2193                         struct ibv_flow_spec_action_tag *t =
2194                                 (struct ibv_flow_spec_action_tag *)hdr;
2195
2196                         t->tag_id = mlx5_flow_mark_set(mark_id);
2197                 }
2198                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
2199         }
2200 }
2201
2202 /**
2203  * Convert the @p action into @p flow (or update the already present Flag
2204  * Verbs specification) after ensuring the NIC will understand and
2205  * process it correctly.
2206  * If the necessary size for the conversion is greater than @p flow_size,
2207  * nothing is written in @p flow, but the validation is still performed.
2208  *
2209  * @param[in] action
2210  *   Action configuration.
2211  * @param[in, out] flow
2212  *   Pointer to flow structure.
2213  * @param[in] flow_size
2214  *   Size in bytes of the available space in @p flow, if too small, nothing is
2215  *   written.
2216  * @param[out] error
2217  *   Pointer to error structure.
2218  *
2219  * @return
2220  *   On success, the number of bytes consumed/necessary; if the returned value
2221  *   is less than or equal to @p flow_size, the @p action has been fully
2222  *   converted, otherwise the function must be called again with the
2223  *   returned size.
2224  *   On error, a negative errno value is returned and rte_errno is set.
2225  */
2226 static int
2227 mlx5_flow_action_mark(const struct rte_flow_action *action,
2228                       struct rte_flow *flow, const size_t flow_size,
2229                       struct rte_flow_error *error)
2230 {
2231         const struct rte_flow_action_mark *mark = action->conf;
2232         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2233         struct ibv_flow_spec_action_tag tag = {
2234                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2235                 .size = size,
2236         };
2237         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2238
2239         if (!mark)
2240                 return rte_flow_error_set(error, EINVAL,
2241                                           RTE_FLOW_ERROR_TYPE_ACTION,
2242                                           action,
2243                                           "configuration cannot be null");
2244         if (mark->id >= MLX5_FLOW_MARK_MAX)
2245                 return rte_flow_error_set(error, EINVAL,
2246                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2247                                           &mark->id,
2248                                           "mark id must be in 0 <= id < "
2249                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2250         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2251                 return rte_flow_error_set(error, ENOTSUP,
2252                                           RTE_FLOW_ERROR_TYPE_ACTION,
2253                                           action,
2254                                           "mark action already present");
2255         if (flow->fate & MLX5_FLOW_FATE_DROP)
2256                 return rte_flow_error_set(error, ENOTSUP,
2257                                           RTE_FLOW_ERROR_TYPE_ACTION,
2258                                           action,
2259                                           "mark is not compatible with drop"
2260                                           " action");
2261         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
2262                 mlx5_flow_verbs_mark_update(verbs, mark->id);
2263                 size = 0;
2264         } else if (size <= flow_size) {
2265                 tag.tag_id = mlx5_flow_mark_set(mark->id);
2266                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2267         }
2268         flow->modifier |= MLX5_FLOW_MOD_MARK;
2269         return size;
2270 }
2271
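/*
 * Note: FLAG and MARK share a single Verbs tag specification. When a FLAG
 * action has already been converted, mlx5_flow_action_mark() above only
 * rewrites the existing tag_id through mlx5_flow_verbs_mark_update() and
 * consumes no additional space, hence the size of 0 returned for that
 * branch.
 */
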
2272 /**
2273  * Convert the @p action into a Verbs specification after ensuring the NIC
2274  * will understand and process it correctly.
2275  * If the necessary size for the conversion is greater than @p flow_size,
2276  * nothing is written in @p flow, but the validation is still performed.
2277  *
2278  * @param dev
2279  *   Pointer to Ethernet device structure.
2280  * @param[in] action
2281  *   Action configuration.
2282  * @param[in, out] flow
2283  *   Pointer to flow structure.
2284  * @param[in] flow_size
2285  *   Size in bytes of the available space in @p flow, if too small, nothing is
2286  *   written.
2287  * @param[out] error
2288  *   Pointer to error structure.
2287  *
2288  * @return
2289  *   On success, the number of bytes consumed/necessary; if the returned value
2290  *   is less than or equal to @p flow_size, the @p action has been fully
2291  *   converted, otherwise the function must be called again with the
2292  *   returned size.
2293  *   On error, a negative errno value is returned and rte_errno is set.
2294  */
2295 static int
2296 mlx5_flow_action_count(struct rte_eth_dev *dev,
2297                        const struct rte_flow_action *action,
2298                        struct rte_flow *flow,
2299                        const size_t flow_size __rte_unused,
2300                        struct rte_flow_error *error)
2301 {
2302         const struct rte_flow_action_count *count = action->conf;
2303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2304         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2305         struct ibv_flow_spec_counter_action counter = {
2306                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2307                 .size = size,
2308         };
2309 #endif
2310
2311         /* Validate counter support before allocating a counter. */
2312         if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
2313                 return rte_flow_error_set(error, ENOTSUP,
2314                                           RTE_FLOW_ERROR_TYPE_ACTION,
2315                                           action,
2316                                           "flow counters are not supported.");
2317         if (!flow->counter) {
2318                 flow->counter = mlx5_flow_counter_new(dev, count->shared,
2319                                                       count->id);
2320                 if (!flow->counter)
2321                         return rte_flow_error_set(error, ENOTSUP,
2322                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2323                                                   action,
2324                                                   "cannot get counter"
2325                                                   " context.");
2326         }
2326         flow->modifier |= MLX5_FLOW_MOD_COUNT;
2327 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2328         counter.counter_set_handle = flow->counter->cs->handle;
2329         if (size <= flow_size)
2330                 mlx5_flow_spec_verbs_add(flow, &counter, size);
2331         return size;
2332 #endif
2333         return 0;
2334 }
2335
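/**
 * Illustrative sketch, not compiled into the driver: a shared COUNT
 * action. Flows created with the same shared counter id accumulate into a
 * single counter, provided the device exposes counter support. The id
 * below is a hypothetical application-chosen value.
 *
 * @code
 * struct rte_flow_action_count count = {
 *         .shared = 1,
 *         .id = 42,
 * };
 * struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_COUNT, .conf = &count },
 *         { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * @endcode
 */
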
2336 /**
2337  * Convert the @p actions into @p flow after ensuring the NIC will understand
2338  * and process them correctly.
2339  * The conversion is performed action per action; each of them is written into
2340  * @p flow if its size is less than or equal to @p flow_size.
2341  * Validation and memory consumption computation are still performed until the
2342  * end of @p actions, unless an error is encountered.
2343  *
2344  * @param[in] dev
2345  *   Pointer to Ethernet device structure.
2346  * @param[in] actions
2347  *   Pointer to flow actions array.
2348  * @param[in, out] flow
2349  *   Pointer to the rte_flow structure.
2350  * @param[in] flow_size
2351  *   Size in bytes of the available space in @p flow; if too small, some
2352  *   garbage may be present.
2353  * @param[out] error
2354  *   Pointer to error structure.
2355  *
2356  * @return
2357  *   On success, the number of bytes consumed/necessary; if the returned value
2358  *   is less than or equal to @p flow_size, the @p actions have been fully
2359  *   converted, otherwise the function must be called again with the
2360  *   returned size.
2361  *   On error, a negative errno value is returned and rte_errno is set.
2362  */
2363 static int
2364 mlx5_flow_actions(struct rte_eth_dev *dev,
2365                   const struct rte_flow_action actions[],
2366                   struct rte_flow *flow, const size_t flow_size,
2367                   struct rte_flow_error *error)
2368 {
2369         size_t size = 0;
2370         int remain = flow_size;
2371         int ret = 0;
2372
2373         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2374                 switch (actions->type) {
2375                 case RTE_FLOW_ACTION_TYPE_VOID:
2376                         break;
2377                 case RTE_FLOW_ACTION_TYPE_FLAG:
2378                         ret = mlx5_flow_action_flag(actions, flow, remain,
2379                                                     error);
2380                         break;
2381                 case RTE_FLOW_ACTION_TYPE_MARK:
2382                         ret = mlx5_flow_action_mark(actions, flow, remain,
2383                                                     error);
2384                         break;
2385                 case RTE_FLOW_ACTION_TYPE_DROP:
2386                         ret = mlx5_flow_action_drop(actions, flow, remain,
2387                                                     error);
2388                         break;
2389                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2390                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
2391                         break;
2392                 case RTE_FLOW_ACTION_TYPE_RSS:
2393                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
2394                         break;
2395                 case RTE_FLOW_ACTION_TYPE_COUNT:
2396                         ret = mlx5_flow_action_count(dev, actions, flow, remain,
2397                                                      error);
2398                         break;
2399                 default:
2400                         return rte_flow_error_set(error, ENOTSUP,
2401                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2402                                                   actions,
2403                                                   "action not supported");
2404                 }
2405                 if (ret < 0)
2406                         return ret;
2407                 if (remain > ret)
2408                         remain -= ret;
2409                 else
2410                         remain = 0;
2411                 size += ret;
2412         }
2413         if (!flow->fate)
2414                 return rte_flow_error_set(error, ENOTSUP,
2415                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2416                                           NULL,
2417                                           "no fate action found");
2418         return size;
2419 }
2420
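/**
 * Illustrative sketch, not compiled into the driver: an action list
 * accepted by mlx5_flow_actions(). Exactly one fate action (DROP, QUEUE or
 * RSS) is required; FLAG, MARK and COUNT are modifiers that may be
 * combined with it. The dev, flow, flow_size and error variables are
 * hypothetical.
 *
 * @code
 * struct rte_flow_action actions[] = {
 *         { .type = RTE_FLOW_ACTION_TYPE_MARK,
 *           .conf = &(struct rte_flow_action_mark){ .id = 7 } },
 *         { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *           .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *         { .type = RTE_FLOW_ACTION_TYPE_END },
 * };
 * int size = mlx5_flow_actions(dev, actions, flow, flow_size, &error);
 * @endcode
 */
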
2421 /**
2422  * Validate flow rule and fill flow structure accordingly.
2423  *
2424  * @param dev
2425  *   Pointer to Ethernet device.
2426  * @param[out] flow
2427  *   Pointer to flow structure.
2428  * @param flow_size
2429  *   Size of allocated space for @p flow.
2430  * @param[in] attr
2431  *   Flow rule attributes.
2432  * @param[in] pattern
2433  *   Pattern specification (list terminated by the END pattern item).
2434  * @param[in] actions
2435  *   Associated actions (list terminated by the END action).
2436  * @param[out] error
2437  *   Perform verbose error reporting if not NULL.
2438  *
2439  * @return
2440  *   A positive value representing the size of the flow object in bytes
2441  *   regardless of @p flow_size on success, a negative errno value otherwise
2442  *   and rte_errno is set.
2443  */
2444 static int
2445 mlx5_flow_merge_switch(struct rte_eth_dev *dev,
2446                        struct rte_flow *flow,
2447                        size_t flow_size,
2448                        const struct rte_flow_attr *attr,
2449                        const struct rte_flow_item pattern[],
2450                        const struct rte_flow_action actions[],
2451                        struct rte_flow_error *error)
2452 {
2453         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
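        /*
         * "!n + n" evaluates to 1 when n is 0 so the arrays below are
         * never zero-sized; ptoi[] holds one extra entry for its
         * zero-ifindex terminator.
         */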
2454         uint16_t port_id[!n + n];
2455         struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
2456         size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
2457         unsigned int i;
2458         unsigned int own = 0;
2459         int ret;
2460
2461         /* At least one port is needed when no switch domain is present. */
2462         if (!n) {
2463                 n = 1;
2464                 port_id[0] = dev->data->port_id;
2465         } else {
2466                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
2467         }
2468         for (i = 0; i != n; ++i) {
2469                 struct rte_eth_dev_info dev_info;
2470
2471                 rte_eth_dev_info_get(port_id[i], &dev_info);
2472                 if (port_id[i] == dev->data->port_id)
2473                         own = i;
2474                 ptoi[i].port_id = port_id[i];
2475                 ptoi[i].ifindex = dev_info.if_index;
2476         }
2477         /* Ensure first entry of ptoi[] is the current device. */
2478         if (own) {
2479                 ptoi[n] = ptoi[0];
2480                 ptoi[0] = ptoi[own];
2481                 ptoi[own] = ptoi[n];
2482         }
2483         /* An entry with zero ifindex terminates ptoi[]. */
2484         ptoi[n].port_id = 0;
2485         ptoi[n].ifindex = 0;
2486         if (flow_size < off)
2487                 flow_size = 0;
2488         ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
2489                                      flow_size ? flow_size - off : 0,
2490                                      ptoi, attr, pattern, actions, error);
2491         if (ret < 0)
2492                 return ret;
2493         if (flow_size) {
2494                 *flow = (struct rte_flow){
2495                         .attributes = *attr,
2496                         .nl_flow = (uint8_t *)flow + off,
2497                 };
2498                 /*
2499                  * Generate a reasonably unique handle based on the address
2500                  * of the target buffer.
2501                  *
2502                  * This is straightforward on 32-bit systems where the flow
2503                  * pointer can be used directly. Otherwise, its least
2504                  * significant part is taken after shifting it by the
2505                  * previous power of two of the pointed buffer size.
2506                  */
2507                 if (sizeof(flow) <= 4)
2508                         mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
2509                 else
2510                         mlx5_nl_flow_brand
2511                                 (flow->nl_flow,
2512                                  (uintptr_t)flow >>
2513                                  rte_log2_u32(rte_align32prevpow2(flow_size)));
2514         }
2515         return off + ret;
2516 }
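
/*
 * Branding arithmetic sketch (illustrative values): on a 64-bit system
 * with flow_size == 368, rte_align32prevpow2(368) == 256 and
 * rte_log2_u32(256) == 8, so the handle is (uintptr_t)flow >> 8. Two
 * live buffers of at least flow_size bytes can never share the same
 * 256-byte block, hence the "reasonably unique" handle.
 */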
2517
2518 static unsigned int
2519 mlx5_find_graph_root(const struct rte_flow_item pattern[], uint32_t rss_level)
2520 {
2521         const struct rte_flow_item *item;
2522         unsigned int has_vlan = 0;
2523
2524         for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; item++) {
2525                 if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
2526                         has_vlan = 1;
2527                         break;
2528                 }
2529         }
2530         if (has_vlan)
2531                 return rss_level < 2 ? MLX5_EXPANSION_ROOT_ETH_VLAN :
2532                                        MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN;
2533         return rss_level < 2 ? MLX5_EXPANSION_ROOT :
2534                                MLX5_EXPANSION_ROOT_OUTER;
2535 }
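
/*
 * Usage sketch (hypothetical pattern): for ETH/VLAN/END with
 * rss_level == 2 (inner RSS), mlx5_find_graph_root() returns
 * MLX5_EXPANSION_ROOT_OUTER_ETH_VLAN; the same pattern with
 * rss_level <= 1 returns MLX5_EXPANSION_ROOT_ETH_VLAN.
 */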
2536
2537 /**
2538  * Convert the @p attributes, @p pattern and @p actions into a flow for the
2539  * NIC after ensuring the NIC will understand and process it correctly.
2540  * The conversion is performed item by item and action by action; each of
2541  * them is written into @p flow if its size is less than or equal to
2542  * @p flow_size.
2543  * Validation and memory consumption computation are still performed until
2544  * the end, unless an error is encountered.
2545  *
2546  * @param[in] dev
2547  *   Pointer to Ethernet device.
2548  * @param[in, out] flow
2549  *   Pointer to flow structure.
2550  * @param[in] flow_size
2551  *   Size in bytes of the available space in @p flow; if too small, @p flow
2552  *   may contain garbage.
2553  * @param[in] attributes
2554  *   Flow rule attributes.
2555  * @param[in] pattern
2556  *   Pattern specification (list terminated by the END pattern item).
2557  * @param[in] actions
2558  *   Associated actions (list terminated by the END action).
2559  * @param[out] error
2560  *   Perform verbose error reporting if not NULL.
2561  *
2562  * @return
2563  *   On success, the number of bytes consumed/necessary. If the returned
2564  *   value is less than or equal to @p flow_size, the flow has been fully
2565  *   converted and can be applied, otherwise another call with the returned
2566  *   size should be made.
2567  *   On error, a negative errno value is returned and rte_errno is set.
2568  */
2569 static int
2570 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2571                 const size_t flow_size,
2572                 const struct rte_flow_attr *attributes,
2573                 const struct rte_flow_item pattern[],
2574                 const struct rte_flow_action actions[],
2575                 struct rte_flow_error *error)
2576 {
2577         struct rte_flow local_flow = { .layers = 0, };
2578         size_t size = sizeof(*flow);
2579         union {
2580                 struct rte_flow_expand_rss buf;
2581                 uint8_t buffer[2048];
2582         } expand_buffer;
2583         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2584         struct mlx5_flow_verbs *original_verbs = NULL;
2585         size_t original_verbs_size = 0;
2586         uint32_t original_layers = 0;
2587         int expanded_pattern_idx = 0;
2588         int ret;
2589         uint32_t i;
2590
2591         if (attributes->transfer)
2592                 return mlx5_flow_merge_switch(dev, flow, flow_size,
2593                                               attributes, pattern,
2594                                               actions, error);
2595         if (size > flow_size)
2596                 flow = &local_flow;
2597         ret = mlx5_flow_attributes(dev, attributes, flow, error);
2598         if (ret < 0)
2599                 return ret;
2600         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2601         if (ret < 0)
2602                 return ret;
2603         if (local_flow.rss.types) {
2604                 unsigned int graph_root;
2605
2606                 graph_root = mlx5_find_graph_root(pattern,
2607                                                   local_flow.rss.level);
2608                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2609                                           pattern, local_flow.rss.types,
2610                                           mlx5_support_expansion,
2611                                           graph_root);
2612                 assert(ret > 0 &&
2613                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2614         } else {
2615                 buf->entries = 1;
2616                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2617         }
2618         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2619                                sizeof(void *));
2620         if (size <= flow_size)
2621                 flow->queue = (void *)(flow + 1);
2622         LIST_INIT(&flow->verbs);
2623         flow->layers = 0;
2624         flow->modifier = 0;
2625         flow->fate = 0;
2626         for (i = 0; i != buf->entries; ++i) {
2627                 size_t off = size;
2628                 size_t off2;
2629
2630                 flow->layers = original_layers;
2631                 size += sizeof(struct ibv_flow_attr) +
2632                         sizeof(struct mlx5_flow_verbs);
2633                 off2 = size;
2634                 if (size < flow_size) {
2635                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2636                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2637                         flow->cur_verbs->specs =
2638                                 (void *)(flow->cur_verbs->attr + 1);
2639                 }
2640                 /* First iteration: convert the pattern into Verbs. */
2641                 if (i == 0) {
2642                         /* Actions don't need to be converted several times. */
2643                         ret = mlx5_flow_actions(dev, actions, flow,
2644                                                 (size < flow_size) ?
2645                                                 flow_size - size : 0,
2646                                                 error);
2647                         if (ret < 0)
2648                                 return ret;
2649                         size += ret;
2650                 } else {
2651                         /*
2652                          * Subsequent iterations mean the pattern has
2653                          * already been converted and an expansion is
2654                          * necessary to match the user RSS request. Only
2655                          * the expanded items are converted; the part
2656                          * common with the user pattern is simply copied
2657                          * into the next buffer zone.
2658                          */
2659                         size += original_verbs_size;
2660                         if (size < flow_size) {
2661                                 rte_memcpy(flow->cur_verbs->attr,
2662                                            original_verbs->attr,
2663                                            original_verbs_size +
2664                                            sizeof(struct ibv_flow_attr));
2665                                 flow->cur_verbs->size = original_verbs_size;
2666                         }
2667                 }
2668                 ret = mlx5_flow_items
2669                         (dev,
2670                          (const struct rte_flow_item *)
2671                          &buf->entry[i].pattern[expanded_pattern_idx],
2672                          flow,
2673                          (size < flow_size) ? flow_size - size : 0, error);
2674                 if (ret < 0)
2675                         return ret;
2676                 size += ret;
2677                 if (size <= flow_size) {
2678                         mlx5_flow_adjust_priority(dev, flow);
2679                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2680                 }
2681                 /*
2682                  * Keep a pointer to the first Verbs conversion and the layers
2683                  * it has encountered.
2684                  */
2685                 if (i == 0) {
2686                         original_verbs = flow->cur_verbs;
2687                         original_verbs_size = size - off2;
2688                         original_layers = flow->layers;
2689                         /*
2690                          * Move the index of the expanded pattern to the
2691                          * first item not addressed yet.
2692                          */
2693                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2694                                 expanded_pattern_idx++;
2695                         } else {
2696                                 const struct rte_flow_item *item;
2697
2698                                 for (item = pattern;
2699                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2700                                      ++item)
2701                                         expanded_pattern_idx++;
2702                         }
2703                 }
2704         }
2705         /* Restore the origin layers in the flow. */
2706         flow->layers = original_layers;
2707         return size;
2708 }
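
/*
 * Expansion walk-through (illustrative): an ETH/END pattern with
 * rss.types covering IPv4/UDP may expand to the entries ETH,
 * ETH/IPV4 and ETH/IPV4/UDP. The loop above converts the user pattern
 * and the actions once (i == 0), then for each additional entry copies
 * the already converted common prefix and only converts the expanded
 * tail items, starting at expanded_pattern_idx.
 */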
2709
2710 /**
2711  * Look up and set the ptype in the Rx data part. Only a single ptype can be
2712  * used; if several tunnel rules are used on this queue, the tunnel ptype is
2713  * cleared.
2714  *
2715  * @param rxq_ctrl
2716  *   Rx queue to update.
2717  */
2718 static void
2719 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2720 {
2721         unsigned int i;
2722         uint32_t tunnel_ptype = 0;
2723
2724         /* Look up the ptype to use. */
2725         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2726                 if (!rxq_ctrl->flow_tunnels_n[i])
2727                         continue;
2728                 if (!tunnel_ptype) {
2729                         tunnel_ptype = tunnels_info[i].ptype;
2730                 } else {
2731                         tunnel_ptype = 0;
2732                         break;
2733                 }
2734         }
2735         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2736 }
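
/*
 * Example: a queue carrying only VXLAN flows gets the VXLAN entry from
 * tunnels_info[] as rxq.tunnel; as soon as a second tunnel kind (say
 * GRE) is added to the same queue, the ptype becomes ambiguous and is
 * cleared to 0.
 */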
2737
2738 /**
2739  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2740  *
2741  * @param[in] dev
2742  *   Pointer to Ethernet device.
2743  * @param[in] flow
2744  *   Pointer to flow structure.
2745  */
2746 static void
2747 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2748 {
2749         struct priv *priv = dev->data->dev_private;
2750         const int mark = !!(flow->modifier &
2751                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2752         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2753         unsigned int i;
2754
2755         for (i = 0; i != flow->rss.queue_num; ++i) {
2756                 int idx = (*flow->queue)[i];
2757                 struct mlx5_rxq_ctrl *rxq_ctrl =
2758                         container_of((*priv->rxqs)[idx],
2759                                      struct mlx5_rxq_ctrl, rxq);
2760
2761                 if (mark) {
2762                         rxq_ctrl->rxq.mark = 1;
2763                         rxq_ctrl->flow_mark_n++;
2764                 }
2765                 if (tunnel) {
2766                         unsigned int j;
2767
2768                         /* Increase the counter matching the flow. */
2769                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2770                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2771                                     tunnels_info[j].tunnel) {
2772                                         rxq_ctrl->flow_tunnels_n[j]++;
2773                                         break;
2774                                 }
2775                         }
2776                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2777                 }
2778         }
2779 }
2780
2781 /**
2782  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2783  * @p flow if no other flow uses it with the same kind of request.
2784  *
2785  * @param dev
2786  *   Pointer to Ethernet device.
2787  * @param[in] flow
2788  *   Pointer to the flow.
2789  */
2790 static void
2791 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2792 {
2793         struct priv *priv = dev->data->dev_private;
2794         const int mark = !!(flow->modifier &
2795                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2796         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2797         unsigned int i;
2798
2799         assert(dev->data->dev_started);
2800         for (i = 0; i != flow->rss.queue_num; ++i) {
2801                 int idx = (*flow->queue)[i];
2802                 struct mlx5_rxq_ctrl *rxq_ctrl =
2803                         container_of((*priv->rxqs)[idx],
2804                                      struct mlx5_rxq_ctrl, rxq);
2805
2806                 if (mark) {
2807                         rxq_ctrl->flow_mark_n--;
2808                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2809                 }
2810                 if (tunnel) {
2811                         unsigned int j;
2812
2813                         /* Decrease the counter matching the flow. */
2814                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2815                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2816                                     tunnels_info[j].tunnel) {
2817                                         rxq_ctrl->flow_tunnels_n[j]--;
2818                                         break;
2819                                 }
2820                         }
2821                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2822                 }
2823         }
2824 }
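
/*
 * Reference-counting example: two flows marking packets on the same Rx
 * queue yield flow_mark_n == 2. Trimming one flow leaves
 * flow_mark_n == 1 with rxq.mark still set; trimming the second drops
 * the counter to 0 and clears rxq.mark.
 */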
2825
2826 /**
2827  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2828  *
2829  * @param dev
2830  *   Pointer to Ethernet device.
2831  */
2832 static void
2833 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2834 {
2835         struct priv *priv = dev->data->dev_private;
2836         unsigned int i;
2837
2838         for (i = 0; i != priv->rxqs_n; ++i) {
2839                 struct mlx5_rxq_ctrl *rxq_ctrl;
2840                 unsigned int j;
2841
2842                 if (!(*priv->rxqs)[i])
2843                         continue;
2844                 rxq_ctrl = container_of((*priv->rxqs)[i],
2845                                         struct mlx5_rxq_ctrl, rxq);
2846                 rxq_ctrl->flow_mark_n = 0;
2847                 rxq_ctrl->rxq.mark = 0;
2848                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2849                         rxq_ctrl->flow_tunnels_n[j] = 0;
2850                 rxq_ctrl->rxq.tunnel = 0;
2851         }
2852 }
2853
2854 /**
2855  * Validate a flow supported by the NIC.
2856  *
2857  * @see rte_flow_validate()
2858  * @see rte_flow_ops
2859  */
2860 int
2861 mlx5_flow_validate(struct rte_eth_dev *dev,
2862                    const struct rte_flow_attr *attr,
2863                    const struct rte_flow_item items[],
2864                    const struct rte_flow_action actions[],
2865                    struct rte_flow_error *error)
2866 {
2867         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2868
2869         if (ret < 0)
2870                 return ret;
2871         return 0;
2872 }
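
/*
 * Application-side sketch (port_id, attr, pattern and actions are
 * hypothetical): since mlx5_flow_merge() runs with a NULL flow and a
 * zero size, only validation and size computation are performed:
 *
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           printf("rule rejected: %s\n", err.message ? err.message : "");
 */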
2873
2874 /**
2875  * Remove the flow.
2876  *
2877  * @param[in] dev
2878  *   Pointer to Ethernet device.
2879  * @param[in, out] flow
2880  *   Pointer to flow structure.
2881  */
2882 static void
2883 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2884 {
2885         struct priv *priv = dev->data->dev_private;
2886         struct mlx5_flow_verbs *verbs;
2887
2888         if (flow->nl_flow && priv->mnl_socket)
2889                 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
2890         LIST_FOREACH(verbs, &flow->verbs, next) {
2891                 if (verbs->flow) {
2892                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2893                         verbs->flow = NULL;
2894                 }
2895                 if (verbs->hrxq) {
2896                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2897                                 mlx5_hrxq_drop_release(dev);
2898                         else
2899                                 mlx5_hrxq_release(dev, verbs->hrxq);
2900                         verbs->hrxq = NULL;
2901                 }
2902         }
2903         if (flow->counter) {
2904                 mlx5_flow_counter_release(flow->counter);
2905                 flow->counter = NULL;
2906         }
2907 }
2908
2909 /**
2910  * Apply the flow.
2911  *
2912  * @param[in] dev
2913  *   Pointer to Ethernet device structure.
2914  * @param[in, out] flow
2915  *   Pointer to flow structure.
2916  * @param[out] error
2917  *   Pointer to error structure.
2918  *
2919  * @return
2920  *   0 on success, a negative errno value otherwise and rte_errno is set.
2921  */
2922 static int
2923 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2924                 struct rte_flow_error *error)
2925 {
2926         struct priv *priv = dev->data->dev_private;
2927         struct mlx5_flow_verbs *verbs;
2928         int err;
2929
2930         LIST_FOREACH(verbs, &flow->verbs, next) {
2931                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2932                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2933                         if (!verbs->hrxq) {
2934                                 rte_flow_error_set
2935                                         (error, errno,
2936                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2937                                          NULL,
2938                                          "cannot get drop hash queue");
2939                                 goto error;
2940                         }
2941                 } else {
2942                         struct mlx5_hrxq *hrxq;
2943
2944                         hrxq = mlx5_hrxq_get(dev, flow->key,
2945                                              MLX5_RSS_HASH_KEY_LEN,
2946                                              verbs->hash_fields,
2947                                              (*flow->queue),
2948                                              flow->rss.queue_num);
2949                         if (!hrxq)
2950                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2951                                                      MLX5_RSS_HASH_KEY_LEN,
2952                                                      verbs->hash_fields,
2953                                                      (*flow->queue),
2954                                                      flow->rss.queue_num,
2955                                                      !!(flow->layers &
2956                                                       MLX5_FLOW_LAYER_TUNNEL));
2957                         if (!hrxq) {
2958                                 rte_flow_error_set
2959                                         (error, rte_errno,
2960                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2961                                          NULL,
2962                                          "cannot get hash queue");
2963                                 goto error;
2964                         }
2965                         verbs->hrxq = hrxq;
2966                 }
2967                 verbs->flow =
2968                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2969                 if (!verbs->flow) {
2970                         rte_flow_error_set(error, errno,
2971                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2972                                            NULL,
2973                                            "hardware refuses to create flow");
2974                         goto error;
2975                 }
2976         }
2977         if (flow->nl_flow &&
2978             priv->mnl_socket &&
2979             mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
2980                 goto error;
2981         return 0;
2982 error:
2983         err = rte_errno; /* Save rte_errno before cleanup. */
2984         LIST_FOREACH(verbs, &flow->verbs, next) {
2985                 if (verbs->hrxq) {
2986                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2987                                 mlx5_hrxq_drop_release(dev);
2988                         else
2989                                 mlx5_hrxq_release(dev, verbs->hrxq);
2990                         verbs->hrxq = NULL;
2991                 }
2992         }
2993         rte_errno = err; /* Restore rte_errno. */
2994         return -rte_errno;
2995 }
2996
2997 /**
2998  * Create a flow and add it to @p list.
2999  *
3000  * @param dev
3001  *   Pointer to Ethernet device.
3002  * @param list
3003  *   Pointer to a TAILQ flow list.
3004  * @param[in] attr
3005  *   Flow rule attributes.
3006  * @param[in] items
3007  *   Pattern specification (list terminated by the END pattern item).
3008  * @param[in] actions
3009  *   Associated actions (list terminated by the END action).
3010  * @param[out] error
3011  *   Perform verbose error reporting if not NULL.
3012  *
3013  * @return
3014  *   A flow on success, NULL otherwise and rte_errno is set.
3015  */
3016 static struct rte_flow *
3017 mlx5_flow_list_create(struct rte_eth_dev *dev,
3018                       struct mlx5_flows *list,
3019                       const struct rte_flow_attr *attr,
3020                       const struct rte_flow_item items[],
3021                       const struct rte_flow_action actions[],
3022                       struct rte_flow_error *error)
3023 {
3024         struct rte_flow *flow = NULL;
3025         size_t size = 0;
3026         int ret;
3027
3028         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3029         if (ret < 0)
3030                 return NULL;
3031         size = ret;
3032         flow = rte_calloc(__func__, 1, size, 0);
3033         if (!flow) {
3034                 rte_flow_error_set(error, ENOMEM,
3035                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3036                                    NULL,
3037                                    "not enough memory to create flow");
3038                 return NULL;
3039         }
3040         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
3041         if (ret < 0) {
3042                 rte_free(flow);
3043                 return NULL;
3044         }
3045         assert((size_t)ret == size);
3046         if (dev->data->dev_started) {
3047                 ret = mlx5_flow_apply(dev, flow, error);
3048                 if (ret < 0) {
3049                         ret = rte_errno; /* Save rte_errno before cleanup. */
3050                         if (flow) {
3051                                 mlx5_flow_remove(dev, flow);
3052                                 rte_free(flow);
3053                         }
3054                         rte_errno = ret; /* Restore rte_errno. */
3055                         return NULL;
3056                 }
3057         }
3058         TAILQ_INSERT_TAIL(list, flow, next);
3059         mlx5_flow_rxq_flags_set(dev, flow);
3060         return flow;
3061 }
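
/*
 * The two mlx5_flow_merge() calls above follow the usual
 * size-query/fill pattern; a minimal sketch, with convert() standing in
 * for mlx5_flow_merge():
 *
 *   int size = convert(NULL, 0, rule);
 *
 *   if (size < 0)
 *           return NULL;
 *   buf = rte_calloc(__func__, 1, size, 0);
 *   if (!buf)
 *           return NULL;
 *   size = convert(buf, size, rule);
 */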
3062
3063 /**
3064  * Create a flow.
3065  *
3066  * @see rte_flow_create()
3067  * @see rte_flow_ops
3068  */
3069 struct rte_flow *
3070 mlx5_flow_create(struct rte_eth_dev *dev,
3071                  const struct rte_flow_attr *attr,
3072                  const struct rte_flow_item items[],
3073                  const struct rte_flow_action actions[],
3074                  struct rte_flow_error *error)
3075 {
3076         return mlx5_flow_list_create
3077                 (dev, &((struct priv *)dev->data->dev_private)->flows,
3078                  attr, items, actions, error);
3079 }
3080
3081 /**
3082  * Destroy a flow in a list.
3083  *
3084  * @param dev
3085  *   Pointer to Ethernet device.
3086  * @param list
3087  *   Pointer to a TAILQ flow list.
3088  * @param[in] flow
3089  *   Flow to destroy.
3090  */
3091 static void
3092 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
3093                        struct rte_flow *flow)
3094 {
3095         mlx5_flow_remove(dev, flow);
3096         TAILQ_REMOVE(list, flow, next);
3097         /*
3098          * Update Rx queue flags only if the port is started; otherwise they
3099          * are already clear.
3100          */
3101         if (dev->data->dev_started)
3102                 mlx5_flow_rxq_flags_trim(dev, flow);
3103         rte_free(flow);
3104 }
3105
3106 /**
3107  * Destroy all flows.
3108  *
3109  * @param dev
3110  *   Pointer to Ethernet device.
3111  * @param list
3112  *   Pointer to a TAILQ flow list.
3113  */
3114 void
3115 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
3116 {
3117         while (!TAILQ_EMPTY(list)) {
3118                 struct rte_flow *flow;
3119
3120                 flow = TAILQ_FIRST(list);
3121                 mlx5_flow_list_destroy(dev, list, flow);
3122         }
3123 }
3124
3125 /**
3126  * Remove all flows.
3127  *
3128  * @param dev
3129  *   Pointer to Ethernet device.
3130  * @param list
3131  *   Pointer to a TAILQ flow list.
3132  */
3133 void
3134 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
3135 {
3136         struct rte_flow *flow;
3137
3138         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
3139                 mlx5_flow_remove(dev, flow);
3140         mlx5_flow_rxq_flags_clear(dev);
3141 }
3142
3143 /**
3144  * Add all flows.
3145  *
3146  * @param dev
3147  *   Pointer to Ethernet device.
3148  * @param list
3149  *   Pointer to a TAILQ flow list.
3150  *
3151  * @return
3152  *   0 on success, a negative errno value otherwise and rte_errno is set.
3153  */
3154 int
3155 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
3156 {
3157         struct rte_flow *flow;
3158         struct rte_flow_error error;
3159         int ret = 0;
3160
3161         TAILQ_FOREACH(flow, list, next) {
3162                 ret = mlx5_flow_apply(dev, flow, &error);
3163                 if (ret < 0)
3164                         goto error;
3165                 mlx5_flow_rxq_flags_set(dev, flow);
3166         }
3167         return 0;
3168 error:
3169         ret = rte_errno; /* Save rte_errno before cleanup. */
3170         mlx5_flow_stop(dev, list);
3171         rte_errno = ret; /* Restore rte_errno. */
3172         return -rte_errno;
3173 }
3174
3175 /**
3176  * Verify the flow list is empty.
3177  *
3178  * @param dev
3179  *   Pointer to Ethernet device.
3180  *
3181  * @return the number of flows not released.
3182  */
3183 int
3184 mlx5_flow_verify(struct rte_eth_dev *dev)
3185 {
3186         struct priv *priv = dev->data->dev_private;
3187         struct rte_flow *flow;
3188         int ret = 0;
3189
3190         TAILQ_FOREACH(flow, &priv->flows, next) {
3191                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
3192                         dev->data->port_id, (void *)flow);
3193                 ++ret;
3194         }
3195         return ret;
3196 }
3197
3198 /**
3199  * Enable a control flow configured from the control plane.
3200  *
3201  * @param dev
3202  *   Pointer to Ethernet device.
3203  * @param eth_spec
3204  *   An Ethernet flow spec to apply.
3205  * @param eth_mask
3206  *   An Ethernet flow mask to apply.
3207  * @param vlan_spec
3208  *   A VLAN flow spec to apply.
3209  * @param vlan_mask
3210  *   A VLAN flow mask to apply.
3211  *
3212  * @return
3213  *   0 on success, a negative errno value otherwise and rte_errno is set.
3214  */
3215 int
3216 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3217                     struct rte_flow_item_eth *eth_spec,
3218                     struct rte_flow_item_eth *eth_mask,
3219                     struct rte_flow_item_vlan *vlan_spec,
3220                     struct rte_flow_item_vlan *vlan_mask)
3221 {
3222         struct priv *priv = dev->data->dev_private;
3223         const struct rte_flow_attr attr = {
3224                 .ingress = 1,
3225                 .priority = MLX5_FLOW_PRIO_RSVD,
3226         };
3227         struct rte_flow_item items[] = {
3228                 {
3229                         .type = RTE_FLOW_ITEM_TYPE_ETH,
3230                         .spec = eth_spec,
3231                         .last = NULL,
3232                         .mask = eth_mask,
3233                 },
3234                 {
3235                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3236                                 RTE_FLOW_ITEM_TYPE_END,
3237                         .spec = vlan_spec,
3238                         .last = NULL,
3239                         .mask = vlan_mask,
3240                 },
3241                 {
3242                         .type = RTE_FLOW_ITEM_TYPE_END,
3243                 },
3244         };
3245         uint16_t queue[priv->reta_idx_n];
3246         struct rte_flow_action_rss action_rss = {
3247                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3248                 .level = 0,
3249                 .types = priv->rss_conf.rss_hf,
3250                 .key_len = priv->rss_conf.rss_key_len,
3251                 .queue_num = priv->reta_idx_n,
3252                 .key = priv->rss_conf.rss_key,
3253                 .queue = queue,
3254         };
3255         struct rte_flow_action actions[] = {
3256                 {
3257                         .type = RTE_FLOW_ACTION_TYPE_RSS,
3258                         .conf = &action_rss,
3259                 },
3260                 {
3261                         .type = RTE_FLOW_ACTION_TYPE_END,
3262                 },
3263         };
3264         struct rte_flow *flow;
3265         struct rte_flow_error error;
3266         unsigned int i;
3267
3268         if (!priv->reta_idx_n) {
3269                 rte_errno = EINVAL;
3270                 return -rte_errno;
3271         }
3272         for (i = 0; i != priv->reta_idx_n; ++i)
3273                 queue[i] = (*priv->reta_idx)[i];
3274         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3275                                      actions, &error);
3276         if (!flow)
3277                 return -rte_errno;
3278         return 0;
3279 }
3280
3281 /**
3282  * Enable a control flow configured from the control plane.
3283  *
3284  * @param dev
3285  *   Pointer to Ethernet device.
3286  * @param eth_spec
3287  *   An Ethernet flow spec to apply.
3288  * @param eth_mask
3289  *   An Ethernet flow mask to apply.
3290  *
3291  * @return
3292  *   0 on success, a negative errno value otherwise and rte_errno is set.
3293  */
3294 int
3295 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3296                struct rte_flow_item_eth *eth_spec,
3297                struct rte_flow_item_eth *eth_mask)
3298 {
3299         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3300 }
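
/*
 * Caller sketch (what mlx5_traffic_enable() does for broadcast traffic,
 * simplified):
 *
 *   struct rte_flow_item_eth bcast = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *
 *   if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *           return -rte_errno;
 */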
3301
3302 /**
3303  * Destroy a flow.
3304  *
3305  * @see rte_flow_destroy()
3306  * @see rte_flow_ops
3307  */
3308 int
3309 mlx5_flow_destroy(struct rte_eth_dev *dev,
3310                   struct rte_flow *flow,
3311                   struct rte_flow_error *error __rte_unused)
3312 {
3313         struct priv *priv = dev->data->dev_private;
3314
3315         mlx5_flow_list_destroy(dev, &priv->flows, flow);
3316         return 0;
3317 }
3318
3319 /**
3320  * Destroy all flows.
3321  *
3322  * @see rte_flow_flush()
3323  * @see rte_flow_ops
3324  */
3325 int
3326 mlx5_flow_flush(struct rte_eth_dev *dev,
3327                 struct rte_flow_error *error __rte_unused)
3328 {
3329         struct priv *priv = dev->data->dev_private;
3330
3331         mlx5_flow_list_flush(dev, &priv->flows);
3332         return 0;
3333 }
3334
3335 /**
3336  * Isolated mode.
3337  *
3338  * @see rte_flow_isolate()
3339  * @see rte_flow_ops
3340  */
3341 int
3342 mlx5_flow_isolate(struct rte_eth_dev *dev,
3343                   int enable,
3344                   struct rte_flow_error *error)
3345 {
3346         struct priv *priv = dev->data->dev_private;
3347
3348         if (dev->data->dev_started) {
3349                 rte_flow_error_set(error, EBUSY,
3350                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3351                                    NULL,
3352                                    "port must be stopped first");
3353                 return -rte_errno;
3354         }
3355         priv->isolated = !!enable;
3356         if (enable)
3357                 dev->dev_ops = &mlx5_dev_ops_isolate;
3358         else
3359                 dev->dev_ops = &mlx5_dev_ops;
3360         return 0;
3361 }
3362
3363 /**
3364  * Query flow counter.
3365  *
3366  * @param flow
3367  *   Pointer to the flow.
3368  *
3369  * @return
3370  *   0 on success, a negative errno value otherwise and rte_errno is set.
3371  */
3372 static int
3373 mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
3374                       void *data __rte_unused,
3375                       struct rte_flow_error *error)
3376 {
3377 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3378         if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
3379                 struct rte_flow_query_count *qc = data;
3380                 uint64_t counters[2] = {0, 0};
3381                 struct ibv_query_counter_set_attr query_cs_attr = {
3382                         .cs = flow->counter->cs,
3383                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3384                 };
3385                 struct ibv_counter_set_data query_out = {
3386                         .out = counters,
3387                         .outlen = 2 * sizeof(uint64_t),
3388                 };
3389                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
3390                                                        &query_out);
3391
3392                 if (err)
3393                         return rte_flow_error_set
3394                                 (error, err,
3395                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3396                                  NULL,
3397                                  "cannot read counter");
3398                 qc->hits_set = 1;
3399                 qc->bytes_set = 1;
3400                 qc->hits = counters[0] - flow->counter->hits;
3401                 qc->bytes = counters[1] - flow->counter->bytes;
3402                 if (qc->reset) {
3403                         flow->counter->hits = counters[0];
3404                         flow->counter->bytes = counters[1];
3405                 }
3406                 return 0;
3407         }
3408         return rte_flow_error_set(error, ENOTSUP,
3409                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3410                                   NULL,
3411                                   "flow does not have counter");
3412 #else
3413         return rte_flow_error_set(error, ENOTSUP,
3414                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3415                                   NULL, "counters are not available");
3416 #endif
3417 }
3418
3419 /**
3420  * Query a flow.
3421  *
3422  * @see rte_flow_query()
3423  * @see rte_flow_ops
3424  */
3425 int
3426 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3427                 struct rte_flow *flow,
3428                 const struct rte_flow_action *actions,
3429                 void *data,
3430                 struct rte_flow_error *error)
3431 {
3432         int ret = 0;
3433
3434         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3435                 switch (actions->type) {
3436                 case RTE_FLOW_ACTION_TYPE_VOID:
3437                         break;
3438                 case RTE_FLOW_ACTION_TYPE_COUNT:
3439                         ret = mlx5_flow_query_count(flow, data, error);
3440                         break;
3441                 default:
3442                         return rte_flow_error_set(error, ENOTSUP,
3443                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3444                                                   actions,
3445                                                   "action not supported");
3446                 }
3447                 if (ret < 0)
3448                         return ret;
3449         }
3450         return 0;
3451 }
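
/*
 * Application-side sketch (hypothetical port_id and flow handle):
 *
 *   struct rte_flow_query_count qc = { .reset = 1 };
 *   struct rte_flow_action count[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *
 *   if (!rte_flow_query(port_id, flow, count, &qc, &err) && qc.hits_set)
 *           printf("hits: %" PRIu64 "\n", qc.hits);
 */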
3452
3453 /**
3454  * Convert a flow director filter to a generic flow.
3455  *
3456  * @param dev
3457  *   Pointer to Ethernet device.
3458  * @param fdir_filter
3459  *   Flow director filter to add.
3460  * @param attributes
3461  *   Generic flow parameters structure.
3462  *
3463  * @return
3464  *   0 on success, a negative errno value otherwise and rte_errno is set.
3465  */
3466 static int
3467 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3468                          const struct rte_eth_fdir_filter *fdir_filter,
3469                          struct mlx5_fdir *attributes)
3470 {
3471         struct priv *priv = dev->data->dev_private;
3472         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3473         const struct rte_eth_fdir_masks *mask =
3474                 &dev->data->dev_conf.fdir_conf.mask;
3475
3476         /* Validate queue number. */
3477         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3478                 DRV_LOG(ERR, "port %u invalid queue number %d",
3479                         dev->data->port_id, fdir_filter->action.rx_queue);
3480                 rte_errno = EINVAL;
3481                 return -rte_errno;
3482         }
3483         attributes->attr.ingress = 1;
3484         attributes->items[0] = (struct rte_flow_item) {
3485                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3486                 .spec = &attributes->l2,
3487                 .mask = &attributes->l2_mask,
3488         };
3489         switch (fdir_filter->action.behavior) {
3490         case RTE_ETH_FDIR_ACCEPT:
3491                 attributes->actions[0] = (struct rte_flow_action){
3492                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3493                         .conf = &attributes->queue,
3494                 };
3495                 break;
3496         case RTE_ETH_FDIR_REJECT:
3497                 attributes->actions[0] = (struct rte_flow_action){
3498                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3499                 };
3500                 break;
3501         default:
3502                 DRV_LOG(ERR, "port %u invalid behavior %d",
3503                         dev->data->port_id,
3504                         fdir_filter->action.behavior);
3505                 rte_errno = ENOTSUP;
3506                 return -rte_errno;
3507         }
3508         attributes->queue.index = fdir_filter->action.rx_queue;
3509         /* Handle L3. */
3510         switch (fdir_filter->input.flow_type) {
3511         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3512         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3513         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3514                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3515                         .src_addr = input->flow.ip4_flow.src_ip,
3516                         .dst_addr = input->flow.ip4_flow.dst_ip,
3517                         .time_to_live = input->flow.ip4_flow.ttl,
3518                         .type_of_service = input->flow.ip4_flow.tos,
3519                         .next_proto_id = input->flow.ip4_flow.proto,
3520                 };
3521                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3522                         .src_addr = mask->ipv4_mask.src_ip,
3523                         .dst_addr = mask->ipv4_mask.dst_ip,
3524                         .time_to_live = mask->ipv4_mask.ttl,
3525                         .type_of_service = mask->ipv4_mask.tos,
3526                         .next_proto_id = mask->ipv4_mask.proto,
3527                 };
3528                 attributes->items[1] = (struct rte_flow_item){
3529                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3530                         .spec = &attributes->l3,
3531                         .mask = &attributes->l3_mask,
3532                 };
3533                 break;
3534         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3535         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3536         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3537                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3538                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3539                         .proto = input->flow.ipv6_flow.proto,
3540                 };
3541
3542                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3543                        input->flow.ipv6_flow.src_ip,
3544                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3545                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3546                        input->flow.ipv6_flow.dst_ip,
3547                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3548                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3549                        mask->ipv6_mask.src_ip,
3550                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3551                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3552                        mask->ipv6_mask.dst_ip,
3553                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3554                 attributes->items[1] = (struct rte_flow_item){
3555                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3556                         .spec = &attributes->l3,
3557                         .mask = &attributes->l3_mask,
3558                 };
3559                 break;
3560         default:
3561                 DRV_LOG(ERR, "port %u invalid flow type %d",
3562                         dev->data->port_id, fdir_filter->input.flow_type);
3563                 rte_errno = ENOTSUP;
3564                 return -rte_errno;
3565         }
3566         /* Handle L4. */
3567         switch (fdir_filter->input.flow_type) {
3568         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3569                 attributes->l4.udp.hdr = (struct udp_hdr){
3570                         .src_port = input->flow.udp4_flow.src_port,
3571                         .dst_port = input->flow.udp4_flow.dst_port,
3572                 };
3573                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3574                         .src_port = mask->src_port_mask,
3575                         .dst_port = mask->dst_port_mask,
3576                 };
3577                 attributes->items[2] = (struct rte_flow_item){
3578                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3579                         .spec = &attributes->l4,
3580                         .mask = &attributes->l4_mask,
3581                 };
3582                 break;
3583         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3584                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3585                         .src_port = input->flow.tcp4_flow.src_port,
3586                         .dst_port = input->flow.tcp4_flow.dst_port,
3587                 };
3588                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3589                         .src_port = mask->src_port_mask,
3590                         .dst_port = mask->dst_port_mask,
3591                 };
3592                 attributes->items[2] = (struct rte_flow_item){
3593                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3594                         .spec = &attributes->l4,
3595                         .mask = &attributes->l4_mask,
3596                 };
3597                 break;
3598         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3599                 attributes->l4.udp.hdr = (struct udp_hdr){
3600                         .src_port = input->flow.udp6_flow.src_port,
3601                         .dst_port = input->flow.udp6_flow.dst_port,
3602                 };
3603                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3604                         .src_port = mask->src_port_mask,
3605                         .dst_port = mask->dst_port_mask,
3606                 };
3607                 attributes->items[2] = (struct rte_flow_item){
3608                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3609                         .spec = &attributes->l4,
3610                         .mask = &attributes->l4_mask,
3611                 };
3612                 break;
3613         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3614                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3615                         .src_port = input->flow.tcp6_flow.src_port,
3616                         .dst_port = input->flow.tcp6_flow.dst_port,
3617                 };
3618                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3619                         .src_port = mask->src_port_mask,
3620                         .dst_port = mask->dst_port_mask,
3621                 };
3622                 attributes->items[2] = (struct rte_flow_item){
3623                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3624                         .spec = &attributes->l4,
3625                         .mask = &attributes->l4_mask,
3626                 };
3627                 break;
3628         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3629         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3630                 break;
3631         default:
3632                 DRV_LOG(ERR, "port %u invalid flow type %d",
3633                         dev->data->port_id, fdir_filter->input.flow_type);
3634                 rte_errno = ENOTSUP;
3635                 return -rte_errno;
3636         }
3637         return 0;
3638 }
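
/*
 * Conversion example: a RTE_ETH_FLOW_NONFRAG_IPV4_UDP filter with
 * behavior RTE_ETH_FDIR_ACCEPT is rewritten as the generic rule
 * ETH / IPV4 (addresses, TOS, TTL, proto) / UDP (ports) -> QUEUE and
 * then goes through the same mlx5_flow_list_create() path as regular
 * rte_flow rules.
 */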
3639
3640 /**
3641  * Add new flow director filter and store it in list.
3642  *
3643  * @param dev
3644  *   Pointer to Ethernet device.
3645  * @param fdir_filter
3646  *   Flow director filter to add.
3647  *
3648  * @return
3649  *   0 on success, a negative errno value otherwise and rte_errno is set.
3650  */
3651 static int
3652 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3653                      const struct rte_eth_fdir_filter *fdir_filter)
3654 {
3655         struct priv *priv = dev->data->dev_private;
3656         struct mlx5_fdir attributes = {
3657                 .attr.group = 0,
3658                 .l2_mask = {
3659                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3660                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3661                         .type = 0,
3662                 },
3663         };
3664         struct rte_flow_error error;
3665         struct rte_flow *flow;
3666         int ret;
3667
3668         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3669         if (ret)
3670                 return ret;
3671         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3672                                      attributes.items, attributes.actions,
3673                                      &error);
3674         if (flow) {
3675                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3676                         (void *)flow);
3677                 return 0;
3678         }
3679         return -rte_errno;
3680 }
3681
3682 /**
3683  * Delete specific filter.
3684  *
3685  * @param dev
3686  *   Pointer to Ethernet device.
3687  * @param fdir_filter
3688  *   Filter to be deleted.
3689  *
3690  * @return
3691  *   0 on success, a negative errno value otherwise and rte_errno is set.
3692  */
3693 static int
3694 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
3695                         const struct rte_eth_fdir_filter *fdir_filter
3696                         __rte_unused)
3697 {
3698         rte_errno = ENOTSUP;
3699         return -rte_errno;
3700 }
3701
3702 /**
3703  * Update queue for specific filter.
3704  *
3705  * @param dev
3706  *   Pointer to Ethernet device.
3707  * @param fdir_filter
3708  *   Filter to be updated.
3709  *
3710  * @return
3711  *   0 on success, a negative errno value otherwise and rte_errno is set.
3712  */
3713 static int
3714 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3715                         const struct rte_eth_fdir_filter *fdir_filter)
3716 {
3717         int ret;
3718
3719         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3720         if (ret)
3721                 return ret;
3722         return mlx5_fdir_filter_add(dev, fdir_filter);
3723 }
3724
3725 /**
3726  * Flush all filters.
3727  *
3728  * @param dev
3729  *   Pointer to Ethernet device.
3730  */
3731 static void
3732 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3733 {
3734         struct priv *priv = dev->data->dev_private;
3735
3736         mlx5_flow_list_flush(dev, &priv->flows);
3737 }
3738
3739 /**
3740  * Get flow director information.
3741  *
3742  * @param dev
3743  *   Pointer to Ethernet device.
3744  * @param[out] fdir_info
3745  *   Resulting flow director information.
3746  */
3747 static void
3748 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3749 {
3750         struct rte_eth_fdir_masks *mask =
3751                 &dev->data->dev_conf.fdir_conf.mask;
3752
3753         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3754         fdir_info->guarant_spc = 0;
3755         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3756         fdir_info->max_flexpayload = 0;
3757         fdir_info->flow_types_mask[0] = 0;
3758         fdir_info->flex_payload_unit = 0;
3759         fdir_info->max_flex_payload_segment_num = 0;
3760         fdir_info->flex_payload_limit = 0;
3761         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3762 }
3763
3764 /**
3765  * Deal with flow director operations.
3766  *
3767  * @param dev
3768  *   Pointer to Ethernet device.
3769  * @param filter_op
3770  *   Operation to perform.
3771  * @param arg
3772  *   Pointer to operation-specific structure.
3773  *
3774  * @return
3775  *   0 on success, a negative errno value otherwise and rte_errno is set.
3776  */
3777 static int
3778 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3779                     void *arg)
3780 {
3781         enum rte_fdir_mode fdir_mode =
3782                 dev->data->dev_conf.fdir_conf.mode;
3783
3784         if (filter_op == RTE_ETH_FILTER_NOP)
3785                 return 0;
3786         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3787             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3788                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3789                         dev->data->port_id, fdir_mode);
3790                 rte_errno = EINVAL;
3791                 return -rte_errno;
3792         }
3793         switch (filter_op) {
3794         case RTE_ETH_FILTER_ADD:
3795                 return mlx5_fdir_filter_add(dev, arg);
3796         case RTE_ETH_FILTER_UPDATE:
3797                 return mlx5_fdir_filter_update(dev, arg);
3798         case RTE_ETH_FILTER_DELETE:
3799                 return mlx5_fdir_filter_delete(dev, arg);
3800         case RTE_ETH_FILTER_FLUSH:
3801                 mlx5_fdir_filter_flush(dev);
3802                 break;
3803         case RTE_ETH_FILTER_INFO:
3804                 mlx5_fdir_info_get(dev, arg);
3805                 break;
3806         default:
3807                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3808                         dev->data->port_id, filter_op);
3809                 rte_errno = EINVAL;
3810                 return -rte_errno;
3811         }
3812         return 0;
3813 }
3814
3815 /**
3816  * Manage filter operations.
3817  *
3818  * @param dev
3819  *   Pointer to Ethernet device structure.
3820  * @param filter_type
3821  *   Filter type.
3822  * @param filter_op
3823  *   Operation to perform.
3824  * @param arg
3825  *   Pointer to operation-specific structure.
3826  *
3827  * @return
3828  *   0 on success, a negative errno value otherwise and rte_errno is set.
3829  */
3830 int
3831 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3832                      enum rte_filter_type filter_type,
3833                      enum rte_filter_op filter_op,
3834                      void *arg)
3835 {
3836         switch (filter_type) {
3837         case RTE_ETH_FILTER_GENERIC:
3838                 if (filter_op != RTE_ETH_FILTER_GET) {
3839                         rte_errno = EINVAL;
3840                         return -rte_errno;
3841                 }
3842                 *(const void **)arg = &mlx5_flow_ops;
3843                 return 0;
3844         case RTE_ETH_FILTER_FDIR:
3845                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3846         default:
3847                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3848                         dev->data->port_id, filter_type);
3849                 rte_errno = ENOTSUP;
3850                 return -rte_errno;
3851         }
3852         return 0;
3853 }
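
/*
 * Application-side sketch: RTE_ETH_FILTER_GENERIC is how the rte_flow
 * layer obtains the driver callbacks (port_id is hypothetical):
 *
 *   const struct rte_flow_ops *ops = NULL;
 *   int ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                                     RTE_ETH_FILTER_GET, &ops);
 *
 * On success, ops points to mlx5_flow_ops.
 */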