net/mlx5: fix flow count action for shared counter
drivers/net/mlx5/mlx5_flow.c
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2016 6WIND S.A.
 * Copyright 2016 Mellanox Technologies, Ltd
 */

#include <sys/queue.h>
#include <stdalign.h>
#include <stdint.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_common.h>
#include <rte_ether.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev_driver.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>
#include <rte_ip.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"
#include "mlx5_glue.h"

/* Dev ops structure defined in mlx5.c */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

/* Pattern outer Layer bits. */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)

/* Pattern inner Layer bits. */
#define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
#define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
#define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
#define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
#define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
#define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)

/* Pattern tunnel Layer bits. */
#define MLX5_FLOW_LAYER_VXLAN (1u << 12)
#define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
#define MLX5_FLOW_LAYER_GRE (1u << 14)
#define MLX5_FLOW_LAYER_MPLS (1u << 15)

/* Outer Masks. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
        (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
        (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
#define MLX5_FLOW_LAYER_OUTER \
        (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
         MLX5_FLOW_LAYER_OUTER_L4)

/* Tunnel Masks. */
#define MLX5_FLOW_LAYER_TUNNEL \
        (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
         MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)

/* Inner Masks. */
#define MLX5_FLOW_LAYER_INNER_L3 \
        (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
#define MLX5_FLOW_LAYER_INNER_L4 \
        (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
#define MLX5_FLOW_LAYER_INNER \
        (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
         MLX5_FLOW_LAYER_INNER_L4)

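/*
 * Example (illustrative): a VXLAN flow matching outer Ethernet/IPv4/UDP
 * and inner Ethernet accumulates MLX5_FLOW_LAYER_OUTER_L2 |
 * MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L4_UDP |
 * MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_INNER_L2 in rte_flow->layers.
 */
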
/* Actions that modify the fate of matching traffic. */
#define MLX5_FLOW_FATE_DROP (1u << 0)
#define MLX5_FLOW_FATE_QUEUE (1u << 1)
#define MLX5_FLOW_FATE_RSS (1u << 2)

/* Modify a packet. */
#define MLX5_FLOW_MOD_FLAG (1u << 0)
#define MLX5_FLOW_MOD_MARK (1u << 1)
#define MLX5_FLOW_MOD_COUNT (1u << 2)

/* Possible L3 layer protocols to filter on. */
#define MLX5_IP_PROTOCOL_TCP 6
#define MLX5_IP_PROTOCOL_UDP 17
#define MLX5_IP_PROTOCOL_GRE 47
#define MLX5_IP_PROTOCOL_MPLS 147

/* Priority reserved for default flows. */
#define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)

enum mlx5_expansion {
        MLX5_EXPANSION_ROOT,
        MLX5_EXPANSION_ROOT_OUTER,
        MLX5_EXPANSION_OUTER_ETH,
        MLX5_EXPANSION_OUTER_IPV4,
        MLX5_EXPANSION_OUTER_IPV4_UDP,
        MLX5_EXPANSION_OUTER_IPV4_TCP,
        MLX5_EXPANSION_OUTER_IPV6,
        MLX5_EXPANSION_OUTER_IPV6_UDP,
        MLX5_EXPANSION_OUTER_IPV6_TCP,
        MLX5_EXPANSION_VXLAN,
        MLX5_EXPANSION_VXLAN_GPE,
        MLX5_EXPANSION_GRE,
        MLX5_EXPANSION_MPLS,
        MLX5_EXPANSION_ETH,
        MLX5_EXPANSION_IPV4,
        MLX5_EXPANSION_IPV4_UDP,
        MLX5_EXPANSION_IPV4_TCP,
        MLX5_EXPANSION_IPV6,
        MLX5_EXPANSION_IPV6_UDP,
        MLX5_EXPANSION_IPV6_TCP,
};

/** Supported expansion of items. */
static const struct rte_flow_expand_node mlx5_support_expansion[] = {
        [MLX5_EXPANSION_ROOT] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_ROOT_OUTER] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
                                                 MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_END,
        },
        [MLX5_EXPANSION_OUTER_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
                                                 MLX5_EXPANSION_OUTER_IPV6,
                                                 MLX5_EXPANSION_MPLS),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
                .rss_types = 0,
        },
        [MLX5_EXPANSION_OUTER_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV4_UDP,
                         MLX5_EXPANSION_OUTER_IPV4_TCP,
                         MLX5_EXPANSION_GRE),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_OUTER_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT
                        (MLX5_EXPANSION_OUTER_IPV6_UDP,
                         MLX5_EXPANSION_OUTER_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
                                                 MLX5_EXPANSION_VXLAN_GPE),
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
        [MLX5_EXPANSION_VXLAN] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN,
        },
        [MLX5_EXPANSION_VXLAN_GPE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
                                                 MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
        },
        [MLX5_EXPANSION_GRE] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
                .type = RTE_FLOW_ITEM_TYPE_GRE,
        },
        [MLX5_EXPANSION_MPLS] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_MPLS,
        },
        [MLX5_EXPANSION_ETH] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
                                                 MLX5_EXPANSION_IPV6),
                .type = RTE_FLOW_ITEM_TYPE_ETH,
        },
        [MLX5_EXPANSION_IPV4] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
                                                 MLX5_EXPANSION_IPV4_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                        ETH_RSS_NONFRAG_IPV4_OTHER,
        },
        [MLX5_EXPANSION_IPV4_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
        },
        [MLX5_EXPANSION_IPV4_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
        },
        [MLX5_EXPANSION_IPV6] = {
                .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
                                                 MLX5_EXPANSION_IPV6_TCP),
                .type = RTE_FLOW_ITEM_TYPE_IPV6,
                .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
                        ETH_RSS_NONFRAG_IPV6_OTHER,
        },
        [MLX5_EXPANSION_IPV6_UDP] = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
        },
        [MLX5_EXPANSION_IPV6_TCP] = {
                .type = RTE_FLOW_ITEM_TYPE_TCP,
                .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
        },
};

/** Verbs flow structure: attributes, specifications and related handles. */
struct mlx5_flow_verbs {
        LIST_ENTRY(mlx5_flow_verbs) next;
        unsigned int size; /**< Size of the attribute. */
        struct {
                struct ibv_flow_attr *attr;
                /**< Pointer to the Specification buffer. */
                uint8_t *specs; /**< Pointer to the specifications. */
        };
        struct ibv_flow *flow; /**< Verbs flow pointer. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
        uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
};

/* Counters information. */
struct mlx5_flow_counter {
        LIST_ENTRY(mlx5_flow_counter) next; /**< Pointer to the next counter. */
        uint32_t shared:1; /**< Share counter ID with other flow rules. */
        uint32_t ref_cnt:31; /**< Reference counter. */
        uint32_t id; /**< Counter ID. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/* Flow structure. */
struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct rte_flow_attr attributes; /**< User flow attribute. */
        uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
        uint32_t layers;
        /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
        uint32_t modifier;
        /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
        uint32_t fate;
        /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
        uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
        LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
        struct mlx5_flow_verbs *cur_verbs;
        /**< Current Verbs flow structure being filled. */
        struct mlx5_flow_counter *counter; /**< Holds Verbs flow counter. */
        struct rte_flow_action_rss rss; /**< RSS context. */
        uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
        uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
        void *nl_flow; /**< Netlink flow buffer if relevant. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .isolate = mlx5_flow_isolate,
        .query = mlx5_flow_query,
};

/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3_mask;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4_mask;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

/*
 * Number of sub priorities.
 * For each kind of pattern matching, i.e. L2, L3, L4, to have a correct
 * match on the NIC (firmware dependent), L4 must have the highest priority
 * followed by L3 and ending with L2.
 */
#define MLX5_PRIORITY_MAP_L2 2
#define MLX5_PRIORITY_MAP_L3 1
#define MLX5_PRIORITY_MAP_L4 0
#define MLX5_PRIORITY_MAP_MAX 3

/* Map of Verbs to Flow priority with 8 Verbs priorities. */
static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
};

/* Map of Verbs to Flow priority with 16 Verbs priorities. */
static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
        { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
        { 9, 10, 11 }, { 12, 13, 14 },
};

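/*
 * Example (illustrative): with 16 Verbs priorities available, a flow
 * created with attribute priority 1 whose deepest match is L3 is given
 * Verbs priority priority_map_5[1][MLX5_PRIORITY_MAP_L3] == 4, see
 * mlx5_flow_adjust_priority().
 */
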
/* Tunnel information. */
struct mlx5_flow_tunnel_info {
        uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
        uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
};

static struct mlx5_flow_tunnel_info tunnels_info[] = {
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
                .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_GRE,
                .ptype = RTE_PTYPE_TUNNEL_GRE,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
        },
        {
                .tunnel = MLX5_FLOW_LAYER_MPLS,
                .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
        },
};

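/*
 * Note (assumption from the table layout): entries are ordered from most
 * to least specific (MPLS over UDP before plain MPLS) so that a
 * first-match scan over this table picks the right tunnel ptype for the
 * Rx queues carrying the flow.
 */
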
/**
 * Discover the maximum number of priorities available.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 *
 * @return
 *   Number of supported flow priorities on success, a negative errno
 *   value otherwise and rte_errno is set.
 */
int
mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
{
        struct {
                struct ibv_flow_attr attr;
                struct ibv_flow_spec_eth eth;
                struct ibv_flow_spec_action_drop drop;
        } flow_attr = {
                .attr = {
                        .num_of_specs = 2,
                },
                .eth = {
                        .type = IBV_FLOW_SPEC_ETH,
                        .size = sizeof(struct ibv_flow_spec_eth),
                },
                .drop = {
                        .size = sizeof(struct ibv_flow_spec_action_drop),
                        .type = IBV_FLOW_SPEC_ACTION_DROP,
                },
        };
        struct ibv_flow *flow;
        struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
        uint16_t vprio[] = { 8, 16 };
        int i;
        int priority = 0;

        if (!drop) {
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
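        /*
         * Probe with throw-away drop flows: try Verbs priority 7, then 15.
         * The highest priority accepted by the device tells whether 8 or
         * 16 priority levels are available.
         */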
        for (i = 0; i != RTE_DIM(vprio); i++) {
                flow_attr.attr.priority = vprio[i] - 1;
                flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
                if (!flow)
                        break;
                claim_zero(mlx5_glue->destroy_flow(flow));
                priority = vprio[i];
        }
        switch (priority) {
        case 8:
                priority = RTE_DIM(priority_map_3);
                break;
        case 16:
                priority = RTE_DIM(priority_map_5);
                break;
        default:
                /* Release the drop queue before bailing out. */
                mlx5_hrxq_drop_release(dev);
                rte_errno = ENOTSUP;
                DRV_LOG(ERR,
                        "port %u verbs maximum priority: %d expected 8/16",
                        dev->data->port_id, vprio[i]);
                return -rte_errno;
        }
        mlx5_hrxq_drop_release(dev);
        DRV_LOG(INFO, "port %u flow maximum priority: %d",
                dev->data->port_id, priority);
        return priority;
}

/**
 * Adjust flow priority.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param flow
 *   Pointer to an rte flow.
 */
static void
mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
{
        struct priv *priv = dev->data->dev_private;
        uint32_t priority = flow->attributes.priority;
        uint32_t subpriority = flow->cur_verbs->attr->priority;

        switch (priv->config.flow_prio) {
        case RTE_DIM(priority_map_3):
                priority = priority_map_3[priority][subpriority];
                break;
        case RTE_DIM(priority_map_5):
                priority = priority_map_5[priority][subpriority];
                break;
        }
        flow->cur_verbs->attr->priority = priority;
}

/**
 * Get a flow counter.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] shared
 *   Indicate if this counter is shared with other flows.
 * @param[in] id
 *   Counter identifier.
 *
 * @return
 *   A pointer to the counter, NULL otherwise and rte_errno is set.
 */
static struct mlx5_flow_counter *
mlx5_flow_counter_new(struct rte_eth_dev *dev, uint32_t shared, uint32_t id)
{
        struct priv *priv = dev->data->dev_private;
        struct mlx5_flow_counter *cnt;

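        /*
         * Only a counter created as shared can be reused; a non-shared
         * counter is private to its flow rule and is always allocated
         * anew, hence the !cnt->shared test below.
         */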
        LIST_FOREACH(cnt, &priv->flow_counters, next) {
                if (!cnt->shared || cnt->shared != shared)
                        continue;
                if (cnt->id != id)
                        continue;
                cnt->ref_cnt++;
                return cnt;
        }
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT

        struct mlx5_flow_counter tmpl = {
                .shared = shared,
                .id = id,
                .cs = mlx5_glue->create_counter_set
                        (priv->ctx,
                         &(struct ibv_counter_set_init_attr){
                                 .counter_set_id = id,
                         }),
                .hits = 0,
                .bytes = 0,
        };

        if (!tmpl.cs) {
                rte_errno = errno;
                return NULL;
        }
        cnt = rte_calloc(__func__, 1, sizeof(*cnt), 0);
        if (!cnt) {
                rte_errno = ENOMEM;
                return NULL;
        }
        *cnt = tmpl;
        LIST_INSERT_HEAD(&priv->flow_counters, cnt, next);
        return cnt;
#endif
        rte_errno = ENOTSUP;
        return NULL;
}

/**
 * Release a flow counter.
 *
 * @param[in] counter
 *   Pointer to the counter handler.
 */
static void
mlx5_flow_counter_release(struct mlx5_flow_counter *counter)
{
        if (--counter->ref_cnt == 0) {
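                /* Last reference: destroy the counter set and free it. */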
                claim_zero(mlx5_glue->destroy_counter_set(counter->cs));
                LIST_REMOVE(counter, next);
                rte_free(counter);
        }
}

/**
 * Verify the @p attributes will be correctly understood by the NIC and store
 * them in the @p flow if everything is correct.
 *
 * @param[in] dev
 *   Pointer to Ethernet device.
 * @param[in] attributes
 *   Pointer to flow attributes.
 * @param[in, out] flow
 *   Pointer to the rte_flow structure.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_attributes(struct rte_eth_dev *dev,
                     const struct rte_flow_attr *attributes,
                     struct rte_flow *flow,
                     struct rte_flow_error *error)
{
        uint32_t priority_max =
                ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;

        if (attributes->group)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                          NULL,
                                          "groups are not supported");
        if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
            attributes->priority >= priority_max)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                          NULL,
                                          "priority out of range");
        if (attributes->egress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                          NULL,
                                          "egress is not supported");
        if (attributes->transfer)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
                                          NULL,
                                          "transfer is not supported");
        if (!attributes->ingress)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                          NULL,
                                          "ingress attribute is mandatory");
        flow->attributes = *attributes;
        if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
                flow->attributes.priority = priority_max;
        return 0;
}

/**
 * Verify the @p item specifications (spec, last, mask) are compatible with the
 * NIC capabilities.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   @p item->mask or flow default bit-masks.
 * @param[in] nic_mask
 *   Bit-masks covering the fields supported by the NIC, compared with the
 *   user mask.
 * @param[in] size
 *   Bit-masks size in bytes.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_flow_item_acceptable(const struct rte_flow_item *item,
                          const uint8_t *mask,
                          const uint8_t *nic_mask,
                          unsigned int size,
                          struct rte_flow_error *error)
{
        unsigned int i;

        assert(nic_mask);
        for (i = 0; i < size; ++i)
                if ((nic_mask[i] | mask[i]) != nic_mask[i])
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "mask enables unsupported"
                                                  " bits");
        if (!item->spec && (item->mask || item->last))
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "mask/last without a spec is not"
                                          " supported");
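        /*
         * Example (illustrative): spec 10.0.0.0 with last 10.0.0.255 and
         * mask 255.255.255.0 is accepted since both ends collapse to the
         * same masked value, while the same range with mask
         * 255.255.255.255 is rejected as an unsupported range.
         */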
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                unsigned int i;
                int ret;

                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
                        last[i] = ((const uint8_t *)item->last)[i] & mask[i];
                }
                ret = memcmp(spec, last, size);
                if (ret != 0)
                        return rte_flow_error_set(error, ENOTSUP,
                                                  RTE_FLOW_ERROR_TYPE_ITEM,
                                                  item,
                                                  "range is not supported");
        }
        return 0;
}

/**
 * Add a verbs item specification into @p flow.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] src
 *   Specification to be copied into the flow.
 * @param[in] size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
{
        struct mlx5_flow_verbs *verbs = flow->cur_verbs;

        if (verbs->specs) {
                void *dst;

                dst = (void *)(verbs->specs + verbs->size);
                memcpy(dst, src, size);
                ++verbs->attr->num_of_specs;
        }
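        /*
         * The size is accumulated even when no specification buffer is
         * attached yet (sizing pass), letting callers learn the room
         * required and retry with a large enough buffer.
         */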
        verbs->size += size;
}

/**
 * Adjust verbs hash fields according to the @p flow information.
 *
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] tunnel
 *   1 when the hash field is for a tunnel item.
 * @param[in] layer_types
 *   ETH_RSS_* types.
 * @param[in] hash_fields
 *   Item hash fields.
 */
static void
mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
                                  int tunnel __rte_unused,
                                  uint32_t layer_types, uint64_t hash_fields)
{
#ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
        hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
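        /*
         * RSS level 2 requests hashing on inner headers while levels 0
         * and 1 refer to outer headers: discard hash fields that do not
         * match the requested encapsulation level.
         */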
        if (flow->rss.level == 2 && !tunnel)
                hash_fields = 0;
        else if (flow->rss.level < 2 && tunnel)
                hash_fields = 0;
#endif
        if (!(flow->rss.types & layer_types))
                hash_fields = 0;
        flow->cur_verbs->hash_fields |= hash_fields;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
                   const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        const struct rte_flow_item_eth nic_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                .type = RTE_BE16(0xffff),
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        const unsigned int size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                            MLX5_FLOW_LAYER_OUTER_L2))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L2 layers already configured");
        if (!mask)
                mask = &rte_flow_item_eth_mask;
        ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
                                        (const uint8_t *)&nic_mask,
                                        sizeof(struct rte_flow_item_eth),
                                        error);
        if (ret)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                MLX5_FLOW_LAYER_OUTER_L2;
        if (size > flow_size)
                return size;
        if (spec) {
                unsigned int i;

                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
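        /*
         * An L2 match uses the coarsest sub-priority (see the
         * MLX5_PRIORITY_MAP_* definitions) so that deeper L3/L4 matches
         * are evaluated first by the NIC.
         */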
        flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
        mlx5_flow_spec_verbs_add(flow, &eth, size);
        return size;
}

/**
 * Update the VLAN tag in the Verbs Ethernet specification.
 *
 * @param[in, out] attr
 *   Pointer to Verbs attributes structure.
 * @param[in] eth
 *   Verbs structure containing the VLAN information to copy.
 */
static void
mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
                           struct ibv_flow_spec_eth *eth)
{
        unsigned int i;
        const enum ibv_flow_spec_type search = eth->type;
        struct ibv_spec_header *hdr = (struct ibv_spec_header *)
                ((uint8_t *)attr + sizeof(struct ibv_flow_attr));

        for (i = 0; i != attr->num_of_specs; ++i) {
                if (hdr->type == search) {
                        struct ibv_flow_spec_eth *e =
                                (struct ibv_flow_spec_eth *)hdr;

                        e->val.vlan_tag = eth->val.vlan_tag;
                        e->mask.vlan_tag = eth->mask.vlan_tag;
                        e->val.ether_type = eth->val.ether_type;
                        e->mask.ether_type = eth->mask.ether_type;
                        break;
                }
                hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
        }
}

/**
 * Convert the @p item into @p flow (or by updating the already present
 * Ethernet Verbs) specification after ensuring the NIC will understand and
 * process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        const struct rte_flow_item_vlan nic_mask = {
                .tci = RTE_BE16(0x0fff),
                .inner_type = RTE_BE16(0xffff),
        };
        unsigned int size = sizeof(struct ibv_flow_spec_eth);
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        struct ibv_flow_spec_eth eth = {
                .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;
        const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
                                        MLX5_FLOW_LAYER_INNER_L4) :
                (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
        const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
                MLX5_FLOW_LAYER_OUTER_VLAN;
        const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
                MLX5_FLOW_LAYER_OUTER_L2;

        if (flow->layers & vlanm)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "VLAN layer already configured");
        else if ((flow->layers & l34m) != 0)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L2 layer cannot follow L3/L4 layer");
        if (!mask)
                mask = &rte_flow_item_vlan_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_vlan), error);
        if (ret)
                return ret;
        if (spec) {
                eth.val.vlan_tag = spec->tci;
                eth.mask.vlan_tag = mask->tci;
                eth.val.vlan_tag &= eth.mask.vlan_tag;
                eth.val.ether_type = spec->inner_type;
                eth.mask.ether_type = mask->inner_type;
                eth.val.ether_type &= eth.mask.ether_type;
        }
        /*
         * From verbs perspective an empty VLAN is equivalent
         * to a packet without VLAN layer.
         */
        if (!eth.mask.vlan_tag)
                return rte_flow_error_set(error, EINVAL,
                                          RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
                                          item->spec,
                                          "VLAN cannot be empty");
        if (!(flow->layers & l2m)) {
                if (size <= flow_size) {
                        flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
                        mlx5_flow_spec_verbs_add(flow, &eth, size);
                }
        } else {
                if (flow->cur_verbs)
                        mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
                                                   &eth);
                size = 0; /* Only an update is done in eth specification. */
        }
        flow->layers |= tunnel ?
                (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
                (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        const struct rte_flow_item_ipv4 nic_mask = {
                .hdr = {
                        .src_addr = RTE_BE32(0xffffffff),
                        .dst_addr = RTE_BE32(0xffffffff),
                        .type_of_service = 0xff,
                        .next_proto_id = 0xff,
                },
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
        struct ibv_flow_spec_ipv4_ext ipv4 = {
                .type = IBV_FLOW_SPEC_IPV4_EXT |
                        (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                            MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "multiple L3 layers not supported");
        else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                                 MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 cannot follow an L4 layer.");
        if (!mask)
                mask = &rte_flow_item_ipv4_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_ipv4), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
                MLX5_FLOW_LAYER_OUTER_L3_IPV4;
        if (spec) {
                ipv4.val = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = spec->hdr.src_addr,
                        .dst_ip = spec->hdr.dst_addr,
                        .proto = spec->hdr.next_proto_id,
                        .tos = spec->hdr.type_of_service,
                };
                ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
                        .src_ip = mask->hdr.src_addr,
                        .dst_ip = mask->hdr.dst_addr,
                        .proto = mask->hdr.next_proto_id,
                        .tos = mask->hdr.type_of_service,
                };
                /* Remove unwanted bits from values. */
                ipv4.val.src_ip &= ipv4.mask.src_ip;
                ipv4.val.dst_ip &= ipv4.mask.dst_ip;
                ipv4.val.proto &= ipv4.mask.proto;
                ipv4.val.tos &= ipv4.mask.tos;
        }
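        /*
         * Remember the L3 next-protocol filter so that subsequent L4
         * (TCP/UDP) and tunnel items can be checked against it for
         * consistency.
         */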
        flow->l3_protocol_en = !!ipv4.mask.proto;
        flow->l3_protocol = ipv4.val.proto;
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust
                        (flow, tunnel,
                         (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
                          ETH_RSS_NONFRAG_IPV4_OTHER),
                         (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
                mlx5_flow_spec_verbs_add(flow, &ipv4, size);
        }
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
                    const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        const struct rte_flow_item_ipv6 nic_mask = {
                .hdr = {
                        .src_addr =
                                "\xff\xff\xff\xff\xff\xff\xff\xff"
                                "\xff\xff\xff\xff\xff\xff\xff\xff",
                        .dst_addr =
                                "\xff\xff\xff\xff\xff\xff\xff\xff"
                                "\xff\xff\xff\xff\xff\xff\xff\xff",
                        .vtc_flow = RTE_BE32(0xffffffff),
                        .proto = 0xff,
                        .hop_limits = 0xff,
                },
        };
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
        struct ibv_flow_spec_ipv6 ipv6 = {
                .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                            MLX5_FLOW_LAYER_OUTER_L3))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "multiple L3 layers not supported");
        else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                                 MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 cannot follow an L4 layer.");
        /*
         * IPv6 is not recognised by the NIC inside a GRE tunnel.
         * Such support has to be disabled since the rule would be
         * accepted without being enforced. Issue reproduced with
         * Mellanox OFED 4.3-3.0.2.1 and Mellanox OFED 4.4-1.0.0.0.
         */
        if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "IPv6 inside a GRE tunnel is"
                                          " not recognised.");
        if (!mask)
                mask = &rte_flow_item_ipv6_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&nic_mask,
                 sizeof(struct rte_flow_item_ipv6), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
                MLX5_FLOW_LAYER_OUTER_L3_IPV6;
        if (spec) {
                unsigned int i;
                uint32_t vtc_flow_val;
                uint32_t vtc_flow_mask;

                memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
                       RTE_DIM(ipv6.val.src_ip));
                memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
                       RTE_DIM(ipv6.val.dst_ip));
                memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
                       RTE_DIM(ipv6.mask.src_ip));
                memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
                       RTE_DIM(ipv6.mask.dst_ip));
                vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
                vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
                ipv6.val.flow_label =
                        rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
                                         IPV6_HDR_FL_SHIFT);
                ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
                                         IPV6_HDR_TC_SHIFT;
                ipv6.val.next_hdr = spec->hdr.proto;
                ipv6.val.hop_limit = spec->hdr.hop_limits;
                ipv6.mask.flow_label =
                        rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
                                         IPV6_HDR_FL_SHIFT);
                ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
                                          IPV6_HDR_TC_SHIFT;
                ipv6.mask.next_hdr = mask->hdr.proto;
                ipv6.mask.hop_limit = mask->hdr.hop_limits;
                /* Remove unwanted bits from values. */
                for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
                        ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
                        ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
                }
                ipv6.val.flow_label &= ipv6.mask.flow_label;
                ipv6.val.traffic_class &= ipv6.mask.traffic_class;
                ipv6.val.next_hdr &= ipv6.mask.next_hdr;
                ipv6.val.hop_limit &= ipv6.mask.hop_limit;
        }
        flow->l3_protocol_en = !!ipv6.mask.next_hdr;
        flow->l3_protocol = ipv6.val.next_hdr;
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust
                        (flow, tunnel,
                         (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
                         (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
                mlx5_flow_spec_verbs_add(flow, &ipv6, size);
        }
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
static int
mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
                   const size_t flow_size, struct rte_flow_error *error)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
        unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
        struct ibv_flow_spec_tcp_udp udp = {
                .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
                .size = size,
        };
        int ret;

        if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "protocol filtering not compatible"
                                          " with UDP layer");
        if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
                              MLX5_FLOW_LAYER_OUTER_L3)))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L3 is mandatory to filter"
                                          " on L4");
        if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
                            MLX5_FLOW_LAYER_OUTER_L4))
                return rte_flow_error_set(error, ENOTSUP,
                                          RTE_FLOW_ERROR_TYPE_ITEM,
                                          item,
                                          "L4 layer is already"
                                          " present");
        if (!mask)
                mask = &rte_flow_item_udp_mask;
        ret = mlx5_flow_item_acceptable
                (item, (const uint8_t *)mask,
                 (const uint8_t *)&rte_flow_item_udp_mask,
                 sizeof(struct rte_flow_item_udp), error);
        if (ret < 0)
                return ret;
        flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
                MLX5_FLOW_LAYER_OUTER_L4_UDP;
        if (spec) {
                udp.val.dst_port = spec->hdr.dst_port;
                udp.val.src_port = spec->hdr.src_port;
                udp.mask.dst_port = mask->hdr.dst_port;
                udp.mask.src_port = mask->hdr.src_port;
                /* Remove unwanted bits from values. */
                udp.val.src_port &= udp.mask.src_port;
                udp.val.dst_port &= udp.mask.dst_port;
        }
        if (size <= flow_size) {
                mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
                                                  (IBV_RX_HASH_SRC_PORT_UDP |
                                                   IBV_RX_HASH_DST_PORT_UDP));
                flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
                mlx5_flow_spec_verbs_add(flow, &udp, size);
        }
        return size;
}

/**
 * Convert the @p item into a Verbs specification after ensuring the NIC
 * will understand and process it correctly.
 * If the necessary size for the conversion is greater than the @p flow_size,
 * nothing is written in @p flow, the validation is still performed.
 *
 * @param[in] item
 *   Item specification.
 * @param[in, out] flow
 *   Pointer to flow structure.
 * @param[in] flow_size
 *   Size in bytes of the available space in @p flow, if too small, nothing is
 *   written.
 * @param[out] error
 *   Pointer to error structure.
 *
 * @return
 *   On success the number of bytes consumed/necessary, if the returned value
 *   is less than or equal to @p flow_size, the @p item has fully been
 *   converted, otherwise another call with this returned memory size should
 *   be done.
 *   On error, a negative errno value is returned and rte_errno is set.
 */
1273 static int
1274 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1275                    const size_t flow_size, struct rte_flow_error *error)
1276 {
1277         const struct rte_flow_item_tcp *spec = item->spec;
1278         const struct rte_flow_item_tcp *mask = item->mask;
1279         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1280         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1281         struct ibv_flow_spec_tcp_udp tcp = {
1282                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1283                 .size = size,
1284         };
1285         int ret;
1286
1287         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1288                 return rte_flow_error_set(error, ENOTSUP,
1289                                           RTE_FLOW_ERROR_TYPE_ITEM,
1290                                           item,
1291                                           "protocol filtering not compatible"
1292                                           " with TCP layer");
1293         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1294                               MLX5_FLOW_LAYER_OUTER_L3)))
1295                 return rte_flow_error_set(error, ENOTSUP,
1296                                           RTE_FLOW_ERROR_TYPE_ITEM,
1297                                           item,
1298                                           "L3 is mandatory to filter on L4");
1299         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1300                             MLX5_FLOW_LAYER_OUTER_L4))
1301                 return rte_flow_error_set(error, ENOTSUP,
1302                                           RTE_FLOW_ERROR_TYPE_ITEM,
1303                                           item,
1304                                           "L4 layer is already present");
1305         if (!mask)
1306                 mask = &rte_flow_item_tcp_mask;
1307         ret = mlx5_flow_item_acceptable
1308                 (item, (const uint8_t *)mask,
1309                  (const uint8_t *)&rte_flow_item_tcp_mask,
1310                  sizeof(struct rte_flow_item_tcp), error);
1311         if (ret < 0)
1312                 return ret;
1313         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1314                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1315         if (spec) {
1316                 tcp.val.dst_port = spec->hdr.dst_port;
1317                 tcp.val.src_port = spec->hdr.src_port;
1318                 tcp.mask.dst_port = mask->hdr.dst_port;
1319                 tcp.mask.src_port = mask->hdr.src_port;
1320                 /* Remove unwanted bits from values. */
1321                 tcp.val.src_port &= tcp.mask.src_port;
1322                 tcp.val.dst_port &= tcp.mask.dst_port;
1323         }
1324         if (size <= flow_size) {
1325                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1326                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1327                                                    IBV_RX_HASH_DST_PORT_TCP));
1328                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1329                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1330         }
1331         return size;
1332 }
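
/*
 * A minimal usage sketch (illustrative, not part of the original file) of a
 * pattern accepted by the checks above: an L3 item must precede the TCP item
 * and only one L4 layer may be present.  Assumes <rte_byteorder.h> for
 * RTE_BE16(); the "example_" names are hypothetical.
 */
static int
example_validate_tcp_flow(uint16_t port_id)
{
        struct rte_flow_attr attr = { .ingress = 1 };
        struct rte_flow_item_tcp tcp_spec = {
                .hdr.dst_port = RTE_BE16(80),
        };
        struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                /* L3 is mandatory before an L4 item. */
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                /* A NULL mask defaults to rte_flow_item_tcp_mask. */
                { .type = RTE_FLOW_ITEM_TYPE_TCP, .spec = &tcp_spec },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_DROP },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        return rte_flow_validate(port_id, &attr, pattern, actions, &error);
}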
1333
1334 /**
1335  * Convert the @p item into a Verbs specification after ensuring the NIC
1336  * will understand and process it correctly.
1337  * If the necessary size for the conversion is greater than @p flow_size,
1338  * nothing is written in @p flow; the validation is still performed.
1339  *
1340  * @param[in] item
1341  *   Item specification.
1342  * @param[in, out] flow
1343  *   Pointer to flow structure.
1344  * @param[in] flow_size
1345  *   Size in bytes of the available space in @p flow; if too small,
1346  *   nothing is written.
1347  * @param[out] error
1348  *   Pointer to error structure.
1349  *
1350  * @return
1351  *   On success, the number of bytes consumed/necessary. If the returned
1352  *   value is less than or equal to @p flow_size, the @p item has been fully
1353  *   converted; otherwise another call with the returned size should be made.
1354  *   On error, a negative errno value is returned and rte_errno is set.
1355  */
1356 static int
1357 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1358                      const size_t flow_size, struct rte_flow_error *error)
1359 {
1360         const struct rte_flow_item_vxlan *spec = item->spec;
1361         const struct rte_flow_item_vxlan *mask = item->mask;
1362         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1363         struct ibv_flow_spec_tunnel vxlan = {
1364                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1365                 .size = size,
1366         };
1367         int ret;
1368         union vni {
1369                 uint32_t vlan_id;
1370                 uint8_t vni[4];
1371         } id = { .vlan_id = 0, };
1372
1373         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1374                 return rte_flow_error_set(error, ENOTSUP,
1375                                           RTE_FLOW_ERROR_TYPE_ITEM,
1376                                           item,
1377                                           "a tunnel is already present");
1378         /*
1379          * Verify only UDPv4 is present as defined in
1380          * https://tools.ietf.org/html/rfc7348
1381          */
1382         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1383                 return rte_flow_error_set(error, ENOTSUP,
1384                                           RTE_FLOW_ERROR_TYPE_ITEM,
1385                                           item,
1386                                           "no outer UDP layer found");
1387         if (!mask)
1388                 mask = &rte_flow_item_vxlan_mask;
1389         ret = mlx5_flow_item_acceptable
1390                 (item, (const uint8_t *)mask,
1391                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1392                  sizeof(struct rte_flow_item_vxlan), error);
1393         if (ret < 0)
1394                 return ret;
1395         if (spec) {
1396                 memcpy(&id.vni[1], spec->vni, 3);
1397                 vxlan.val.tunnel_id = id.vlan_id;
1398                 memcpy(&id.vni[1], mask->vni, 3);
1399                 vxlan.mask.tunnel_id = id.vlan_id;
1400                 /* Remove unwanted bits from values. */
1401                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1402         }
1403         /*
1404          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
1405          * only this layer is defined in the Verbs specification, it is
1406          * interpreted as a wildcard and all packets will match this
1407          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
1408          * udp), all packets matching the preceding layers will also
1409          * match this rule.  To avoid such a situation, VNI 0 is
1410          * currently refused.
1411          */
1412         if (!vxlan.val.tunnel_id)
1413                 return rte_flow_error_set(error, EINVAL,
1414                                           RTE_FLOW_ERROR_TYPE_ITEM,
1415                                           item,
1416                                           "VXLAN vni cannot be 0");
1417         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1418                 return rte_flow_error_set(error, EINVAL,
1419                                           RTE_FLOW_ERROR_TYPE_ITEM,
1420                                           item,
1421                                           "VXLAN tunnel must be fully defined");
1422         if (size <= flow_size) {
1423                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1424                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1425         }
1426         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1427         return size;
1428 }
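
/*
 * A minimal sketch (illustrative, not part of the original file) of a VXLAN
 * pattern meeting the constraints above: the outer stack must be fully
 * defined down to UDP and the VNI cannot be 0.  The "example_" names are
 * hypothetical.
 */
static const struct rte_flow_item_vxlan example_vxlan_spec = {
        .vni = { 0x00, 0x00, 0x2a }, /* VNI 42; VNI 0 is refused. */
};
static const struct rte_flow_item example_vxlan_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP }, /* Outer UDP is mandatory. */
        { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &example_vxlan_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};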
1429
1430 /**
1431  * Convert the @p item into a Verbs specification after ensuring the NIC
1432  * will understand and process it correctly.
1433  * If the necessary size for the conversion is greater than @p flow_size,
1434  * nothing is written in @p flow; the validation is still performed.
1435  *
1436  * @param dev
1437  *   Pointer to Ethernet device.
1438  * @param[in] item
1439  *   Item specification.
1440  * @param[in, out] flow
1441  *   Pointer to flow structure.
1442  * @param[in] flow_size
1443  *   Size in bytes of the available space in @p flow; if too small,
1444  *   nothing is written.
1445  * @param[out] error
1446  *   Pointer to error structure.
1447  *
1448  * @return
1449  *   On success, the number of bytes consumed/necessary. If the returned
1450  *   value is less than or equal to @p flow_size, the @p item has been fully
1451  *   converted; otherwise another call with the returned size should be made.
1452  *   On error, a negative errno value is returned and rte_errno is set.
1453  */
1454 static int
1455 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
1456                          const struct rte_flow_item *item,
1457                          struct rte_flow *flow, const size_t flow_size,
1458                          struct rte_flow_error *error)
1459 {
1460         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1461         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1462         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1463         struct ibv_flow_spec_tunnel vxlan_gpe = {
1464                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1465                 .size = size,
1466         };
1467         int ret;
1468         union vni {
1469                 uint32_t vlan_id;
1470                 uint8_t vni[4];
1471         } id = { .vlan_id = 0, };
1472
1473         if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
1474                 return rte_flow_error_set(error, ENOTSUP,
1475                                           RTE_FLOW_ERROR_TYPE_ITEM,
1476                                           item,
1477                                           "L3 VXLAN is not enabled by device"
1478                                           " parameter and/or not configured in"
1479                                           " firmware");
1480         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1481                 return rte_flow_error_set(error, ENOTSUP,
1482                                           RTE_FLOW_ERROR_TYPE_ITEM,
1483                                           item,
1484                                           "a tunnel is already present");
1485         /*
1486          * Verify only UDP is present, as required by VXLAN-GPE
1487          * (draft-ietf-nvo3-vxlan-gpe).
1488          */
1489         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1490                 return rte_flow_error_set(error, ENOTSUP,
1491                                           RTE_FLOW_ERROR_TYPE_ITEM,
1492                                           item,
1493                                           "no outer UDP layer found");
1494         if (!mask)
1495                 mask = &rte_flow_item_vxlan_gpe_mask;
1496         ret = mlx5_flow_item_acceptable
1497                 (item, (const uint8_t *)mask,
1498                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1499                  sizeof(struct rte_flow_item_vxlan_gpe), error);
1500         if (ret < 0)
1501                 return ret;
1502         if (spec) {
1503                 memcpy(&id.vni[1], spec->vni, 3);
1504                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1505                 memcpy(&id.vni[1], mask->vni, 3);
1506                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1507                 if (spec->protocol)
1508                         return rte_flow_error_set
1509                                 (error, EINVAL,
1510                                  RTE_FLOW_ERROR_TYPE_ITEM,
1511                                  item,
1512                                  "VxLAN-GPE protocol not supported");
1513                 /* Remove unwanted bits from values. */
1514                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1515         }
1516         /*
1517          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1518          * layer is defined in the Verbs specification, it is interpreted as a
1519          * wildcard and all packets will match this rule; if it follows a full
1520          * stack layer (e.g. eth / ipv4 / udp), all packets matching the
1521          * preceding layers will also match this rule.  To avoid such a
1522          * situation, VNI 0 is currently refused.
1523          */
1524         if (!vxlan_gpe.val.tunnel_id)
1525                 return rte_flow_error_set(error, EINVAL,
1526                                           RTE_FLOW_ERROR_TYPE_ITEM,
1527                                           item,
1528                                           "VXLAN-GPE vni cannot be 0");
1529         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1530                 return rte_flow_error_set(error, EINVAL,
1531                                           RTE_FLOW_ERROR_TYPE_ITEM,
1532                                           item,
1533                                           "VXLAN-GPE tunnel must be fully"
1534                                           " defined");
1535         if (size <= flow_size) {
1536                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1537                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1538         }
1539         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1540         return size;
1541 }
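
/*
 * Matching VXLAN-GPE additionally requires the "l3_vxlan_en" device
 * parameter (and firmware support), as checked above.  A sketch of enabling
 * it through EAL device arguments, with an illustrative PCI address:
 *
 *     testpmd -w 0000:03:00.0,l3_vxlan_en=1 -- -i
 *
 * The pattern itself mirrors the VXLAN example: eth / ipv4 / udp /
 * vxlan-gpe with a non-zero VNI.
 */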
1542
1543 /**
1544  * Update the protocol in Verbs IPv4/IPv6 spec.
1545  *
1546  * @param[in, out] attr
1547  *   Pointer to Verbs attributes structure.
1548  * @param[in] search
1549  *   Specification type to search in order to update the IP protocol.
1550  * @param[in] protocol
1551  *   Protocol value to set if none is present in the specification.
1552  */
1553 static void
1554 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1555                                       enum ibv_flow_spec_type search,
1556                                       uint8_t protocol)
1557 {
1558         unsigned int i;
1559         struct ibv_spec_header *hdr;
1560
1561         if (!attr)
1562                 return;
1563         hdr = (struct ibv_spec_header *)((uint8_t *)attr + sizeof(*attr));
1564         for (i = 0; i != attr->num_of_specs; ++i) {
1565                 if (hdr->type == search) {
1566                         union {
1567                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1568                                 struct ibv_flow_spec_ipv6 *ipv6;
1569                         } ip;
1570
1571                         switch (search) {
1572                         case IBV_FLOW_SPEC_IPV4_EXT:
1573                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1574                                 if (!ip.ipv4->val.proto) {
1575                                         ip.ipv4->val.proto = protocol;
1576                                         ip.ipv4->mask.proto = 0xff;
1577                                 }
1578                                 break;
1579                         case IBV_FLOW_SPEC_IPV6:
1580                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1581                                 if (!ip.ipv6->val.next_hdr) {
1582                                         ip.ipv6->val.next_hdr = protocol;
1583                                         ip.ipv6->mask.next_hdr = 0xff;
1584                                 }
1585                                 break;
1586                         default:
1587                                 break;
1588                         }
1589                         break;
1590                 }
1591                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1592         }
1593 }
1594
1595 /**
1596  * Convert the @p item into a Verbs specification after ensuring the NIC
1597  * will understand and process it correctly.
1598  * It also updates the previous L3 layer with the IP protocol value
1599  * matching GRE.
1600  * If the necessary size for the conversion is greater than @p flow_size,
1601  * nothing is written in @p flow; the validation is still performed.
1602  *
1605  * @param[in] item
1606  *   Item specification.
1607  * @param[in, out] flow
1608  *   Pointer to flow structure.
1609  * @param[in] flow_size
1610  *   Size in bytes of the available space in @p flow; if too small,
1611  *   nothing is written.
1612  * @param[out] error
1613  *   Pointer to error structure.
1614  *
1615  * @return
1616  *   On success, the number of bytes consumed/necessary. If the returned
1617  *   value is less than or equal to @p flow_size, the @p item has been fully
1618  *   converted; otherwise another call with the returned size should be made.
1619  *   On error, a negative errno value is returned and rte_errno is set.
1620  */
1621 static int
1622 mlx5_flow_item_gre(const struct rte_flow_item *item,
1623                    struct rte_flow *flow, const size_t flow_size,
1624                    struct rte_flow_error *error)
1625 {
1626         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1627         const struct rte_flow_item_gre *spec = item->spec;
1628         const struct rte_flow_item_gre *mask = item->mask;
1629 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1630         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1631         struct ibv_flow_spec_gre tunnel = {
1632                 .type = IBV_FLOW_SPEC_GRE,
1633                 .size = size,
1634         };
1635 #else
1636         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1637         struct ibv_flow_spec_tunnel tunnel = {
1638                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1639                 .size = size,
1640         };
1641 #endif
1642         int ret;
1643
1644         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
1645                 return rte_flow_error_set(error, ENOTSUP,
1646                                           RTE_FLOW_ERROR_TYPE_ITEM,
1647                                           item,
1648                                           "protocol filtering not compatible"
1649                                           " with this GRE layer");
1650         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1651                 return rte_flow_error_set(error, ENOTSUP,
1652                                           RTE_FLOW_ERROR_TYPE_ITEM,
1653                                           item,
1654                                           "a tunnel is already present");
1655         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
1656                 return rte_flow_error_set(error, ENOTSUP,
1657                                           RTE_FLOW_ERROR_TYPE_ITEM,
1658                                           item,
1659                                           "L3 Layer is missing");
1660         if (!mask)
1661                 mask = &rte_flow_item_gre_mask;
1662         ret = mlx5_flow_item_acceptable
1663                 (item, (const uint8_t *)mask,
1664                  (const uint8_t *)&rte_flow_item_gre_mask,
1665                  sizeof(struct rte_flow_item_gre), error);
1666         if (ret < 0)
1667                 return ret;
1668 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1669         if (spec) {
1670                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1671                 tunnel.val.protocol = spec->protocol;
1672                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1673                 tunnel.mask.protocol = mask->protocol;
1674                 /* Remove unwanted bits from values. */
1675                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1676                 tunnel.val.protocol &= tunnel.mask.protocol;
1677                 tunnel.val.key &= tunnel.mask.key;
1678         }
1679 #else
1680         if (spec && (spec->protocol & mask->protocol))
1681                 return rte_flow_error_set(error, ENOTSUP,
1682                                           RTE_FLOW_ERROR_TYPE_ITEM,
1683                                           item,
1684                                           "without MPLS support the"
1685                                           " specification cannot be used for"
1686                                           " filtering");
1687 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1688         if (size <= flow_size) {
1689                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1690                         mlx5_flow_item_gre_ip_protocol_update
1691                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1692                                  MLX5_IP_PROTOCOL_GRE);
1693                 else
1694                         mlx5_flow_item_gre_ip_protocol_update
1695                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1696                                  MLX5_IP_PROTOCOL_GRE);
1697                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1698                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1699         }
1700         flow->layers |= MLX5_FLOW_LAYER_GRE;
1701         return size;
1702 }
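
/*
 * A minimal sketch (illustrative, not part of the original file) of a GRE
 * pattern: an L3 item must precede the GRE item, and the conversion above
 * back-patches that L3 Verbs spec with IP protocol 47 (GRE) when the
 * application left it unspecified.  The "example_" names are hypothetical.
 */
static const struct rte_flow_item example_gre_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 }, /* proto filled in by the PMD. */
        { .type = RTE_FLOW_ITEM_TYPE_GRE },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};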
1703
1704 /**
1705  * Convert the @p item into a Verbs specification after ensuring the NIC
1706  * will understand and process it correctly.
1707  * If the necessary size for the conversion is greater than @p flow_size,
1708  * nothing is written in @p flow; the validation is still performed.
1709  *
1710  * @param[in] item
1711  *   Item specification.
1712  * @param[in, out] flow
1713  *   Pointer to flow structure.
1714  * @param[in] flow_size
1715  *   Size in bytes of the available space in @p flow; if too small,
1716  *   nothing is written.
1717  * @param[out] error
1718  *   Pointer to error structure.
1719  *
1720  * @return
1721  *   On success, the number of bytes consumed/necessary. If the returned
1722  *   value is less than or equal to @p flow_size, the @p item has been fully
1723  *   converted; otherwise another call with the returned size should be made.
1724  *   On error, a negative errno value is returned and rte_errno is set.
1725  */
1726 static int
1727 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
1728                     struct rte_flow *flow __rte_unused,
1729                     const size_t flow_size __rte_unused,
1730                     struct rte_flow_error *error)
1731 {
1732 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1733         const struct rte_flow_item_mpls *spec = item->spec;
1734         const struct rte_flow_item_mpls *mask = item->mask;
1735         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1736         struct ibv_flow_spec_mpls mpls = {
1737                 .type = IBV_FLOW_SPEC_MPLS,
1738                 .size = size,
1739         };
1740         int ret;
1741
1742         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
1743                 return rte_flow_error_set(error, ENOTSUP,
1744                                           RTE_FLOW_ERROR_TYPE_ITEM,
1745                                           item,
1746                                           "protocol filtering not compatible"
1747                                           " with MPLS layer");
1748         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1749                 return rte_flow_error_set(error, ENOTSUP,
1750                                           RTE_FLOW_ERROR_TYPE_ITEM,
1751                                           item,
1752                                           "a tunnel is already"
1753                                           " present");
1754         if (!mask)
1755                 mask = &rte_flow_item_mpls_mask;
1756         ret = mlx5_flow_item_acceptable
1757                 (item, (const uint8_t *)mask,
1758                  (const uint8_t *)&rte_flow_item_mpls_mask,
1759                  sizeof(struct rte_flow_item_mpls), error);
1760         if (ret < 0)
1761                 return ret;
1762         if (spec) {
1763                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1764                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1765                 /* Remove unwanted bits from values.  */
1766                 mpls.val.label &= mpls.mask.label;
1767         }
1768         if (size <= flow_size) {
1769                 mlx5_flow_spec_verbs_add(flow, &mpls, size);
1770                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1771         }
1772         flow->layers |= MLX5_FLOW_LAYER_MPLS;
1773         return size;
1774 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1775         return rte_flow_error_set(error, ENOTSUP,
1776                                   RTE_FLOW_ERROR_TYPE_ITEM,
1777                                   item,
1778                                   "MPLS is not supported by Verbs, please"
1779                                   " update.");
1780 }
1781
1782 /**
1783  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1784  * will understand and process it correctly.
1785  * The conversion is performed item by item; each of them is written into
1786  * @p flow if its size is less than or equal to @p flow_size.
1787  * Validation and memory consumption computation are still performed until
1788  * the end of @p pattern, unless an error is encountered.
1789  *
1790  * @param[in] pattern
1791  *   Flow pattern.
1792  * @param[in, out] flow
1793  *   Pointer to the rte_flow structure.
1794  * @param[in] flow_size
1795  *   Size in bytes of the available space in @p flow; if too small, some
1796  *   garbage may be present.
1797  * @param[out] error
1798  *   Pointer to error structure.
1799  *
1800  * @return
1801  *   On success, the number of bytes consumed/necessary. If the returned
1802  *   value is less than or equal to @p flow_size, the @p pattern has been
1803  *   fully converted; otherwise another call with the returned size should
1804  *   be made.
1805  *   On error, a negative errno value is returned and rte_errno is set.
1806  */
1807 static int
1808 mlx5_flow_items(struct rte_eth_dev *dev,
1809                 const struct rte_flow_item pattern[],
1810                 struct rte_flow *flow, const size_t flow_size,
1811                 struct rte_flow_error *error)
1812 {
1813         int remain = flow_size;
1814         size_t size = 0;
1815
1816         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1817                 int ret = 0;
1818
1819                 switch (pattern->type) {
1820                 case RTE_FLOW_ITEM_TYPE_VOID:
1821                         break;
1822                 case RTE_FLOW_ITEM_TYPE_ETH:
1823                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1824                         break;
1825                 case RTE_FLOW_ITEM_TYPE_VLAN:
1826                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1827                         break;
1828                 case RTE_FLOW_ITEM_TYPE_IPV4:
1829                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1830                         break;
1831                 case RTE_FLOW_ITEM_TYPE_IPV6:
1832                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1833                         break;
1834                 case RTE_FLOW_ITEM_TYPE_UDP:
1835                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1836                         break;
1837                 case RTE_FLOW_ITEM_TYPE_TCP:
1838                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1839                         break;
1840                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1841                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1842                                                    error);
1843                         break;
1844                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1845                         ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
1846                                                        remain, error);
1847                         break;
1848                 case RTE_FLOW_ITEM_TYPE_GRE:
1849                         ret = mlx5_flow_item_gre(pattern, flow, remain, error);
1850                         break;
1851                 case RTE_FLOW_ITEM_TYPE_MPLS:
1852                         ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
1853                         break;
1854                 default:
1855                         return rte_flow_error_set(error, ENOTSUP,
1856                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1857                                                   pattern,
1858                                                   "item not supported");
1859                 }
1860                 if (ret < 0)
1861                         return ret;
1862                 if (remain > ret)
1863                         remain -= ret;
1864                 else
1865                         remain = 0;
1866                 size += ret;
1867         }
1868         if (!flow->layers) {
1869                 const struct rte_flow_item item = {
1870                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1871                 };
1872
1873                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1874         }
1875         return size;
1876 }
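
/*
 * A sketch (illustrative, abridged) of the size-negotiation convention the
 * converters above follow: a first call with no buffer validates the flow
 * and returns the required size, a second call performs the conversion.
 * Assumes a forward declaration of mlx5_flow_merge(), which is defined
 * below; error handling is simplified.
 */
static struct rte_flow *
example_two_pass_merge(struct rte_eth_dev *dev,
                       const struct rte_flow_attr *attr,
                       const struct rte_flow_item items[],
                       const struct rte_flow_action actions[],
                       struct rte_flow_error *error)
{
        struct rte_flow fake = { .layers = 0, };
        struct rte_flow *flow;
        int size;
        int ret;

        /* First pass: validation only, computes the required size. */
        size = mlx5_flow_merge(dev, &fake, 0, attr, items, actions, error);
        if (size < 0)
                return NULL;
        flow = rte_calloc(__func__, 1, size, 0);
        if (!flow) {
                rte_errno = ENOMEM;
                return NULL;
        }
        /* Second pass: enough room, the flow is actually written. */
        ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
        if (ret < 0) {
                rte_free(flow);
                return NULL;
        }
        return flow;
}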
1877
1878 /**
1879  * Convert the @p action into a Verbs specification after ensuring the NIC
1880  * will understand and process it correctly.
1881  * If the necessary size for the conversion is greater than @p flow_size,
1882  * nothing is written in @p flow; the validation is still performed.
1883  *
1884  * @param[in] action
1885  *   Action configuration.
1886  * @param[in, out] flow
1887  *   Pointer to flow structure.
1888  * @param[in] flow_size
1889  *   Size in bytes of the available space in @p flow; if too small,
1890  *   nothing is written.
1891  * @param[out] error
1892  *   Pointer to error structure.
1893  *
1894  * @return
1895  *   On success, the number of bytes consumed/necessary. If the returned
1896  *   value is less than or equal to @p flow_size, the @p action has been
1897  *   fully converted; otherwise another call with the returned size should
1898  *   be made.
1899  *   On error, a negative errno value is returned and rte_errno is set.
1900  */
1901 static int
1902 mlx5_flow_action_drop(const struct rte_flow_action *action,
1903                       struct rte_flow *flow, const size_t flow_size,
1904                       struct rte_flow_error *error)
1905 {
1906         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1907         struct ibv_flow_spec_action_drop drop = {
1908                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1909                         .size = size,
1910         };
1911
1912         if (flow->fate)
1913                 return rte_flow_error_set(error, ENOTSUP,
1914                                           RTE_FLOW_ERROR_TYPE_ACTION,
1915                                           action,
1916                                           "multiple fate actions are not"
1917                                           " supported");
1918         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1919                 return rte_flow_error_set(error, ENOTSUP,
1920                                           RTE_FLOW_ERROR_TYPE_ACTION,
1921                                           action,
1922                                           "drop is not compatible with"
1923                                           " flag/mark action");
1924         if (size <= flow_size)
1925                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1926         flow->fate |= MLX5_FLOW_FATE_DROP;
1927         return size;
1928 }
1929
1930 /**
1931  * Convert the @p action into @p flow after ensuring the NIC will understand
1932  * and process it correctly.
1933  *
1934  * @param[in] dev
1935  *   Pointer to Ethernet device structure.
1936  * @param[in] action
1937  *   Action configuration.
1938  * @param[in, out] flow
1939  *   Pointer to flow structure.
1940  * @param[out] error
1941  *   Pointer to error structure.
1942  *
1943  * @return
1944  *   0 on success, a negative errno value otherwise and rte_errno is set.
1945  */
1946 static int
1947 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1948                        const struct rte_flow_action *action,
1949                        struct rte_flow *flow,
1950                        struct rte_flow_error *error)
1951 {
1952         struct priv *priv = dev->data->dev_private;
1953         const struct rte_flow_action_queue *queue = action->conf;
1954
1955         if (flow->fate)
1956                 return rte_flow_error_set(error, ENOTSUP,
1957                                           RTE_FLOW_ERROR_TYPE_ACTION,
1958                                           action,
1959                                           "multiple fate actions are not"
1960                                           " supported");
1961         if (queue->index >= priv->rxqs_n)
1962                 return rte_flow_error_set(error, EINVAL,
1963                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1964                                           &queue->index,
1965                                           "queue index out of range");
1966         if (!(*priv->rxqs)[queue->index])
1967                 return rte_flow_error_set(error, EINVAL,
1968                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1969                                           &queue->index,
1970                                           "queue is not configured");
1971         if (flow->queue)
1972                 (*flow->queue)[0] = queue->index;
1973         flow->rss.queue_num = 1;
1974         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1975         return 0;
1976 }
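
/*
 * A minimal sketch (illustrative, not part of the original file) of the
 * QUEUE fate action validated above: the index must address a configured
 * Rx queue and no other fate action may be present.  The "example_" names
 * are hypothetical.
 */
static const struct rte_flow_action_queue example_queue = { .index = 0 };
static const struct rte_flow_action example_queue_actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};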
1977
1978 /**
1979  * Ensure the @p action will be understood and used correctly by the NIC.
1980  *
1981  * @param dev
1982  *   Pointer to Ethernet device structure.
1983  * @param[in] action
1984  *   Pointer to flow actions array.
1985  * @param[in, out] flow
1986  *   Pointer to the rte_flow structure.
1987  * @param[out] error
1988  *   Pointer to error structure.
1989  *
1990  * @return
1991  *   On success, @p flow->queue and @p flow->rss are filled and valid.
1992  *   On error, a negative errno value is returned and rte_errno is set.
1993  */
1994 static int
1995 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1996                      const struct rte_flow_action *action,
1997                      struct rte_flow *flow,
1998                      struct rte_flow_error *error)
1999 {
2000         struct priv *priv = dev->data->dev_private;
2001         const struct rte_flow_action_rss *rss = action->conf;
2002         unsigned int i;
2003
2004         if (flow->fate)
2005                 return rte_flow_error_set(error, ENOTSUP,
2006                                           RTE_FLOW_ERROR_TYPE_ACTION,
2007                                           action,
2008                                           "multiple fate actions are not"
2009                                           " supported");
2010         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
2011             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
2012                 return rte_flow_error_set(error, ENOTSUP,
2013                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2014                                           &rss->func,
2015                                           "RSS hash function not supported");
2016 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
2017         if (rss->level > 2)
2018 #else
2019         if (rss->level > 1)
2020 #endif
2021                 return rte_flow_error_set(error, ENOTSUP,
2022                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2023                                           &rss->level,
2024                                           "tunnel RSS is not supported");
2025         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
2026                 return rte_flow_error_set(error, ENOTSUP,
2027                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2028                                           &rss->key_len,
2029                                           "RSS hash key too small");
2030         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
2031                 return rte_flow_error_set(error, ENOTSUP,
2032                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2033                                           &rss->key_len,
2034                                           "RSS hash key too large");
2035         if (rss->queue_num > priv->config.ind_table_max_size)
2036                 return rte_flow_error_set(error, ENOTSUP,
2037                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2038                                           &rss->queue_num,
2039                                           "number of queues too large");
2040         if (rss->types & MLX5_RSS_HF_MASK)
2041                 return rte_flow_error_set(error, ENOTSUP,
2042                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2043                                           &rss->types,
2044                                           "some RSS protocols are not"
2045                                           " supported");
2046         for (i = 0; i != rss->queue_num; ++i) {
2047                 if (!(*priv->rxqs)[rss->queue[i]])
2048                         return rte_flow_error_set
2049                                 (error, EINVAL,
2050                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2051                                  &rss->queue[i],
2052                                  "queue is not configured");
2053         }
2054         if (flow->queue)
2055                 memcpy((*flow->queue), rss->queue,
2056                        rss->queue_num * sizeof(uint16_t));
2057         flow->rss.queue_num = rss->queue_num;
2058         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
2059         flow->rss.types = rss->types;
2060         flow->rss.level = rss->level;
2061         flow->fate |= MLX5_FLOW_FATE_RSS;
2062         return 0;
2063 }
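
/*
 * A minimal sketch (illustrative, not part of the original file) of an RSS
 * action accepted by the checks above: default or Toeplitz hash function,
 * no tunnel (level <= 1), a key of exactly MLX5_RSS_HASH_KEY_LEN bytes and
 * only configured queues.  The "example_" names are hypothetical.
 */
static const uint8_t example_rss_key[MLX5_RSS_HASH_KEY_LEN] = {
        0x6d, 0x5a, /* ...remaining bytes of a 40-byte Toeplitz key... */
};
static const uint16_t example_rss_queues[] = { 0, 1, 2, 3 };
static const struct rte_flow_action_rss example_rss_conf = {
        .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
        .level = 1, /* Outermost encapsulation level. */
        .types = ETH_RSS_IP | ETH_RSS_UDP,
        .key_len = MLX5_RSS_HASH_KEY_LEN,
        .key = example_rss_key,
        .queue_num = RTE_DIM(example_rss_queues),
        .queue = example_rss_queues,
};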
2064
2065 /**
2066  * Convert the @p action into a Verbs specification after ensuring the NIC
2067  * will understand and process it correctly.
2068  * If the necessary size for the conversion is greater than @p flow_size,
2069  * nothing is written in @p flow; the validation is still performed.
2070  *
2071  * @param[in] action
2072  *   Action configuration.
2073  * @param[in, out] flow
2074  *   Pointer to flow structure.
2075  * @param[in] flow_size
2076  *   Size in bytes of the available space in @p flow; if too small,
2077  *   nothing is written.
2078  * @param[out] error
2079  *   Pointer to error structure.
2080  *
2081  * @return
2082  *   On success, the number of bytes consumed/necessary. If the returned
2083  *   value is less than or equal to @p flow_size, the @p action has been
2084  *   fully converted; otherwise another call with the returned size should
2085  *   be made.
2086  *   On error, a negative errno value is returned and rte_errno is set.
2087  */
2088 static int
2089 mlx5_flow_action_flag(const struct rte_flow_action *action,
2090                       struct rte_flow *flow, const size_t flow_size,
2091                       struct rte_flow_error *error)
2092 {
2093         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2094         struct ibv_flow_spec_action_tag tag = {
2095                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2096                 .size = size,
2097                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
2098         };
2099         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2100
2101         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
2102                 return rte_flow_error_set(error, ENOTSUP,
2103                                           RTE_FLOW_ERROR_TYPE_ACTION,
2104                                           action,
2105                                           "flag action already present");
2106         if (flow->fate & MLX5_FLOW_FATE_DROP)
2107                 return rte_flow_error_set(error, ENOTSUP,
2108                                           RTE_FLOW_ERROR_TYPE_ACTION,
2109                                           action,
2110                                           "flag is not compatible with drop"
2111                                           " action");
2112         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2113                 size = 0;
2114         else if (size <= flow_size && verbs)
2115                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2116         flow->modifier |= MLX5_FLOW_MOD_FLAG;
2117         return size;
2118 }
2119
2120 /**
2121  * Update verbs specification to modify the flag to mark.
2122  *
2123  * @param[in, out] verbs
2124  *   Pointer to the mlx5_flow_verbs structure.
2125  * @param[in] mark_id
2126  *   Mark identifier to replace the flag.
2127  */
2128 static void
2129 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
2130 {
2131         struct ibv_spec_header *hdr;
2132         int i;
2133
2134         if (!verbs)
2135                 return;
2136         /* Update Verbs specification. */
2137         hdr = (struct ibv_spec_header *)verbs->specs;
2138         if (!hdr)
2139                 return;
2140         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
2141                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
2142                         struct ibv_flow_spec_action_tag *t =
2143                                 (struct ibv_flow_spec_action_tag *)hdr;
2144
2145                         t->tag_id = mlx5_flow_mark_set(mark_id);
2146                 }
2147                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
2148         }
2149 }
2150
2151 /**
2152  * Convert the @p action into @p flow (or by updating the already present
2153  * Flag Verbs specification) after ensuring the NIC will understand and
2154  * process it correctly.
2155  * If the necessary size for the conversion is greater than @p flow_size,
2156  * nothing is written in @p flow; the validation is still performed.
2157  *
2158  * @param[in] action
2159  *   Action configuration.
2160  * @param[in, out] flow
2161  *   Pointer to flow structure.
2162  * @param[in] flow_size
2163  *   Size in bytes of the available space in @p flow; if too small,
2164  *   nothing is written.
2165  * @param[out] error
2166  *   Pointer to error structure.
2167  *
2168  * @return
2169  *   On success, the number of bytes consumed/necessary. If the returned
2170  *   value is less than or equal to @p flow_size, the @p action has been
2171  *   fully converted; otherwise another call with the returned size should
2172  *   be made.
2173  *   On error, a negative errno value is returned and rte_errno is set.
2174  */
2175 static int
2176 mlx5_flow_action_mark(const struct rte_flow_action *action,
2177                       struct rte_flow *flow, const size_t flow_size,
2178                       struct rte_flow_error *error)
2179 {
2180         const struct rte_flow_action_mark *mark = action->conf;
2181         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2182         struct ibv_flow_spec_action_tag tag = {
2183                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2184                 .size = size,
2185         };
2186         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2187
2188         if (!mark)
2189                 return rte_flow_error_set(error, EINVAL,
2190                                           RTE_FLOW_ERROR_TYPE_ACTION,
2191                                           action,
2192                                           "configuration cannot be null");
2193         if (mark->id >= MLX5_FLOW_MARK_MAX)
2194                 return rte_flow_error_set(error, EINVAL,
2195                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2196                                           &mark->id,
2197                                           "mark id must in 0 <= id < "
2198                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2199         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2200                 return rte_flow_error_set(error, ENOTSUP,
2201                                           RTE_FLOW_ERROR_TYPE_ACTION,
2202                                           action,
2203                                           "mark action already present");
2204         if (flow->fate & MLX5_FLOW_FATE_DROP)
2205                 return rte_flow_error_set(error, ENOTSUP,
2206                                           RTE_FLOW_ERROR_TYPE_ACTION,
2207                                           action,
2208                                           "mark is not compatible with drop"
2209                                           " action");
2210         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
2211                 mlx5_flow_verbs_mark_update(verbs, mark->id);
2212                 size = 0;
2213         } else if (size <= flow_size) {
2214                 tag.tag_id = mlx5_flow_mark_set(mark->id);
2215                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2216         }
2217         flow->modifier |= MLX5_FLOW_MOD_MARK;
2218         return size;
2219 }
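
/*
 * A minimal sketch (illustrative, not part of the original file) of the
 * MARK action validated above and of reading the mark back on Rx.  Assumes
 * <rte_mbuf.h>; the "example_" names are hypothetical.
 */
static const struct rte_flow_action_mark example_mark = { .id = 42 };
static const struct rte_flow_action_queue example_mark_queue = { .index = 0 };
static const struct rte_flow_action example_mark_actions[] = {
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &example_mark },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_mark_queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};

static inline uint32_t
example_read_mark(const struct rte_mbuf *m)
{
        /* Marked packets carry PKT_RX_FDIR_ID; the value is in fdir.hi. */
        return (m->ol_flags & PKT_RX_FDIR_ID) ? m->hash.fdir.hi : 0;
}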
2220
2221 /**
2222  * Convert the @p action into a Verbs specification after ensuring the NIC
2223  * will understand and process it correctly.
2224  * If the necessary size for the conversion is greater than @p flow_size,
2225  * nothing is written in @p flow; the validation is still performed.
2226  *
2227  * @param[in] action
2228  *   Action configuration.
2229  * @param[in, out] flow
2230  *   Pointer to flow structure.
2231  * @param[in] flow_size
2232  *   Size in bytes of the available space in @p flow; if too small, nothing
2233  *   is written.
2234  * @param[out] error
2235  *   Pointer to error structure.
2236  *
2237  * @return
2238  *   On success, the number of bytes consumed/necessary. If the returned
2239  *   value is less than or equal to @p flow_size, the @p action has been
2240  *   fully converted; otherwise another call with the returned size should
2241  *   be made.
2242  *   On error, a negative errno value is returned and rte_errno is set.
2243  */
2244 static int
2245 mlx5_flow_action_count(struct rte_eth_dev *dev,
2246                        const struct rte_flow_action *action,
2247                        struct rte_flow *flow,
2248                        const size_t flow_size __rte_unused,
2249                        struct rte_flow_error *error)
2250 {
2251         const struct rte_flow_action_count *count = action->conf;
2252 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2253         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
2254         struct ibv_flow_spec_counter_action counter = {
2255                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
2256                 .size = size,
2257         };
2258 #endif
2259
2260         if (!flow->counter) {
2261                 flow->counter = mlx5_flow_counter_new(dev, count->shared,
2262                                                       count->id);
2263                 if (!flow->counter)
2264                         return rte_flow_error_set(error, ENOTSUP,
2265                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2266                                                   action,
2267                                                   "cannot get counter"
2268                                                   " context.");
2269         }
2270         if (!((struct priv *)dev->data->dev_private)->config.flow_counter_en)
2271                 return rte_flow_error_set(error, ENOTSUP,
2272                                           RTE_FLOW_ERROR_TYPE_ACTION,
2273                                           action,
2274                                           "flow counters are not supported.");
2275         flow->modifier |= MLX5_FLOW_MOD_COUNT;
2276 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2277         counter.counter_set_handle = flow->counter->cs->handle;
2278         if (size <= flow_size)
2279                 mlx5_flow_spec_verbs_add(flow, &counter, size);
2280         return size;
2281 #endif
2282         return 0;
2283 }
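
/*
 * A minimal sketch (illustrative, not part of the original file) of the
 * COUNT action above with a shared counter: flows created with the same
 * (shared, id) pair reference a single counter and rte_flow_query() then
 * reports their aggregate statistics.  Assumes <stdio.h> and <inttypes.h>;
 * the "example_" names are hypothetical.
 */
static const struct rte_flow_action_count example_count = {
        .shared = 1,
        .id = 42, /* All flows using (shared=1, id=42) share one counter. */
};

static int
example_query_count(uint16_t port_id, struct rte_flow *flow,
                    struct rte_flow_error *error)
{
        struct rte_flow_query_count query = { .reset = 0 };
        const struct rte_flow_action action = {
                .type = RTE_FLOW_ACTION_TYPE_COUNT,
        };
        int ret;

        ret = rte_flow_query(port_id, flow, &action, &query, error);
        if (!ret && query.hits_set)
                printf("hits=%" PRIu64 " bytes=%" PRIu64 "\n",
                       query.hits, query.bytes);
        return ret;
}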
2284
2285 /**
2286  * Convert the @p actions into @p flow after ensuring the NIC will
2287  * understand and process them correctly.
2288  * The conversion is performed action by action; each of them is written
2289  * into @p flow if its size is less than or equal to @p flow_size.
2290  * Validation and memory consumption computation are still performed until
2291  * the end of @p actions, unless an error is encountered.
2292  *
2293  * @param[in] dev
2294  *   Pointer to Ethernet device structure.
2295  * @param[in] actions
2296  *   Pointer to flow actions array.
2297  * @param[in, out] flow
2298  *   Pointer to the rte_flow structure.
2299  * @param[in] flow_size
2300  *   Size in bytes of the available space in @p flow; if too small, some
2301  *   garbage may be present.
2302  * @param[out] error
2303  *   Pointer to error structure.
2304  *
2305  * @return
2306  *   On success, the number of bytes consumed/necessary. If the returned
2307  *   value is less than or equal to @p flow_size, the @p actions have been
2308  *   fully converted; otherwise another call with the returned size should
2309  *   be made.
2310  *   On error, a negative errno value is returned and rte_errno is set.
2311  */
2312 static int
2313 mlx5_flow_actions(struct rte_eth_dev *dev,
2314                   const struct rte_flow_action actions[],
2315                   struct rte_flow *flow, const size_t flow_size,
2316                   struct rte_flow_error *error)
2317 {
2318         size_t size = 0;
2319         int remain = flow_size;
2320         int ret = 0;
2321
2322         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2323                 switch (actions->type) {
2324                 case RTE_FLOW_ACTION_TYPE_VOID:
2325                         break;
2326                 case RTE_FLOW_ACTION_TYPE_FLAG:
2327                         ret = mlx5_flow_action_flag(actions, flow, remain,
2328                                                     error);
2329                         break;
2330                 case RTE_FLOW_ACTION_TYPE_MARK:
2331                         ret = mlx5_flow_action_mark(actions, flow, remain,
2332                                                     error);
2333                         break;
2334                 case RTE_FLOW_ACTION_TYPE_DROP:
2335                         ret = mlx5_flow_action_drop(actions, flow, remain,
2336                                                     error);
2337                         break;
2338                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2339                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
2340                         break;
2341                 case RTE_FLOW_ACTION_TYPE_RSS:
2342                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
2343                         break;
2344                 case RTE_FLOW_ACTION_TYPE_COUNT:
2345                         ret = mlx5_flow_action_count(dev, actions, flow, remain,
2346                                                      error);
2347                         break;
2348                 default:
2349                         return rte_flow_error_set(error, ENOTSUP,
2350                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2351                                                   actions,
2352                                                   "action not supported");
2353                 }
2354                 if (ret < 0)
2355                         return ret;
2356                 if (remain > ret)
2357                         remain -= ret;
2358                 else
2359                         remain = 0;
2360                 size += ret;
2361         }
2362         if (!flow->fate)
2363                 return rte_flow_error_set(error, ENOTSUP,
2364                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2365                                           NULL,
2366                                           "no fate action found");
2367         return size;
2368 }
2369
2370 /**
2371  * Validate flow rule and fill flow structure accordingly.
2372  *
2373  * @param dev
2374  *   Pointer to Ethernet device.
2375  * @param[out] flow
2376  *   Pointer to flow structure.
2377  * @param flow_size
2378  *   Size of allocated space for @p flow.
2379  * @param[in] attr
2380  *   Flow rule attributes.
2381  * @param[in] pattern
2382  *   Pattern specification (list terminated by the END pattern item).
2383  * @param[in] actions
2384  *   Associated actions (list terminated by the END action).
2385  * @param[out] error
2386  *   Perform verbose error reporting if not NULL.
2387  *
2388  * @return
2389  *   A positive value representing the size of the flow object in bytes
2390  *   regardless of @p flow_size on success, a negative errno value otherwise
2391  *   and rte_errno is set.
2392  */
2393 static int
2394 mlx5_flow_merge_switch(struct rte_eth_dev *dev,
2395                        struct rte_flow *flow,
2396                        size_t flow_size,
2397                        const struct rte_flow_attr *attr,
2398                        const struct rte_flow_item pattern[],
2399                        const struct rte_flow_action actions[],
2400                        struct rte_flow_error *error)
2401 {
2402         unsigned int n = mlx5_dev_to_port_id(dev->device, NULL, 0);
2403         uint16_t port_id[!n + n];
2404         struct mlx5_nl_flow_ptoi ptoi[!n + n + 1];
2405         size_t off = RTE_ALIGN_CEIL(sizeof(*flow), alignof(max_align_t));
2406         unsigned int i;
2407         unsigned int own = 0;
2408         int ret;
2409
2410         /* At least one port is needed when no switch domain is present. */
2411         if (!n) {
2412                 n = 1;
2413                 port_id[0] = dev->data->port_id;
2414         } else {
2415                 n = RTE_MIN(mlx5_dev_to_port_id(dev->device, port_id, n), n);
2416         }
2417         for (i = 0; i != n; ++i) {
2418                 struct rte_eth_dev_info dev_info;
2419
2420                 rte_eth_dev_info_get(port_id[i], &dev_info);
2421                 if (port_id[i] == dev->data->port_id)
2422                         own = i;
2423                 ptoi[i].port_id = port_id[i];
2424                 ptoi[i].ifindex = dev_info.if_index;
2425         }
2426         /* Ensure first entry of ptoi[] is the current device. */
2427         if (own) {
2428                 ptoi[n] = ptoi[0];
2429                 ptoi[0] = ptoi[own];
2430                 ptoi[own] = ptoi[n];
2431         }
2432         /* An entry with zero ifindex terminates ptoi[]. */
2433         ptoi[n].port_id = 0;
2434         ptoi[n].ifindex = 0;
2435         if (flow_size < off)
2436                 flow_size = 0;
2437         ret = mlx5_nl_flow_transpose((uint8_t *)flow + off,
2438                                      flow_size ? flow_size - off : 0,
2439                                      ptoi, attr, pattern, actions, error);
2440         if (ret < 0)
2441                 return ret;
2442         if (flow_size) {
2443                 *flow = (struct rte_flow){
2444                         .attributes = *attr,
2445                         .nl_flow = (uint8_t *)flow + off,
2446                 };
2447                 /*
2448                  * Generate a reasonably unique handle based on the address
2449                  * of the target buffer.
2450                  *
2451                  * This is straightforward on 32-bit systems where the flow
2452                  * pointer can be used directly. Otherwise, its least
2453                  * significant part is taken after shifting it by the
2454                  * previous power of two of the pointed buffer size.
2455                  */
2456                 if (sizeof(flow) <= 4)
2457                         mlx5_nl_flow_brand(flow->nl_flow, (uintptr_t)flow);
2458                 else
2459                         mlx5_nl_flow_brand
2460                                 (flow->nl_flow,
2461                                  (uintptr_t)flow >>
2462                                  rte_log2_u32(rte_align32prevpow2(flow_size)));
2463         }
2464         return off + ret;
2465 }
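
/*
 * A worked sketch (illustrative, not part of the original file) of the
 * handle computation above on a 64-bit system: with flow_size = 1000,
 * rte_align32prevpow2(1000) = 512 and rte_log2_u32(512) = 9, so the handle
 * is the buffer address shifted right by 9 bits; non-overlapping target
 * buffers therefore yield distinct handles.
 */
static uint32_t
example_brand_handle(const void *flow, size_t flow_size)
{
        if (sizeof(flow) <= 4) /* 32-bit: use the pointer directly. */
                return (uintptr_t)flow;
        return (uintptr_t)flow >>
               rte_log2_u32(rte_align32prevpow2(flow_size));
}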
2466
2467 /**
2468  * Convert the @p attributes, @p pattern and @p actions into a flow for the
2469  * NIC after ensuring the NIC will understand and process it correctly.
2470  * The conversion is performed item by item and action by action; each of
2471  * them is written into @p flow if its size is less than or equal to
2472  * @p flow_size.
2473  * Validation and memory consumption computation are still performed until
2474  * the end, unless an error is encountered.
2475  *
2476  * @param[in] dev
2477  *   Pointer to Ethernet device.
2478  * @param[in, out] flow
2479  *   Pointer to flow structure.
2480  * @param[in] flow_size
2481  *   Size in bytes of the available space in @p flow; if too small, some
2482  *   garbage may be present.
2483  * @param[in] attributes
2484  *   Flow rule attributes.
2485  * @param[in] pattern
2486  *   Pattern specification (list terminated by the END pattern item).
2487  * @param[in] actions
2488  *   Associated actions (list terminated by the END action).
2489  * @param[out] error
2490  *   Perform verbose error reporting if not NULL.
2491  *
2492  * @return
2493  *   On success, the number of bytes consumed/necessary. If the returned value
2494  *   is less than or equal to @p flow_size, the flow has been fully converted
2495  *   and can be applied; otherwise another call with the returned memory size
2496  *   should be made.
2497  *   On error, a negative errno value is returned and rte_errno is set.
2498  */
2499 static int
2500 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2501                 const size_t flow_size,
2502                 const struct rte_flow_attr *attributes,
2503                 const struct rte_flow_item pattern[],
2504                 const struct rte_flow_action actions[],
2505                 struct rte_flow_error *error)
2506 {
2507         struct rte_flow local_flow = { .layers = 0, };
2508         size_t size = sizeof(*flow);
2509         union {
2510                 struct rte_flow_expand_rss buf;
2511                 uint8_t buffer[2048];
2512         } expand_buffer;
2513         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2514         struct mlx5_flow_verbs *original_verbs = NULL;
2515         size_t original_verbs_size = 0;
2516         uint32_t original_layers = 0;
2517         int expanded_pattern_idx = 0;
2518         int ret;
2519         uint32_t i;
2520
2521         if (attributes->transfer)
2522                 return mlx5_flow_merge_switch(dev, flow, flow_size,
2523                                               attributes, pattern,
2524                                               actions, error);
2525         if (size > flow_size)
2526                 flow = &local_flow;
2527         ret = mlx5_flow_attributes(dev, attributes, flow, error);
2528         if (ret < 0)
2529                 return ret;
2530         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2531         if (ret < 0)
2532                 return ret;
2533         if (local_flow.rss.types) {
2534                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2535                                           pattern, local_flow.rss.types,
2536                                           mlx5_support_expansion,
2537                                           local_flow.rss.level < 2 ?
2538                                           MLX5_EXPANSION_ROOT :
2539                                           MLX5_EXPANSION_ROOT_OUTER);
2540                 assert(ret > 0 &&
2541                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2542         } else {
2543                 buf->entries = 1;
2544                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2545         }
2546         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2547                                sizeof(void *));
2548         if (size <= flow_size)
2549                 flow->queue = (void *)(flow + 1);
2550         LIST_INIT(&flow->verbs);
2551         flow->layers = 0;
2552         flow->modifier = 0;
2553         flow->fate = 0;
2554         for (i = 0; i != buf->entries; ++i) {
2555                 size_t off = size;
2556                 size_t off2;
2557
2558                 flow->layers = original_layers;
2559                 size += sizeof(struct ibv_flow_attr) +
2560                         sizeof(struct mlx5_flow_verbs);
2561                 off2 = size;
2562                 if (size < flow_size) {
2563                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2564                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2565                         flow->cur_verbs->specs =
2566                                 (void *)(flow->cur_verbs->attr + 1);
2567                 }
2568                 /* The first iteration converts the pattern into Verbs. */
2569                 if (i == 0) {
2570                         /* Actions don't need to be converted several times. */
2571                         ret = mlx5_flow_actions(dev, actions, flow,
2572                                                 (size < flow_size) ?
2573                                                 flow_size - size : 0,
2574                                                 error);
2575                         if (ret < 0)
2576                                 return ret;
2577                         size += ret;
2578                 } else {
2579                         /*
2580                          * Subsequent iterations mean the pattern has already
2581                          * been converted and an expansion is necessary to
2582                          * match the user RSS request.  Only the expanded
2583                          * items are converted; the part common with the
2584                          * user pattern is simply copied into the next
2585                          * buffer zone.
2586                          */
2587                         size += original_verbs_size;
2588                         if (size < flow_size) {
2589                                 rte_memcpy(flow->cur_verbs->attr,
2590                                            original_verbs->attr,
2591                                            original_verbs_size +
2592                                            sizeof(struct ibv_flow_attr));
2593                                 flow->cur_verbs->size = original_verbs_size;
2594                         }
2595                 }
2596                 ret = mlx5_flow_items
2597                         (dev,
2598                          (const struct rte_flow_item *)
2599                          &buf->entry[i].pattern[expanded_pattern_idx],
2600                          flow,
2601                          (size < flow_size) ? flow_size - size : 0, error);
2602                 if (ret < 0)
2603                         return ret;
2604                 size += ret;
2605                 if (size <= flow_size) {
2606                         mlx5_flow_adjust_priority(dev, flow);
2607                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2608                 }
2609                 /*
2610                  * Keep a pointer to the first verbs conversion and the layers
2611                  * it has encountered.
2612                  */
2613                 if (i == 0) {
2614                         original_verbs = flow->cur_verbs;
2615                         original_verbs_size = size - off2;
2616                         original_layers = flow->layers;
2617                         /*
2618                          * Move the index of the expanded pattern to the
2619                          * first item not yet addressed.
2620                          */
2621                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2622                                 expanded_pattern_idx++;
2623                         } else {
2624                                 const struct rte_flow_item *item = pattern;
2625
2626                                 for (item = pattern;
2627                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2628                                      ++item)
2629                                         expanded_pattern_idx++;
2630                         }
2631                 }
2632         }
2633         /* Restore the original layers in the flow. */
2634         flow->layers = original_layers;
2635         return size;
2636 }
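
/*
 * Calling convention sketch: mlx5_flow_merge() is meant to be called
 * twice, first without a buffer to compute the required size, then with
 * a buffer of that size, exactly as mlx5_flow_list_create() does below:
 *
 *	ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *	if (ret < 0)
 *		return NULL;
 *	flow = rte_calloc(__func__, 1, ret, 0);
 *	ret = mlx5_flow_merge(dev, flow, ret, attr, items, actions, error);
 */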
2637
2638 /**
2639  * Look up and set the tunnel ptype in the Rx queue data.  Only a single
2640  * ptype can be used; if several tunnel rules apply to this queue, the
2641  * tunnel ptype is cleared.
2642  *
2643  * @param rxq_ctrl
2644  *   Rx queue to update.
2645  */
2646 static void
2647 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2648 {
2649         unsigned int i;
2650         uint32_t tunnel_ptype = 0;
2651
2652         /* Look up the ptype to use. */
2653         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2654                 if (!rxq_ctrl->flow_tunnels_n[i])
2655                         continue;
2656                 if (!tunnel_ptype) {
2657                         tunnel_ptype = tunnels_info[i].ptype;
2658                 } else {
2659                         tunnel_ptype = 0;
2660                         break;
2661                 }
2662         }
2663         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2664 }
2665
2666 /**
2667  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2668  *
2669  * @param[in] dev
2670  *   Pointer to Ethernet device.
2671  * @param[in] flow
2672  *   Pointer to flow structure.
2673  */
2674 static void
2675 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2676 {
2677         struct priv *priv = dev->data->dev_private;
2678         const int mark = !!(flow->modifier &
2679                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2680         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2681         unsigned int i;
2682
2683         for (i = 0; i != flow->rss.queue_num; ++i) {
2684                 int idx = (*flow->queue)[i];
2685                 struct mlx5_rxq_ctrl *rxq_ctrl =
2686                         container_of((*priv->rxqs)[idx],
2687                                      struct mlx5_rxq_ctrl, rxq);
2688
2689                 if (mark) {
2690                         rxq_ctrl->rxq.mark = 1;
2691                         rxq_ctrl->flow_mark_n++;
2692                 }
2693                 if (tunnel) {
2694                         unsigned int j;
2695
2696                         /* Increase the counter matching the flow. */
2697                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2698                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2699                                     tunnels_info[j].tunnel) {
2700                                         rxq_ctrl->flow_tunnels_n[j]++;
2701                                         break;
2702                                 }
2703                         }
2704                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2705                 }
2706         }
2707 }
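
/*
 * Accounting example (hypothetical counts): two flows carrying a MARK
 * action and steered to queue 0 leave that queue with flow_mark_n == 2
 * and rxq.mark == 1; mlx5_flow_rxq_flags_trim() below decrements the
 * counter when a flow is destroyed and clears rxq.mark only once
 * flow_mark_n reaches 0. Tunnel ptypes are reference-counted the same
 * way through flow_tunnels_n[].
 */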
2708
2709 /**
2710  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2711  * @p flow if no other flow uses it with the same kind of request.
2712  *
2713  * @param dev
2714  *   Pointer to Ethernet device.
2715  * @param[in] flow
2716  *   Pointer to the flow.
2717  */
2718 static void
2719 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2720 {
2721         struct priv *priv = dev->data->dev_private;
2722         const int mark = !!(flow->modifier &
2723                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2724         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2725         unsigned int i;
2726
2727         assert(dev->data->dev_started);
2728         for (i = 0; i != flow->rss.queue_num; ++i) {
2729                 int idx = (*flow->queue)[i];
2730                 struct mlx5_rxq_ctrl *rxq_ctrl =
2731                         container_of((*priv->rxqs)[idx],
2732                                      struct mlx5_rxq_ctrl, rxq);
2733
2734                 if (mark) {
2735                         rxq_ctrl->flow_mark_n--;
2736                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2737                 }
2738                 if (tunnel) {
2739                         unsigned int j;
2740
2741                         /* Decrease the counter matching the flow. */
2742                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2743                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2744                                     tunnels_info[j].tunnel) {
2745                                         rxq_ctrl->flow_tunnels_n[j]--;
2746                                         break;
2747                                 }
2748                         }
2749                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2750                 }
2751         }
2752 }
2753
2754 /**
2755  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2756  *
2757  * @param dev
2758  *   Pointer to Ethernet device.
2759  */
2760 static void
2761 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2762 {
2763         struct priv *priv = dev->data->dev_private;
2764         unsigned int i;
2765
2766         for (i = 0; i != priv->rxqs_n; ++i) {
2767                 struct mlx5_rxq_ctrl *rxq_ctrl;
2768                 unsigned int j;
2769
2770                 if (!(*priv->rxqs)[i])
2771                         continue;
2772                 rxq_ctrl = container_of((*priv->rxqs)[i],
2773                                         struct mlx5_rxq_ctrl, rxq);
2774                 rxq_ctrl->flow_mark_n = 0;
2775                 rxq_ctrl->rxq.mark = 0;
2776                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2777                         rxq_ctrl->flow_tunnels_n[j] = 0;
2778                 rxq_ctrl->rxq.tunnel = 0;
2779         }
2780 }
2781
2782 /**
2783  * Validate a flow supported by the NIC.
2784  *
2785  * @see rte_flow_validate()
2786  * @see rte_flow_ops
2787  */
2788 int
2789 mlx5_flow_validate(struct rte_eth_dev *dev,
2790                    const struct rte_flow_attr *attr,
2791                    const struct rte_flow_item items[],
2792                    const struct rte_flow_action actions[],
2793                    struct rte_flow_error *error)
2794 {
2795         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2796
2797         if (ret < 0)
2798                 return ret;
2799         return 0;
2800 }
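
/*
 * Application-level sketch (not part of the driver): this entry point is
 * reached through rte_flow_validate(); port_id, attr, pattern and actions
 * below are placeholders:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *		printf("flow rejected: %s\n",
 *		       err.message ? err.message : "(no details)");
 */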
2801
2802 /**
2803  * Remove the flow.
2804  *
2805  * @param[in] dev
2806  *   Pointer to Ethernet device.
2807  * @param[in, out] flow
2808  *   Pointer to flow structure.
2809  */
2810 static void
2811 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2812 {
2813         struct priv *priv = dev->data->dev_private;
2814         struct mlx5_flow_verbs *verbs;
2815
2816         if (flow->nl_flow && priv->mnl_socket)
2817                 mlx5_nl_flow_destroy(priv->mnl_socket, flow->nl_flow, NULL);
2818         LIST_FOREACH(verbs, &flow->verbs, next) {
2819                 if (verbs->flow) {
2820                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2821                         verbs->flow = NULL;
2822                 }
2823                 if (verbs->hrxq) {
2824                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2825                                 mlx5_hrxq_drop_release(dev);
2826                         else
2827                                 mlx5_hrxq_release(dev, verbs->hrxq);
2828                         verbs->hrxq = NULL;
2829                 }
2830         }
2831         if (flow->counter) {
2832                 mlx5_flow_counter_release(flow->counter);
2833                 flow->counter = NULL;
2834         }
2835 }
2836
2837 /**
2838  * Apply the flow.
2839  *
2840  * @param[in] dev
2841  *   Pointer to Ethernet device structure.
2842  * @param[in, out] flow
2843  *   Pointer to flow structure.
2844  * @param[out] error
2845  *   Pointer to error structure.
2846  *
2847  * @return
2848  *   0 on success, a negative errno value otherwise and rte_errno is set.
2849  */
2850 static int
2851 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2852                 struct rte_flow_error *error)
2853 {
2854         struct priv *priv = dev->data->dev_private;
2855         struct mlx5_flow_verbs *verbs;
2856         int err;
2857
2858         LIST_FOREACH(verbs, &flow->verbs, next) {
2859                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2860                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2861                         if (!verbs->hrxq) {
2862                                 rte_flow_error_set
2863                                         (error, errno,
2864                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2865                                          NULL,
2866                                          "cannot get drop hash queue");
2867                                 goto error;
2868                         }
2869                 } else {
2870                         struct mlx5_hrxq *hrxq;
2871
2872                         hrxq = mlx5_hrxq_get(dev, flow->key,
2873                                              MLX5_RSS_HASH_KEY_LEN,
2874                                              verbs->hash_fields,
2875                                              (*flow->queue),
2876                                              flow->rss.queue_num);
2877                         if (!hrxq)
2878                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2879                                                      MLX5_RSS_HASH_KEY_LEN,
2880                                                      verbs->hash_fields,
2881                                                      (*flow->queue),
2882                                                      flow->rss.queue_num);
2883                         if (!hrxq) {
2884                                 rte_flow_error_set
2885                                         (error, rte_errno,
2886                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2887                                          NULL,
2888                                          "cannot get hash queue");
2889                                 goto error;
2890                         }
2891                         verbs->hrxq = hrxq;
2892                 }
2893                 verbs->flow =
2894                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2895                 if (!verbs->flow) {
2896                         rte_flow_error_set(error, errno,
2897                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2898                                            NULL,
2899                                            "hardware refuses to create flow");
2900                         goto error;
2901                 }
2902         }
2903         if (flow->nl_flow &&
2904             priv->mnl_socket &&
2905             mlx5_nl_flow_create(priv->mnl_socket, flow->nl_flow, error))
2906                 goto error;
2907         return 0;
2908 error:
2909         err = rte_errno; /* Save rte_errno before cleanup. */
2910         LIST_FOREACH(verbs, &flow->verbs, next) {
2911                 if (verbs->hrxq) {
2912                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2913                                 mlx5_hrxq_drop_release(dev);
2914                         else
2915                                 mlx5_hrxq_release(dev, verbs->hrxq);
2916                         verbs->hrxq = NULL;
2917                 }
2918         }
2919         rte_errno = err; /* Restore rte_errno. */
2920         return -rte_errno;
2921 }
2922
2923 /**
2924  * Create a flow and add it to @p list.
2925  *
2926  * @param dev
2927  *   Pointer to Ethernet device.
2928  * @param list
2929  *   Pointer to a TAILQ flow list.
2930  * @param[in] attr
2931  *   Flow rule attributes.
2932  * @param[in] items
2933  *   Pattern specification (list terminated by the END pattern item).
2934  * @param[in] actions
2935  *   Associated actions (list terminated by the END action).
2936  * @param[out] error
2937  *   Perform verbose error reporting if not NULL.
2938  *
2939  * @return
2940  *   A flow on success, NULL otherwise and rte_errno is set.
2941  */
2942 static struct rte_flow *
2943 mlx5_flow_list_create(struct rte_eth_dev *dev,
2944                       struct mlx5_flows *list,
2945                       const struct rte_flow_attr *attr,
2946                       const struct rte_flow_item items[],
2947                       const struct rte_flow_action actions[],
2948                       struct rte_flow_error *error)
2949 {
2950         struct rte_flow *flow = NULL;
2951         size_t size = 0;
2952         int ret;
2953
2954         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2955         if (ret < 0)
2956                 return NULL;
2957         size = ret;
2958         flow = rte_calloc(__func__, 1, size, 0);
2959         if (!flow) {
2960                 rte_flow_error_set(error, ENOMEM,
2961                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2962                                    NULL,
2963                                    "not enough memory to create flow");
2964                 return NULL;
2965         }
2966         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2967         if (ret < 0) {
2968                 rte_free(flow);
2969                 return NULL;
2970         }
2971         assert((size_t)ret == size);
2972         if (dev->data->dev_started) {
2973                 ret = mlx5_flow_apply(dev, flow, error);
2974                 if (ret < 0) {
2975                         ret = rte_errno; /* Save rte_errno before cleanup. */
2976                         if (flow) {
2977                                 mlx5_flow_remove(dev, flow);
2978                                 rte_free(flow);
2979                         }
2980                         rte_errno = ret; /* Restore rte_errno. */
2981                         return NULL;
2982                 }
2983         }
2984         TAILQ_INSERT_TAIL(list, flow, next);
2985         mlx5_flow_rxq_flags_set(dev, flow);
2986         return flow;
2987 }
2988
2989 /**
2990  * Create a flow.
2991  *
2992  * @see rte_flow_create()
2993  * @see rte_flow_ops
2994  */
2995 struct rte_flow *
2996 mlx5_flow_create(struct rte_eth_dev *dev,
2997                  const struct rte_flow_attr *attr,
2998                  const struct rte_flow_item items[],
2999                  const struct rte_flow_action actions[],
3000                  struct rte_flow_error *error)
3001 {
3002         return mlx5_flow_list_create
3003                 (dev, &((struct priv *)dev->data->dev_private)->flows,
3004                  attr, items, actions, error);
3005 }
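
/*
 * Application-level sketch (hypothetical rule): steer every IPv4 packet
 * to Rx queue 0; rte_flow_create() lands here for mlx5 ports:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *	struct rte_flow *f;
 *
 *	f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */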
3006
3007 /**
3008  * Destroy a flow in a list.
3009  *
3010  * @param dev
3011  *   Pointer to Ethernet device.
3012  * @param list
3013  *   Pointer to a TAILQ flow list.
3014  * @param[in] flow
3015  *   Flow to destroy.
3016  */
3017 static void
3018 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
3019                        struct rte_flow *flow)
3020 {
3021         mlx5_flow_remove(dev, flow);
3022         TAILQ_REMOVE(list, flow, next);
3023         /*
3024          * Update RX queue flags only if port is started, otherwise it is
3025          * already clean.
3026          */
3027         if (dev->data->dev_started)
3028                 mlx5_flow_rxq_flags_trim(dev, flow);
3029         rte_free(flow);
3030 }
3031
3032 /**
3033  * Destroy all flows.
3034  *
3035  * @param dev
3036  *   Pointer to Ethernet device.
3037  * @param list
3038  *   Pointer to a TAILQ flow list.
3039  */
3040 void
3041 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
3042 {
3043         while (!TAILQ_EMPTY(list)) {
3044                 struct rte_flow *flow;
3045
3046                 flow = TAILQ_FIRST(list);
3047                 mlx5_flow_list_destroy(dev, list, flow);
3048         }
3049 }
3050
3051 /**
3052  * Remove all flows.
3053  *
3054  * @param dev
3055  *   Pointer to Ethernet device.
3056  * @param list
3057  *   Pointer to a TAILQ flow list.
3058  */
3059 void
3060 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
3061 {
3062         struct rte_flow *flow;
3063
3064         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
3065                 mlx5_flow_remove(dev, flow);
3066         mlx5_flow_rxq_flags_clear(dev);
3067 }
3068
3069 /**
3070  * Add all flows.
3071  *
3072  * @param dev
3073  *   Pointer to Ethernet device.
3074  * @param list
3075  *   Pointer to a TAILQ flow list.
3076  *
3077  * @return
3078  *   0 on success, a negative errno value otherwise and rte_errno is set.
3079  */
3080 int
3081 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
3082 {
3083         struct rte_flow *flow;
3084         struct rte_flow_error error;
3085         int ret = 0;
3086
3087         TAILQ_FOREACH(flow, list, next) {
3088                 ret = mlx5_flow_apply(dev, flow, &error);
3089                 if (ret < 0)
3090                         goto error;
3091                 mlx5_flow_rxq_flags_set(dev, flow);
3092         }
3093         return 0;
3094 error:
3095         ret = rte_errno; /* Save rte_errno before cleanup. */
3096         mlx5_flow_stop(dev, list);
3097         rte_errno = ret; /* Restore rte_errno. */
3098         return -rte_errno;
3099 }
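
/*
 * Usage sketch (assumed caller; the actual call sites are in the
 * driver's port start/stop path): flows are re-applied on start and
 * torn down on stop:
 *
 *	if (mlx5_flow_start(dev, &priv->flows))
 *		return -rte_errno;
 *	...
 *	mlx5_flow_stop(dev, &priv->flows);
 */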
3100
3101 /**
3102  * Verify the flow list is empty.
3103  *
3104  * @param dev
3105  *   Pointer to Ethernet device.
3106  *
3107  * @return The number of flows not released.
3108  */
3109 int
3110 mlx5_flow_verify(struct rte_eth_dev *dev)
3111 {
3112         struct priv *priv = dev->data->dev_private;
3113         struct rte_flow *flow;
3114         int ret = 0;
3115
3116         TAILQ_FOREACH(flow, &priv->flows, next) {
3117                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
3118                         dev->data->port_id, (void *)flow);
3119                 ++ret;
3120         }
3121         return ret;
3122 }
3123
3124 /**
3125  * Enable a control flow configured from the control plane.
3126  *
3127  * @param dev
3128  *   Pointer to Ethernet device.
3129  * @param eth_spec
3130  *   An Ethernet flow spec to apply.
3131  * @param eth_mask
3132  *   An Ethernet flow mask to apply.
3133  * @param vlan_spec
3134  *   A VLAN flow spec to apply.
3135  * @param vlan_mask
3136  *   A VLAN flow mask to apply.
3137  *
3138  * @return
3139  *   0 on success, a negative errno value otherwise and rte_errno is set.
3140  */
3141 int
3142 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
3143                     struct rte_flow_item_eth *eth_spec,
3144                     struct rte_flow_item_eth *eth_mask,
3145                     struct rte_flow_item_vlan *vlan_spec,
3146                     struct rte_flow_item_vlan *vlan_mask)
3147 {
3148         struct priv *priv = dev->data->dev_private;
3149         const struct rte_flow_attr attr = {
3150                 .ingress = 1,
3151                 .priority = MLX5_FLOW_PRIO_RSVD,
3152         };
3153         struct rte_flow_item items[] = {
3154                 {
3155                         .type = RTE_FLOW_ITEM_TYPE_ETH,
3156                         .spec = eth_spec,
3157                         .last = NULL,
3158                         .mask = eth_mask,
3159                 },
3160                 {
3161                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
3162                                 RTE_FLOW_ITEM_TYPE_END,
3163                         .spec = vlan_spec,
3164                         .last = NULL,
3165                         .mask = vlan_mask,
3166                 },
3167                 {
3168                         .type = RTE_FLOW_ITEM_TYPE_END,
3169                 },
3170         };
3171         uint16_t queue[priv->reta_idx_n];
3172         struct rte_flow_action_rss action_rss = {
3173                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
3174                 .level = 0,
3175                 .types = priv->rss_conf.rss_hf,
3176                 .key_len = priv->rss_conf.rss_key_len,
3177                 .queue_num = priv->reta_idx_n,
3178                 .key = priv->rss_conf.rss_key,
3179                 .queue = queue,
3180         };
3181         struct rte_flow_action actions[] = {
3182                 {
3183                         .type = RTE_FLOW_ACTION_TYPE_RSS,
3184                         .conf = &action_rss,
3185                 },
3186                 {
3187                         .type = RTE_FLOW_ACTION_TYPE_END,
3188                 },
3189         };
3190         struct rte_flow *flow;
3191         struct rte_flow_error error;
3192         unsigned int i;
3193
3194         if (!priv->reta_idx_n) {
3195                 rte_errno = EINVAL;
3196                 return -rte_errno;
3197         }
3198         for (i = 0; i != priv->reta_idx_n; ++i)
3199                 queue[i] = (*priv->reta_idx)[i];
3200         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
3201                                      actions, &error);
3202         if (!flow)
3203                 return -rte_errno;
3204         return 0;
3205 }
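
/*
 * Usage sketch (the spec/mask values are illustrative): install a
 * control flow accepting broadcast traffic on VLAN 10:
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item_vlan vlan_spec = {
 *		.tci = rte_cpu_to_be_16(10),
 *	};
 *	struct rte_flow_item_vlan vlan_mask = {
 *		.tci = rte_cpu_to_be_16(0x0fff),
 *	};
 *
 *	ret = mlx5_ctrl_flow_vlan(dev, &bcast, &bcast,
 *				  &vlan_spec, &vlan_mask);
 */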
3206
3207 /**
3208  * Enable a control flow configured from the control plane.
3209  *
3210  * @param dev
3211  *   Pointer to Ethernet device.
3212  * @param eth_spec
3213  *   An Ethernet flow spec to apply.
3214  * @param eth_mask
3215  *   An Ethernet flow mask to apply.
3216  *
3217  * @return
3218  *   0 on success, a negative errno value otherwise and rte_errno is set.
3219  */
3220 int
3221 mlx5_ctrl_flow(struct rte_eth_dev *dev,
3222                struct rte_flow_item_eth *eth_spec,
3223                struct rte_flow_item_eth *eth_mask)
3224 {
3225         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
3226 }
3227
3228 /**
3229  * Destroy a flow.
3230  *
3231  * @see rte_flow_destroy()
3232  * @see rte_flow_ops
3233  */
3234 int
3235 mlx5_flow_destroy(struct rte_eth_dev *dev,
3236                   struct rte_flow *flow,
3237                   struct rte_flow_error *error __rte_unused)
3238 {
3239         struct priv *priv = dev->data->dev_private;
3240
3241         mlx5_flow_list_destroy(dev, &priv->flows, flow);
3242         return 0;
3243 }
3244
3245 /**
3246  * Destroy all flows.
3247  *
3248  * @see rte_flow_flush()
3249  * @see rte_flow_ops
3250  */
3251 int
3252 mlx5_flow_flush(struct rte_eth_dev *dev,
3253                 struct rte_flow_error *error __rte_unused)
3254 {
3255         struct priv *priv = dev->data->dev_private;
3256
3257         mlx5_flow_list_flush(dev, &priv->flows);
3258         return 0;
3259 }
3260
3261 /**
3262  * Isolated mode.
3263  *
3264  * @see rte_flow_isolate()
3265  * @see rte_flow_ops
3266  */
3267 int
3268 mlx5_flow_isolate(struct rte_eth_dev *dev,
3269                   int enable,
3270                   struct rte_flow_error *error)
3271 {
3272         struct priv *priv = dev->data->dev_private;
3273
3274         if (dev->data->dev_started) {
3275                 rte_flow_error_set(error, EBUSY,
3276                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3277                                    NULL,
3278                                    "port must be stopped first");
3279                 return -rte_errno;
3280         }
3281         priv->isolated = !!enable;
3282         if (enable)
3283                 dev->dev_ops = &mlx5_dev_ops_isolate;
3284         else
3285                 dev->dev_ops = &mlx5_dev_ops;
3286         return 0;
3287 }
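
/*
 * Usage sketch: isolated mode must be requested while the port is
 * stopped, typically right after rte_eth_dev_configure(); port_id is a
 * placeholder:
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("isolate failed: %s\n",
 *		       err.message ? err.message : "(no details)");
 *	rte_eth_dev_start(port_id);
 */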
3288
3289 /**
3290  * Query flow counter.
3291  *
3292  * @param flow
3293  *   Pointer to the flow.
3294  *
3295  * @return
3296  *   0 on success, a negative errno value otherwise and rte_errno is set.
3297  */
3298 static int
3299 mlx5_flow_query_count(struct rte_flow *flow __rte_unused,
3300                       void *data __rte_unused,
3301                       struct rte_flow_error *error)
3302 {
3303 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
3304         if (flow->modifier & MLX5_FLOW_MOD_COUNT) {
3305                 struct rte_flow_query_count *qc = data;
3306                 uint64_t counters[2] = {0, 0};
3307                 struct ibv_query_counter_set_attr query_cs_attr = {
3308                         .cs = flow->counter->cs,
3309                         .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
3310                 };
3311                 struct ibv_counter_set_data query_out = {
3312                         .out = counters,
3313                         .outlen = 2 * sizeof(uint64_t),
3314                 };
3315                 int err = mlx5_glue->query_counter_set(&query_cs_attr,
3316                                                        &query_out);
3317
3318                 if (err)
3319                         return rte_flow_error_set
3320                                 (error, err,
3321                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3322                                  NULL,
3323                                  "cannot read counter");
3324                 qc->hits_set = 1;
3325                 qc->bytes_set = 1;
3326                 qc->hits = counters[0] - flow->counter->hits;
3327                 qc->bytes = counters[1] - flow->counter->bytes;
3328                 if (qc->reset) {
3329                         flow->counter->hits = counters[0];
3330                         flow->counter->bytes = counters[1];
3331                 }
3332                 return 0;
3333         }
3334         return rte_flow_error_set(error, ENOTSUP,
3335                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3336                                   NULL,
3337                                   "flow does not have counter");
3338 #endif
3339         return rte_flow_error_set(error, ENOTSUP,
3340                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3341                                   NULL,
3342                                   "counters are not available");
3343 }
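
/*
 * Application-level sketch: read (and reset) the counter of a flow
 * created with a COUNT action; the action list handed to
 * rte_flow_query() is terminated by END, as expected by
 * mlx5_flow_query() below:
 *
 *	struct rte_flow_query_count qc = { .reset = 1 };
 *	struct rte_flow_action query[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_COUNT },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_query(port_id, flow, query, &qc, &err) && qc.hits_set)
 *		printf("hits: %" PRIu64 "\n", qc.hits);
 */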
3344
3345 /**
3346  * Query a flow.
3347  *
3348  * @see rte_flow_query()
3349  * @see rte_flow_ops
3350  */
3351 int
3352 mlx5_flow_query(struct rte_eth_dev *dev __rte_unused,
3353                 struct rte_flow *flow,
3354                 const struct rte_flow_action *actions,
3355                 void *data,
3356                 struct rte_flow_error *error)
3357 {
3358         int ret = 0;
3359
3360         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
3361                 switch (actions->type) {
3362                 case RTE_FLOW_ACTION_TYPE_VOID:
3363                         break;
3364                 case RTE_FLOW_ACTION_TYPE_COUNT:
3365                         ret = mlx5_flow_query_count(flow, data, error);
3366                         break;
3367                 default:
3368                         return rte_flow_error_set(error, ENOTSUP,
3369                                                   RTE_FLOW_ERROR_TYPE_ACTION,
3370                                                   actions,
3371                                                   "action not supported");
3372                 }
3373                 if (ret < 0)
3374                         return ret;
3375         }
3376         return 0;
3377 }
3378
3379 /**
3380  * Convert a flow director filter to a generic flow.
3381  *
3382  * @param dev
3383  *   Pointer to Ethernet device.
3384  * @param fdir_filter
3385  *   Flow director filter to add.
3386  * @param attributes
3387  *   Generic flow parameters structure.
3388  *
3389  * @return
3390  *   0 on success, a negative errno value otherwise and rte_errno is set.
3391  */
3392 static int
3393 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3394                          const struct rte_eth_fdir_filter *fdir_filter,
3395                          struct mlx5_fdir *attributes)
3396 {
3397         struct priv *priv = dev->data->dev_private;
3398         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3399         const struct rte_eth_fdir_masks *mask =
3400                 &dev->data->dev_conf.fdir_conf.mask;
3401
3402         /* Validate queue number. */
3403         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3404                 DRV_LOG(ERR, "port %u invalid queue number %d",
3405                         dev->data->port_id, fdir_filter->action.rx_queue);
3406                 rte_errno = EINVAL;
3407                 return -rte_errno;
3408         }
3409         attributes->attr.ingress = 1;
3410         attributes->items[0] = (struct rte_flow_item) {
3411                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3412                 .spec = &attributes->l2,
3413                 .mask = &attributes->l2_mask,
3414         };
3415         switch (fdir_filter->action.behavior) {
3416         case RTE_ETH_FDIR_ACCEPT:
3417                 attributes->actions[0] = (struct rte_flow_action){
3418                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3419                         .conf = &attributes->queue,
3420                 };
3421                 break;
3422         case RTE_ETH_FDIR_REJECT:
3423                 attributes->actions[0] = (struct rte_flow_action){
3424                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3425                 };
3426                 break;
3427         default:
3428                 DRV_LOG(ERR, "port %u invalid behavior %d",
3429                         dev->data->port_id,
3430                         fdir_filter->action.behavior);
3431                 rte_errno = ENOTSUP;
3432                 return -rte_errno;
3433         }
3434         attributes->queue.index = fdir_filter->action.rx_queue;
3435         /* Handle L3. */
3436         switch (fdir_filter->input.flow_type) {
3437         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3438         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3439         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3440                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3441                         .src_addr = input->flow.ip4_flow.src_ip,
3442                         .dst_addr = input->flow.ip4_flow.dst_ip,
3443                         .time_to_live = input->flow.ip4_flow.ttl,
3444                         .type_of_service = input->flow.ip4_flow.tos,
3445                         .next_proto_id = input->flow.ip4_flow.proto,
3446                 };
3447                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3448                         .src_addr = mask->ipv4_mask.src_ip,
3449                         .dst_addr = mask->ipv4_mask.dst_ip,
3450                         .time_to_live = mask->ipv4_mask.ttl,
3451                         .type_of_service = mask->ipv4_mask.tos,
3452                         .next_proto_id = mask->ipv4_mask.proto,
3453                 };
3454                 attributes->items[1] = (struct rte_flow_item){
3455                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3456                         .spec = &attributes->l3,
3457                         .mask = &attributes->l3_mask,
3458                 };
3459                 break;
3460         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3461         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3462         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3463                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3464                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3465                         .proto = input->flow.ipv6_flow.proto,
3466                 };
3467
3468                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3469                        input->flow.ipv6_flow.src_ip,
3470                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3471                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3472                        input->flow.ipv6_flow.dst_ip,
3473                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3474                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3475                        mask->ipv6_mask.src_ip,
3476                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3477                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3478                        mask->ipv6_mask.dst_ip,
3479                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3480                 attributes->items[1] = (struct rte_flow_item){
3481                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3482                         .spec = &attributes->l3,
3483                         .mask = &attributes->l3_mask,
3484                 };
3485                 break;
3486         default:
3487                 DRV_LOG(ERR, "port %u invalid flow type %d",
3488                         dev->data->port_id, fdir_filter->input.flow_type);
3489                 rte_errno = ENOTSUP;
3490                 return -rte_errno;
3491         }
3492         /* Handle L4. */
3493         switch (fdir_filter->input.flow_type) {
3494         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3495                 attributes->l4.udp.hdr = (struct udp_hdr){
3496                         .src_port = input->flow.udp4_flow.src_port,
3497                         .dst_port = input->flow.udp4_flow.dst_port,
3498                 };
3499                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3500                         .src_port = mask->src_port_mask,
3501                         .dst_port = mask->dst_port_mask,
3502                 };
3503                 attributes->items[2] = (struct rte_flow_item){
3504                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3505                         .spec = &attributes->l4,
3506                         .mask = &attributes->l4_mask,
3507                 };
3508                 break;
3509         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3510                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3511                         .src_port = input->flow.tcp4_flow.src_port,
3512                         .dst_port = input->flow.tcp4_flow.dst_port,
3513                 };
3514                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3515                         .src_port = mask->src_port_mask,
3516                         .dst_port = mask->dst_port_mask,
3517                 };
3518                 attributes->items[2] = (struct rte_flow_item){
3519                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3520                         .spec = &attributes->l4,
3521                         .mask = &attributes->l4_mask,
3522                 };
3523                 break;
3524         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3525                 attributes->l4.udp.hdr = (struct udp_hdr){
3526                         .src_port = input->flow.udp6_flow.src_port,
3527                         .dst_port = input->flow.udp6_flow.dst_port,
3528                 };
3529                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3530                         .src_port = mask->src_port_mask,
3531                         .dst_port = mask->dst_port_mask,
3532                 };
3533                 attributes->items[2] = (struct rte_flow_item){
3534                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3535                         .spec = &attributes->l4,
3536                         .mask = &attributes->l4_mask,
3537                 };
3538                 break;
3539         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3540                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3541                         .src_port = input->flow.tcp6_flow.src_port,
3542                         .dst_port = input->flow.tcp6_flow.dst_port,
3543                 };
3544                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3545                         .src_port = mask->src_port_mask,
3546                         .dst_port = mask->dst_port_mask,
3547                 };
3548                 attributes->items[2] = (struct rte_flow_item){
3549                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3550                         .spec = &attributes->l4,
3551                         .mask = &attributes->l4_mask,
3552                 };
3553                 break;
3554         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3555         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3556                 break;
3557         default:
3558                 DRV_LOG(ERR, "port %u invalid flow type %d",
3559                         dev->data->port_id, fdir_filter->input.flow_type);
3560                 rte_errno = ENOTSUP;
3561                 return -rte_errno;
3562         }
3563         return 0;
3564 }
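
/*
 * Conversion example (hypothetical filter): an IPv4/UDP flow director
 * filter steering matching packets to Rx queue 3:
 *
 *	struct rte_eth_fdir_filter f = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 * becomes the item list ETH / IPV4 / UDP with a QUEUE action on index 3,
 * ready for mlx5_flow_list_create().
 */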
3565
3566 /**
3567  * Add new flow director filter and store it in list.
3568  *
3569  * @param dev
3570  *   Pointer to Ethernet device.
3571  * @param fdir_filter
3572  *   Flow director filter to add.
3573  *
3574  * @return
3575  *   0 on success, a negative errno value otherwise and rte_errno is set.
3576  */
3577 static int
3578 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3579                      const struct rte_eth_fdir_filter *fdir_filter)
3580 {
3581         struct priv *priv = dev->data->dev_private;
3582         struct mlx5_fdir attributes = {
3583                 .attr.group = 0,
3584                 .l2_mask = {
3585                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3586                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3587                         .type = 0,
3588                 },
3589         };
3590         struct rte_flow_error error;
3591         struct rte_flow *flow;
3592         int ret;
3593
3594         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3595         if (ret)
3596                 return ret;
3597         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3598                                      attributes.items, attributes.actions,
3599                                      &error);
3600         if (flow) {
3601                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3602                         (void *)flow);
3603                 return 0;
3604         }
3605         return -rte_errno;
3606 }
3607
3608 /**
3609  * Delete specific filter.
3610  *
3611  * @param dev
3612  *   Pointer to Ethernet device.
3613  * @param fdir_filter
3614  *   Filter to be deleted.
3615  *
3616  * @return
3617  *   0 on success, a negative errno value otherwise and rte_errno is set.
3618  */
3619 static int
3620 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
3621                         const struct rte_eth_fdir_filter *fdir_filter
3622                         __rte_unused)
3623 {
3624         rte_errno = ENOTSUP;
3625         return -rte_errno;
3626 }
3627
3628 /**
3629  * Update queue for specific filter.
3630  *
3631  * @param dev
3632  *   Pointer to Ethernet device.
3633  * @param fdir_filter
3634  *   Filter to be updated.
3635  *
3636  * @return
3637  *   0 on success, a negative errno value otherwise and rte_errno is set.
3638  */
3639 static int
3640 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3641                         const struct rte_eth_fdir_filter *fdir_filter)
3642 {
3643         int ret;
3644
3645         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3646         if (ret)
3647                 return ret;
3648         return mlx5_fdir_filter_add(dev, fdir_filter);
3649 }
3650
3651 /**
3652  * Flush all filters.
3653  *
3654  * @param dev
3655  *   Pointer to Ethernet device.
3656  */
3657 static void
3658 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3659 {
3660         struct priv *priv = dev->data->dev_private;
3661
3662         mlx5_flow_list_flush(dev, &priv->flows);
3663 }
3664
3665 /**
3666  * Get flow director information.
3667  *
3668  * @param dev
3669  *   Pointer to Ethernet device.
3670  * @param[out] fdir_info
3671  *   Resulting flow director information.
3672  */
3673 static void
3674 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3675 {
3676         struct rte_eth_fdir_masks *mask =
3677                 &dev->data->dev_conf.fdir_conf.mask;
3678
3679         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3680         fdir_info->guarant_spc = 0;
3681         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3682         fdir_info->max_flexpayload = 0;
3683         fdir_info->flow_types_mask[0] = 0;
3684         fdir_info->flex_payload_unit = 0;
3685         fdir_info->max_flex_payload_segment_num = 0;
3686         fdir_info->flex_payload_limit = 0;
3687         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3688 }
3689
3690 /**
3691  * Deal with flow director operations.
3692  *
3693  * @param dev
3694  *   Pointer to Ethernet device.
3695  * @param filter_op
3696  *   Operation to perform.
3697  * @param arg
3698  *   Pointer to operation-specific structure.
3699  *
3700  * @return
3701  *   0 on success, a negative errno value otherwise and rte_errno is set.
3702  */
3703 static int
3704 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3705                     void *arg)
3706 {
3707         enum rte_fdir_mode fdir_mode =
3708                 dev->data->dev_conf.fdir_conf.mode;
3709
3710         if (filter_op == RTE_ETH_FILTER_NOP)
3711                 return 0;
3712         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3713             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3714                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3715                         dev->data->port_id, fdir_mode);
3716                 rte_errno = EINVAL;
3717                 return -rte_errno;
3718         }
3719         switch (filter_op) {
3720         case RTE_ETH_FILTER_ADD:
3721                 return mlx5_fdir_filter_add(dev, arg);
3722         case RTE_ETH_FILTER_UPDATE:
3723                 return mlx5_fdir_filter_update(dev, arg);
3724         case RTE_ETH_FILTER_DELETE:
3725                 return mlx5_fdir_filter_delete(dev, arg);
3726         case RTE_ETH_FILTER_FLUSH:
3727                 mlx5_fdir_filter_flush(dev);
3728                 break;
3729         case RTE_ETH_FILTER_INFO:
3730                 mlx5_fdir_info_get(dev, arg);
3731                 break;
3732         default:
3733                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3734                         dev->data->port_id, filter_op);
3735                 rte_errno = EINVAL;
3736                 return -rte_errno;
3737         }
3738         return 0;
3739 }
3740
3741 /**
3742  * Manage filter operations.
3743  *
3744  * @param dev
3745  *   Pointer to Ethernet device structure.
3746  * @param filter_type
3747  *   Filter type.
3748  * @param filter_op
3749  *   Operation to perform.
3750  * @param arg
3751  *   Pointer to operation-specific structure.
3752  *
3753  * @return
3754  *   0 on success, a negative errno value otherwise and rte_errno is set.
3755  */
3756 int
3757 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3758                      enum rte_filter_type filter_type,
3759                      enum rte_filter_op filter_op,
3760                      void *arg)
3761 {
3762         switch (filter_type) {
3763         case RTE_ETH_FILTER_GENERIC:
3764                 if (filter_op != RTE_ETH_FILTER_GET) {
3765                         rte_errno = EINVAL;
3766                         return -rte_errno;
3767                 }
3768                 *(const void **)arg = &mlx5_flow_ops;
3769                 return 0;
3770         case RTE_ETH_FILTER_FDIR:
3771                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3772         default:
3773                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3774                         dev->data->port_id, filter_type);
3775                 rte_errno = ENOTSUP;
3776                 return -rte_errno;
3777         }
3778         return 0;
3779 }
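
/*
 * Usage sketch: this callback is also how the generic rte_flow layer
 * binds to the driver; applications normally go through the rte_flow
 * API instead of calling it directly:
 *
 *	const struct rte_flow_ops *ops = NULL;
 *
 *	if (!rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				     RTE_ETH_FILTER_GET, &ops) && ops)
 *		printf("rte_flow supported on port %u\n", port_id);
 */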