net/mlx5: add flow MPLS item
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structures defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern outer Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45
46 /* Pattern inner Layer bits. */
47 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
48 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
50 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
51 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
52 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
53
54 /* Pattern tunnel Layer bits. */
55 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
56 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
57 #define MLX5_FLOW_LAYER_GRE (1u << 14)
58 #define MLX5_FLOW_LAYER_MPLS (1u << 15)
59
60 /* Outer Masks. */
61 #define MLX5_FLOW_LAYER_OUTER_L3 \
62         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
63 #define MLX5_FLOW_LAYER_OUTER_L4 \
64         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
65 #define MLX5_FLOW_LAYER_OUTER \
66         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
67          MLX5_FLOW_LAYER_OUTER_L4)
68
69 /* Tunnel Masks. */
70 #define MLX5_FLOW_LAYER_TUNNEL \
71         (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
72          MLX5_FLOW_LAYER_GRE | MLX5_FLOW_LAYER_MPLS)
73
74 /* Inner Masks. */
75 #define MLX5_FLOW_LAYER_INNER_L3 \
76         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
77 #define MLX5_FLOW_LAYER_INNER_L4 \
78         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
79 #define MLX5_FLOW_LAYER_INNER \
80         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
81          MLX5_FLOW_LAYER_INNER_L4)
82
83 /* Actions that modify the fate of matching traffic. */
84 #define MLX5_FLOW_FATE_DROP (1u << 0)
85 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
86 #define MLX5_FLOW_FATE_RSS (1u << 2)
87
88 /* Modify a packet. */
89 #define MLX5_FLOW_MOD_FLAG (1u << 0)
90 #define MLX5_FLOW_MOD_MARK (1u << 1)
91
92 /* Possible L3 layer protocols for filtering. */
93 #define MLX5_IP_PROTOCOL_TCP 6
94 #define MLX5_IP_PROTOCOL_UDP 17
95 #define MLX5_IP_PROTOCOL_GRE 47
96 #define MLX5_IP_PROTOCOL_MPLS 137
97
98 /* Priority reserved for default flows. */
99 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
100
101 enum mlx5_expansion {
102         MLX5_EXPANSION_ROOT,
103         MLX5_EXPANSION_ROOT_OUTER,
104         MLX5_EXPANSION_OUTER_ETH,
105         MLX5_EXPANSION_OUTER_IPV4,
106         MLX5_EXPANSION_OUTER_IPV4_UDP,
107         MLX5_EXPANSION_OUTER_IPV4_TCP,
108         MLX5_EXPANSION_OUTER_IPV6,
109         MLX5_EXPANSION_OUTER_IPV6_UDP,
110         MLX5_EXPANSION_OUTER_IPV6_TCP,
111         MLX5_EXPANSION_VXLAN,
112         MLX5_EXPANSION_VXLAN_GPE,
113         MLX5_EXPANSION_GRE,
114         MLX5_EXPANSION_MPLS,
115         MLX5_EXPANSION_ETH,
116         MLX5_EXPANSION_IPV4,
117         MLX5_EXPANSION_IPV4_UDP,
118         MLX5_EXPANSION_IPV4_TCP,
119         MLX5_EXPANSION_IPV6,
120         MLX5_EXPANSION_IPV6_UDP,
121         MLX5_EXPANSION_IPV6_TCP,
122 };
123
124 /** Supported expansion of items. */
125 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
126         [MLX5_EXPANSION_ROOT] = {
127                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
128                                                  MLX5_EXPANSION_IPV4,
129                                                  MLX5_EXPANSION_IPV6),
130                 .type = RTE_FLOW_ITEM_TYPE_END,
131         },
132         [MLX5_EXPANSION_ROOT_OUTER] = {
133                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
134                                                  MLX5_EXPANSION_OUTER_IPV4,
135                                                  MLX5_EXPANSION_OUTER_IPV6),
136                 .type = RTE_FLOW_ITEM_TYPE_END,
137         },
138         [MLX5_EXPANSION_OUTER_ETH] = {
139                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
140                                                  MLX5_EXPANSION_OUTER_IPV6,
141                                                  MLX5_EXPANSION_MPLS),
142                 .type = RTE_FLOW_ITEM_TYPE_ETH,
143                 .rss_types = 0,
144         },
145         [MLX5_EXPANSION_OUTER_IPV4] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT
147                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
148                          MLX5_EXPANSION_OUTER_IPV4_TCP,
149                          MLX5_EXPANSION_GRE),
150                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
151                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
152                         ETH_RSS_NONFRAG_IPV4_OTHER,
153         },
154         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
155                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
156                                                  MLX5_EXPANSION_VXLAN_GPE),
157                 .type = RTE_FLOW_ITEM_TYPE_UDP,
158                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
159         },
160         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
161                 .type = RTE_FLOW_ITEM_TYPE_TCP,
162                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
163         },
164         [MLX5_EXPANSION_OUTER_IPV6] = {
165                 .next = RTE_FLOW_EXPAND_RSS_NEXT
166                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
167                          MLX5_EXPANSION_OUTER_IPV6_TCP),
168                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
169                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
170                         ETH_RSS_NONFRAG_IPV6_OTHER,
171         },
172         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
173                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
174                                                  MLX5_EXPANSION_VXLAN_GPE),
175                 .type = RTE_FLOW_ITEM_TYPE_UDP,
176                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
177         },
178         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
179                 .type = RTE_FLOW_ITEM_TYPE_TCP,
180                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
181         },
182         [MLX5_EXPANSION_VXLAN] = {
183                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
184                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
185         },
186         [MLX5_EXPANSION_VXLAN_GPE] = {
187                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
188                                                  MLX5_EXPANSION_IPV4,
189                                                  MLX5_EXPANSION_IPV6),
190                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
191         },
192         [MLX5_EXPANSION_GRE] = {
193                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
194                 .type = RTE_FLOW_ITEM_TYPE_GRE,
195         },
196         [MLX5_EXPANSION_MPLS] = {
197                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
198                                                  MLX5_EXPANSION_IPV6),
199                 .type = RTE_FLOW_ITEM_TYPE_MPLS,
200         },
201         [MLX5_EXPANSION_ETH] = {
202                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
203                                                  MLX5_EXPANSION_IPV6),
204                 .type = RTE_FLOW_ITEM_TYPE_ETH,
205         },
206         [MLX5_EXPANSION_IPV4] = {
207                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
208                                                  MLX5_EXPANSION_IPV4_TCP),
209                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
210                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
211                         ETH_RSS_NONFRAG_IPV4_OTHER,
212         },
213         [MLX5_EXPANSION_IPV4_UDP] = {
214                 .type = RTE_FLOW_ITEM_TYPE_UDP,
215                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
216         },
217         [MLX5_EXPANSION_IPV4_TCP] = {
218                 .type = RTE_FLOW_ITEM_TYPE_TCP,
219                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
220         },
221         [MLX5_EXPANSION_IPV6] = {
222                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
223                                                  MLX5_EXPANSION_IPV6_TCP),
224                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
225                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
226                         ETH_RSS_NONFRAG_IPV6_OTHER,
227         },
228         [MLX5_EXPANSION_IPV6_UDP] = {
229                 .type = RTE_FLOW_ITEM_TYPE_UDP,
230                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
231         },
232         [MLX5_EXPANSION_IPV6_TCP] = {
233                 .type = RTE_FLOW_ITEM_TYPE_TCP,
234                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
235         },
236 };
237
238 /** Verbs flow structure: attribute, specifications and related handles. */
239 struct mlx5_flow_verbs {
240         LIST_ENTRY(mlx5_flow_verbs) next;
241         unsigned int size; /**< Size of the attribute. */
242         struct {
243                 struct ibv_flow_attr *attr;
244                 /**< Pointer to the flow attributes. */
245                 uint8_t *specs; /**< Pointer to the specifications. */
246         };
247         struct ibv_flow *flow; /**< Verbs flow pointer. */
248         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
249         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
250 };
251
252 /* Flow structure. */
253 struct rte_flow {
254         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
255         struct rte_flow_attr attributes; /**< User flow attribute. */
256         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
257         uint32_t layers;
258         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
259         uint32_t modifier;
260         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
261         uint32_t fate;
262         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
263         uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
264         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
265         struct mlx5_flow_verbs *cur_verbs;
266         /**< Current Verbs flow structure being filled. */
267         struct rte_flow_action_rss rss; /**< RSS context. */
268         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
269         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
270 };
271
272 static const struct rte_flow_ops mlx5_flow_ops = {
273         .validate = mlx5_flow_validate,
274         .create = mlx5_flow_create,
275         .destroy = mlx5_flow_destroy,
276         .flush = mlx5_flow_flush,
277         .isolate = mlx5_flow_isolate,
278 };
279
280 /* Convert FDIR request to Generic flow. */
281 struct mlx5_fdir {
282         struct rte_flow_attr attr;
283         struct rte_flow_action actions[2];
284         struct rte_flow_item items[4];
285         struct rte_flow_item_eth l2;
286         struct rte_flow_item_eth l2_mask;
287         union {
288                 struct rte_flow_item_ipv4 ipv4;
289                 struct rte_flow_item_ipv6 ipv6;
290         } l3;
291         union {
292                 struct rte_flow_item_ipv4 ipv4;
293                 struct rte_flow_item_ipv6 ipv6;
294         } l3_mask;
295         union {
296                 struct rte_flow_item_udp udp;
297                 struct rte_flow_item_tcp tcp;
298         } l4;
299         union {
300                 struct rte_flow_item_udp udp;
301                 struct rte_flow_item_tcp tcp;
302         } l4_mask;
303         struct rte_flow_action_queue queue;
304 };
305
306 /* Verbs specification header. */
307 struct ibv_spec_header {
308         enum ibv_flow_spec_type type;
309         uint16_t size;
310 };
311
312 /*
313  * Number of sub priorities.
314  * For each kind of pattern matching, i.e. L2, L3 and L4, to get correct
315  * matching on the NIC (firmware dependent), L4 must have the highest
316  * priority, followed by L3 and finally L2.
317  */
318 #define MLX5_PRIORITY_MAP_L2 2
319 #define MLX5_PRIORITY_MAP_L3 1
320 #define MLX5_PRIORITY_MAP_L4 0
321 #define MLX5_PRIORITY_MAP_MAX 3
322
323 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
324 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
325         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
326 };
327
328 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
329 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
330         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
331         { 9, 10, 11 }, { 12, 13, 14 },
332 };
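
/*
 * Example: with 8 Verbs priorities (priority_map_3), a flow created with
 * attribute priority 1 and sub-priority MLX5_PRIORITY_MAP_L3 (1) is
 * programmed with Verbs priority priority_map_3[1][1] = 3.
 */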
333
334 /* Tunnel information. */
335 struct mlx5_flow_tunnel_info {
336         uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
337         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
338 };
339
340 static struct mlx5_flow_tunnel_info tunnels_info[] = {
341         {
342                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
343                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
344         },
345         {
346                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
347                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
348         },
349         {
350                 .tunnel = MLX5_FLOW_LAYER_GRE,
351                 .ptype = RTE_PTYPE_TUNNEL_GRE,
352         },
353         {
354                 .tunnel = MLX5_FLOW_LAYER_MPLS | MLX5_FLOW_LAYER_OUTER_L4_UDP,
355                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE | RTE_PTYPE_L4_UDP,
356         },
357         {
358                 .tunnel = MLX5_FLOW_LAYER_MPLS,
359                 .ptype = RTE_PTYPE_TUNNEL_MPLS_IN_GRE,
360         },
361 };
362
363 /**
364  * Discover the maximum number of priorities available.
365  *
366  * @param[in] dev
367  *   Pointer to Ethernet device.
368  *
369  * @return
370  *   Number of supported flow priorities on success, a negative errno
371  *   value otherwise and rte_errno is set.
372  */
373 int
374 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
375 {
376         struct {
377                 struct ibv_flow_attr attr;
378                 struct ibv_flow_spec_eth eth;
379                 struct ibv_flow_spec_action_drop drop;
380         } flow_attr = {
381                 .attr = {
382                         .num_of_specs = 2,
383                 },
384                 .eth = {
385                         .type = IBV_FLOW_SPEC_ETH,
386                         .size = sizeof(struct ibv_flow_spec_eth),
387                 },
388                 .drop = {
389                         .size = sizeof(struct ibv_flow_spec_action_drop),
390                         .type = IBV_FLOW_SPEC_ACTION_DROP,
391                 },
392         };
393         struct ibv_flow *flow;
394         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
395         uint16_t vprio[] = { 8, 16 };
396         int i;
397         int priority = 0;
398
399         if (!drop) {
400                 rte_errno = ENOTSUP;
401                 return -rte_errno;
402         }
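        /*
         * Probe for 8 and then 16 Verbs priorities by attempting to create
         * a drop flow at the highest priority of each range; the last
         * successful attempt gives the supported maximum.
         */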
403         for (i = 0; i != RTE_DIM(vprio); i++) {
404                 flow_attr.attr.priority = vprio[i] - 1;
405                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
406                 if (!flow)
407                         break;
408                 claim_zero(mlx5_glue->destroy_flow(flow));
409                 priority = vprio[i];
410         }
411         switch (priority) {
412         case 8:
413                 priority = RTE_DIM(priority_map_3);
414                 break;
415         case 16:
416                 priority = RTE_DIM(priority_map_5);
417                 break;
418         default:
419                 rte_errno = ENOTSUP;
420                 DRV_LOG(ERR,
421                         "port %u verbs maximum priority: %d expected 8/16",
422                         dev->data->port_id, vprio[i]);
423                 return -rte_errno;
424         }
425         mlx5_hrxq_drop_release(dev);
426         DRV_LOG(INFO, "port %u flow maximum priority: %d",
427                 dev->data->port_id, priority);
428         return priority;
429 }
430
431 /**
432  * Adjust flow priority.
433  *
434  * @param dev
435  *   Pointer to Ethernet device.
436  * @param flow
437  *   Pointer to an rte flow.
438  */
439 static void
440 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
441 {
442         struct priv *priv = dev->data->dev_private;
443         uint32_t priority = flow->attributes.priority;
444         uint32_t subpriority = flow->cur_verbs->attr->priority;
445
446         switch (priv->config.flow_prio) {
447         case RTE_DIM(priority_map_3):
448                 priority = priority_map_3[priority][subpriority];
449                 break;
450         case RTE_DIM(priority_map_5):
451                 priority = priority_map_5[priority][subpriority];
452                 break;
453         }
454         flow->cur_verbs->attr->priority = priority;
455 }
456
457 /**
458  * Verify the @p attributes will be correctly understood by the NIC and store
459  * them in the @p flow if everything is correct.
460  *
461  * @param[in] dev
462  *   Pointer to Ethernet device.
463  * @param[in] attributes
464  *   Pointer to flow attributes
465  * @param[in, out] flow
466  *   Pointer to the rte_flow structure.
467  * @param[out] error
468  *   Pointer to error structure.
469  *
470  * @return
471  *   0 on success, a negative errno value otherwise and rte_errno is set.
472  */
473 static int
474 mlx5_flow_attributes(struct rte_eth_dev *dev,
475                      const struct rte_flow_attr *attributes,
476                      struct rte_flow *flow,
477                      struct rte_flow_error *error)
478 {
479         uint32_t priority_max =
480                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
481
482         if (attributes->group)
483                 return rte_flow_error_set(error, ENOTSUP,
484                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
485                                           NULL,
486                                           "groups are not supported");
487         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
488             attributes->priority >= priority_max)
489                 return rte_flow_error_set(error, ENOTSUP,
490                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
491                                           NULL,
492                                           "priority out of range");
493         if (attributes->egress)
494                 return rte_flow_error_set(error, ENOTSUP,
495                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
496                                           NULL,
497                                           "egress is not supported");
498         if (attributes->transfer)
499                 return rte_flow_error_set(error, ENOTSUP,
500                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
501                                           NULL,
502                                           "transfer is not supported");
503         if (!attributes->ingress)
504                 return rte_flow_error_set(error, ENOTSUP,
505                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
506                                           NULL,
507                                           "ingress attribute is mandatory");
508         flow->attributes = *attributes;
509         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
510                 flow->attributes.priority = priority_max;
511         return 0;
512 }
513
514 /**
515  * Verify the @p item specifications (spec, last, mask) are compatible with the
516  * NIC capabilities.
517  *
518  * @param[in] item
519  *   Item specification.
520  * @param[in] mask
521  *   @p item->mask or flow default bit-masks.
522  * @param[in] nic_mask
523  *   Bit-masks covering supported fields by the NIC to compare with user mask.
524  * @param[in] size
525  *   Bit-masks size in bytes.
526  * @param[out] error
527  *   Pointer to error structure.
528  *
529  * @return
530  *   0 on success, a negative errno value otherwise and rte_errno is set.
531  */
532 static int
533 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
534                           const uint8_t *mask,
535                           const uint8_t *nic_mask,
536                           unsigned int size,
537                           struct rte_flow_error *error)
538 {
539         unsigned int i;
540
541         assert(nic_mask);
542         for (i = 0; i < size; ++i)
543                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
544                         return rte_flow_error_set(error, ENOTSUP,
545                                                   RTE_FLOW_ERROR_TYPE_ITEM,
546                                                   item,
547                                                   "mask enables non supported"
548                                                   " bits");
549         if (!item->spec && (item->mask || item->last))
550                 return rte_flow_error_set(error, EINVAL,
551                                           RTE_FLOW_ERROR_TYPE_ITEM,
552                                           item,
553                                           "mask/last without a spec is not"
554                                           " supported");
555         if (item->spec && item->last) {
556                 uint8_t spec[size];
557                 uint8_t last[size];
558                 unsigned int i;
559                 int ret;
560
561                 for (i = 0; i < size; ++i) {
562                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
563                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
564                 }
565                 ret = memcmp(spec, last, size);
566                 if (ret != 0)
567                         return rte_flow_error_set(error, ENOTSUP,
568                                                   RTE_FLOW_ERROR_TYPE_ITEM,
569                                                   item,
570                                                   "range is not supported");
571         }
572         return 0;
573 }
574
575 /**
576  * Add a verbs item specification into @p flow.
577  *
578  * @param[in, out] flow
579  *   Pointer to flow structure.
580  * @param[in] src
581  *   Pointer to the specification to copy.
582  * @param[in] size
583  *   Size in bytes of the specification to copy.
584  */
585 static void
586 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
587 {
588         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
589
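        /*
         * If no specification buffer is attached yet, only the required
         * size is accumulated; otherwise the specification is copied and
         * num_of_specs is incremented.
         */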
590         if (verbs->specs) {
591                 void *dst;
592
593                 dst = (void *)(verbs->specs + verbs->size);
594                 memcpy(dst, src, size);
595                 ++verbs->attr->num_of_specs;
596         }
597         verbs->size += size;
598 }
599
600 /**
601  * Adjust verbs hash fields according to the @p flow information.
602  *
603  * @param[in, out] flow
604  *   Pointer to flow structure.
605  * @param[in] tunnel
606  *   1 when the hash field is for a tunnel item.
607  * @param[in] layer_types
608  *   ETH_RSS_* types.
609  * @param[in] hash_fields
610  *   Item hash fields.
611  */
612 static void
613 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
614                                   int tunnel __rte_unused,
615                                   uint32_t layer_types, uint64_t hash_fields)
616 {
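        /*
         * With tunnel offloading support, RSS level 2 requests hashing on
         * inner (encapsulated) headers only while levels 0 and 1 request
         * hashing on outer headers only; hash fields that do not match the
         * requested level or RSS types are discarded.
         */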
617 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
618         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
619         if (flow->rss.level == 2 && !tunnel)
620                 hash_fields = 0;
621         else if (flow->rss.level < 2 && tunnel)
622                 hash_fields = 0;
623 #endif
624         if (!(flow->rss.types & layer_types))
625                 hash_fields = 0;
626         flow->cur_verbs->hash_fields |= hash_fields;
627 }
628
629 /**
630  * Convert the @p item into a Verbs specification after ensuring the NIC
631  * will understand and process it correctly.
632  * If the necessary size for the conversion is greater than the @p flow_size,
633  * nothing is written in @p flow; the validation is still performed.
634  *
635  * @param[in] item
636  *   Item specification.
637  * @param[in, out] flow
638  *   Pointer to flow structure.
639  * @param[in] flow_size
640  *   Size in bytes of the available space in @p flow, if too small, nothing is
641  *   written.
642  * @param[out] error
643  *   Pointer to error structure.
644  *
645  * @return
646  *   On success, the number of bytes consumed/necessary; if the returned
647  *   value is less than or equal to @p flow_size, the @p item has been fully
648  *   converted, otherwise another call with the returned size is required.
649  *   On error, a negative errno value is returned and rte_errno is set.
650  */
651 static int
652 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
653                    const size_t flow_size, struct rte_flow_error *error)
654 {
655         const struct rte_flow_item_eth *spec = item->spec;
656         const struct rte_flow_item_eth *mask = item->mask;
657         const struct rte_flow_item_eth nic_mask = {
658                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
659                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
660                 .type = RTE_BE16(0xffff),
661         };
662         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
663         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
664         struct ibv_flow_spec_eth eth = {
665                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
666                 .size = size,
667         };
668         int ret;
669
670         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
671                             MLX5_FLOW_LAYER_OUTER_L2))
672                 return rte_flow_error_set(error, ENOTSUP,
673                                           RTE_FLOW_ERROR_TYPE_ITEM,
674                                           item,
675                                           "L2 layers already configured");
676         if (!mask)
677                 mask = &rte_flow_item_eth_mask;
678         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
679                                         (const uint8_t *)&nic_mask,
680                                         sizeof(struct rte_flow_item_eth),
681                                         error);
682         if (ret)
683                 return ret;
684         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
685                 MLX5_FLOW_LAYER_OUTER_L2;
686         if (size > flow_size)
687                 return size;
688         if (spec) {
689                 unsigned int i;
690
691                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
692                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
693                 eth.val.ether_type = spec->type;
694                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
695                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
696                 eth.mask.ether_type = mask->type;
697                 /* Remove unwanted bits from values. */
698                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
699                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
700                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
701                 }
702                 eth.val.ether_type &= eth.mask.ether_type;
703         }
704         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
705         mlx5_flow_spec_verbs_add(flow, &eth, size);
706         return size;
707 }
708
709 /**
710  * Update the VLAN tag in the Verbs Ethernet specification.
711  *
712  * @param[in, out] attr
713  *   Pointer to Verbs attributes structure.
714  * @param[in] eth
715  *   Verbs structure containing the VLAN information to copy.
716  */
717 static void
718 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
719                            struct ibv_flow_spec_eth *eth)
720 {
721         unsigned int i;
722         const enum ibv_flow_spec_type search = eth->type;
723         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
724                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
725
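        /*
         * Walk the specifications following the attribute to locate the
         * Ethernet spec and merge the VLAN tag and EtherType into it.
         */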
726         for (i = 0; i != attr->num_of_specs; ++i) {
727                 if (hdr->type == search) {
728                         struct ibv_flow_spec_eth *e =
729                                 (struct ibv_flow_spec_eth *)hdr;
730
731                         e->val.vlan_tag = eth->val.vlan_tag;
732                         e->mask.vlan_tag = eth->mask.vlan_tag;
733                         e->val.ether_type = eth->val.ether_type;
734                         e->mask.ether_type = eth->mask.ether_type;
735                         break;
736                 }
737                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
738         }
739 }
740
741 /**
742  * Convert the @p item into a Verbs specification (or update the Ethernet
743  * Verbs specification already present) after ensuring the NIC will
744  * understand and process it correctly.
745  * If the necessary size for the conversion is greater than the @p flow_size,
746  * nothing is written in @p flow; the validation is still performed.
747  *
748  * @param[in] item
749  *   Item specification.
750  * @param[in, out] flow
751  *   Pointer to flow structure.
752  * @param[in] flow_size
753  *   Size in bytes of the available space in @p flow, if too small, nothing is
754  *   written.
755  * @param[out] error
756  *   Pointer to error structure.
757  *
758  * @return
759  *   On success, the number of bytes consumed/necessary; if the returned
760  *   value is less than or equal to @p flow_size, the @p item has been fully
761  *   converted, otherwise another call with the returned size is required.
762  *   On error, a negative errno value is returned and rte_errno is set.
763  */
764 static int
765 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
766                     const size_t flow_size, struct rte_flow_error *error)
767 {
768         const struct rte_flow_item_vlan *spec = item->spec;
769         const struct rte_flow_item_vlan *mask = item->mask;
770         const struct rte_flow_item_vlan nic_mask = {
771                 .tci = RTE_BE16(0x0fff),
772                 .inner_type = RTE_BE16(0xffff),
773         };
774         unsigned int size = sizeof(struct ibv_flow_spec_eth);
775         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
776         struct ibv_flow_spec_eth eth = {
777                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
778                 .size = size,
779         };
780         int ret;
781         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
782                                         MLX5_FLOW_LAYER_INNER_L4) :
783                 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
784         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
785                 MLX5_FLOW_LAYER_OUTER_VLAN;
786         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
787                 MLX5_FLOW_LAYER_OUTER_L2;
788
789         if (flow->layers & vlanm)
790                 return rte_flow_error_set(error, ENOTSUP,
791                                           RTE_FLOW_ERROR_TYPE_ITEM,
792                                           item,
793                                           "VLAN layer already configured");
794         else if ((flow->layers & l34m) != 0)
795                 return rte_flow_error_set(error, ENOTSUP,
796                                           RTE_FLOW_ERROR_TYPE_ITEM,
797                                           item,
798                                           "L2 layer cannot follow L3/L4 layer");
799         if (!mask)
800                 mask = &rte_flow_item_vlan_mask;
801         ret = mlx5_flow_item_acceptable
802                 (item, (const uint8_t *)mask,
803                  (const uint8_t *)&nic_mask,
804                  sizeof(struct rte_flow_item_vlan), error);
805         if (ret)
806                 return ret;
807         if (spec) {
808                 eth.val.vlan_tag = spec->tci;
809                 eth.mask.vlan_tag = mask->tci;
810                 eth.val.vlan_tag &= eth.mask.vlan_tag;
811                 eth.val.ether_type = spec->inner_type;
812                 eth.mask.ether_type = mask->inner_type;
813                 eth.val.ether_type &= eth.mask.ether_type;
814         }
815         /*
816          * From a Verbs perspective, an empty VLAN is equivalent
817          * to a packet without a VLAN layer.
818          */
819         if (!eth.mask.vlan_tag)
820                 return rte_flow_error_set(error, EINVAL,
821                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
822                                           item->spec,
823                                           "VLAN cannot be empty");
824         if (!(flow->layers & l2m)) {
825                 if (size <= flow_size) {
826                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
827                         mlx5_flow_spec_verbs_add(flow, &eth, size);
828                 }
829         } else {
830                 if (flow->cur_verbs)
831                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
832                                                    &eth);
833                 size = 0; /* Only an update is done in eth specification. */
834         }
835         flow->layers |= tunnel ?
836                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
837                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
838         return size;
839 }
840
841 /**
842  * Convert the @p item into a Verbs specification after ensuring the NIC
843  * will understand and process it correctly.
844  * If the necessary size for the conversion is greater than the @p flow_size,
845  * nothing is written in @p flow; the validation is still performed.
846  *
847  * @param[in] item
848  *   Item specification.
849  * @param[in, out] flow
850  *   Pointer to flow structure.
851  * @param[in] flow_size
852  *   Size in bytes of the available space in @p flow, if too small, nothing is
853  *   written.
854  * @param[out] error
855  *   Pointer to error structure.
856  *
857  * @return
858  *   On success, the number of bytes consumed/necessary; if the returned
859  *   value is less than or equal to @p flow_size, the @p item has been fully
860  *   converted, otherwise another call with the returned size is required.
861  *   On error, a negative errno value is returned and rte_errno is set.
862  */
863 static int
864 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
865                     const size_t flow_size, struct rte_flow_error *error)
866 {
867         const struct rte_flow_item_ipv4 *spec = item->spec;
868         const struct rte_flow_item_ipv4 *mask = item->mask;
869         const struct rte_flow_item_ipv4 nic_mask = {
870                 .hdr = {
871                         .src_addr = RTE_BE32(0xffffffff),
872                         .dst_addr = RTE_BE32(0xffffffff),
873                         .type_of_service = 0xff,
874                         .next_proto_id = 0xff,
875                 },
876         };
877         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
878         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
879         struct ibv_flow_spec_ipv4_ext ipv4 = {
880                 .type = IBV_FLOW_SPEC_IPV4_EXT |
881                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
882                 .size = size,
883         };
884         int ret;
885
886         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
887                             MLX5_FLOW_LAYER_OUTER_L3))
888                 return rte_flow_error_set(error, ENOTSUP,
889                                           RTE_FLOW_ERROR_TYPE_ITEM,
890                                           item,
891                                           "multiple L3 layers not supported");
892         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
893                                  MLX5_FLOW_LAYER_OUTER_L4))
894                 return rte_flow_error_set(error, ENOTSUP,
895                                           RTE_FLOW_ERROR_TYPE_ITEM,
896                                           item,
897                                           "L3 cannot follow an L4 layer.");
898         if (!mask)
899                 mask = &rte_flow_item_ipv4_mask;
900         ret = mlx5_flow_item_acceptable
901                 (item, (const uint8_t *)mask,
902                  (const uint8_t *)&nic_mask,
903                  sizeof(struct rte_flow_item_ipv4), error);
904         if (ret < 0)
905                 return ret;
906         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
907                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
908         if (spec) {
909                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
910                         .src_ip = spec->hdr.src_addr,
911                         .dst_ip = spec->hdr.dst_addr,
912                         .proto = spec->hdr.next_proto_id,
913                         .tos = spec->hdr.type_of_service,
914                 };
915                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
916                         .src_ip = mask->hdr.src_addr,
917                         .dst_ip = mask->hdr.dst_addr,
918                         .proto = mask->hdr.next_proto_id,
919                         .tos = mask->hdr.type_of_service,
920                 };
921                 /* Remove unwanted bits from values. */
922                 ipv4.val.src_ip &= ipv4.mask.src_ip;
923                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
924                 ipv4.val.proto &= ipv4.mask.proto;
925                 ipv4.val.tos &= ipv4.mask.tos;
926         }
927         flow->l3_protocol_en = !!ipv4.mask.proto;
928         flow->l3_protocol = ipv4.val.proto;
929         if (size <= flow_size) {
930                 mlx5_flow_verbs_hashfields_adjust
931                         (flow, tunnel,
932                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
933                           ETH_RSS_NONFRAG_IPV4_OTHER),
934                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
935                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
936                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
937         }
938         return size;
939 }
940
941 /**
942  * Convert the @p item into a Verbs specification after ensuring the NIC
943  * will understand and process it correctly.
944  * If the necessary size for the conversion is greater than the @p flow_size,
945  * nothing is written in @p flow; the validation is still performed.
946  *
947  * @param[in] item
948  *   Item specification.
949  * @param[in, out] flow
950  *   Pointer to flow structure.
951  * @param[in] flow_size
952  *   Size in bytes of the available space in @p flow, if too small, nothing is
953  *   written.
954  * @param[out] error
955  *   Pointer to error structure.
956  *
957  * @return
958  *   On success, the number of bytes consumed/necessary; if the returned
959  *   value is less than or equal to @p flow_size, the @p item has been fully
960  *   converted, otherwise another call with the returned size is required.
961  *   On error, a negative errno value is returned and rte_errno is set.
962  */
963 static int
964 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
965                     const size_t flow_size, struct rte_flow_error *error)
966 {
967         const struct rte_flow_item_ipv6 *spec = item->spec;
968         const struct rte_flow_item_ipv6 *mask = item->mask;
969         const struct rte_flow_item_ipv6 nic_mask = {
970                 .hdr = {
971                         .src_addr =
972                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
973                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
974                         .dst_addr =
975                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
976                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
977                         .vtc_flow = RTE_BE32(0xffffffff),
978                         .proto = 0xff,
979                         .hop_limits = 0xff,
980                 },
981         };
982         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
983         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
984         struct ibv_flow_spec_ipv6 ipv6 = {
985                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
986                 .size = size,
987         };
988         int ret;
989
990         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
991                             MLX5_FLOW_LAYER_OUTER_L3))
992                 return rte_flow_error_set(error, ENOTSUP,
993                                           RTE_FLOW_ERROR_TYPE_ITEM,
994                                           item,
995                                           "multiple L3 layers not supported");
996         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
997                                  MLX5_FLOW_LAYER_OUTER_L4))
998                 return rte_flow_error_set(error, ENOTSUP,
999                                           RTE_FLOW_ERROR_TYPE_ITEM,
1000                                           item,
1001                                           "L3 cannot follow an L4 layer.");
1002         /*
1003          * IPv6 is not recognised by the NIC inside a GRE tunnel.
1004          * Such support has to be disabled here because such a rule would
1005          * otherwise be accepted.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
1006          * Mellanox OFED 4.4-1.0.0.0.
1007          */
1008         if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
1009                 return rte_flow_error_set(error, ENOTSUP,
1010                                           RTE_FLOW_ERROR_TYPE_ITEM,
1011                                           item,
1012                                           "IPv6 inside a GRE tunnel is"
1013                                           " not recognised.");
1014         if (!mask)
1015                 mask = &rte_flow_item_ipv6_mask;
1016         ret = mlx5_flow_item_acceptable
1017                 (item, (const uint8_t *)mask,
1018                  (const uint8_t *)&nic_mask,
1019                  sizeof(struct rte_flow_item_ipv6), error);
1020         if (ret < 0)
1021                 return ret;
1022         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1023                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1024         if (spec) {
1025                 unsigned int i;
1026                 uint32_t vtc_flow_val;
1027                 uint32_t vtc_flow_mask;
1028
1029                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1030                        RTE_DIM(ipv6.val.src_ip));
1031                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1032                        RTE_DIM(ipv6.val.dst_ip));
1033                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1034                        RTE_DIM(ipv6.mask.src_ip));
1035                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1036                        RTE_DIM(ipv6.mask.dst_ip));
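                /*
                 * The IPv6 vtc_flow word packs version, traffic class and
                 * flow label; extract the traffic class and flow label
                 * using the IPV6_HDR_TC_* and IPV6_HDR_FL_* masks/shifts.
                 */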
1037                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1038                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1039                 ipv6.val.flow_label =
1040                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1041                                          IPV6_HDR_FL_SHIFT);
1042                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1043                                          IPV6_HDR_TC_SHIFT;
1044                 ipv6.val.next_hdr = spec->hdr.proto;
1045                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1046                 ipv6.mask.flow_label =
1047                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1048                                          IPV6_HDR_FL_SHIFT);
1049                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1050                                           IPV6_HDR_TC_SHIFT;
1051                 ipv6.mask.next_hdr = mask->hdr.proto;
1052                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1053                 /* Remove unwanted bits from values. */
1054                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1055                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1056                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1057                 }
1058                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1059                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1060                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1061                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1062         }
1063         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
1064         flow->l3_protocol = ipv6.val.next_hdr;
1065         if (size <= flow_size) {
1066                 mlx5_flow_verbs_hashfields_adjust
1067                         (flow, tunnel,
1068                          (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
1069                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1070                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1071                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1072         }
1073         return size;
1074 }
1075
1076 /**
1077  * Convert the @p item into a Verbs specification after ensuring the NIC
1078  * will understand and process it correctly.
1079  * If the necessary size for the conversion is greater than the @p flow_size,
1080  * nothing is written in @p flow; the validation is still performed.
1081  *
1082  * @param[in] item
1083  *   Item specification.
1084  * @param[in, out] flow
1085  *   Pointer to flow structure.
1086  * @param[in] flow_size
1087  *   Size in bytes of the available space in @p flow, if too small, nothing is
1088  *   written.
1089  * @param[out] error
1090  *   Pointer to error structure.
1091  *
1092  * @return
1093  *   On success, the number of bytes consumed/necessary; if the returned
1094  *   value is less than or equal to @p flow_size, the @p item has been fully
1095  *   converted, otherwise another call with the returned size is required.
1096  *   On error, a negative errno value is returned and rte_errno is set.
1097  */
1098 static int
1099 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1100                    const size_t flow_size, struct rte_flow_error *error)
1101 {
1102         const struct rte_flow_item_udp *spec = item->spec;
1103         const struct rte_flow_item_udp *mask = item->mask;
1104         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1105         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1106         struct ibv_flow_spec_tcp_udp udp = {
1107                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1108                 .size = size,
1109         };
1110         int ret;
1111
1112         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
1113                 return rte_flow_error_set(error, ENOTSUP,
1114                                           RTE_FLOW_ERROR_TYPE_ITEM,
1115                                           item,
1116                                           "protocol filtering not compatible"
1117                                           " with UDP layer");
1118         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1119                               MLX5_FLOW_LAYER_OUTER_L3)))
1120                 return rte_flow_error_set(error, ENOTSUP,
1121                                           RTE_FLOW_ERROR_TYPE_ITEM,
1122                                           item,
1123                                           "L3 is mandatory to filter"
1124                                           " on L4");
1125         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1126                             MLX5_FLOW_LAYER_OUTER_L4))
1127                 return rte_flow_error_set(error, ENOTSUP,
1128                                           RTE_FLOW_ERROR_TYPE_ITEM,
1129                                           item,
1130                                           "L4 layer is already"
1131                                           " present");
1132         if (!mask)
1133                 mask = &rte_flow_item_udp_mask;
1134         ret = mlx5_flow_item_acceptable
1135                 (item, (const uint8_t *)mask,
1136                  (const uint8_t *)&rte_flow_item_udp_mask,
1137                  sizeof(struct rte_flow_item_udp), error);
1138         if (ret < 0)
1139                 return ret;
1140         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1141                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1142         if (spec) {
1143                 udp.val.dst_port = spec->hdr.dst_port;
1144                 udp.val.src_port = spec->hdr.src_port;
1145                 udp.mask.dst_port = mask->hdr.dst_port;
1146                 udp.mask.src_port = mask->hdr.src_port;
1147                 /* Remove unwanted bits from values. */
1148                 udp.val.src_port &= udp.mask.src_port;
1149                 udp.val.dst_port &= udp.mask.dst_port;
1150         }
1151         if (size <= flow_size) {
1152                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1153                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1154                                                    IBV_RX_HASH_DST_PORT_UDP));
1155                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1156                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1157         }
1158         return size;
1159 }
1160
1161 /**
1162  * Convert the @p item into a Verbs specification after ensuring the NIC
1163  * will understand and process it correctly.
1164  * If the necessary size for the conversion is greater than the @p flow_size,
1165  * nothing is written in @p flow; the validation is still performed.
1166  *
1167  * @param[in] item
1168  *   Item specification.
1169  * @param[in, out] flow
1170  *   Pointer to flow structure.
1171  * @param[in] flow_size
1172  *   Size in bytes of the available space in @p flow, if too small, nothing is
1173  *   written.
1174  * @param[out] error
1175  *   Pointer to error structure.
1176  *
1177  * @return
1178  *   On success, the number of bytes consumed/necessary; if the returned
1179  *   value is less than or equal to @p flow_size, the @p item has been fully
1180  *   converted, otherwise another call with the returned size is required.
1181  *   On error, a negative errno value is returned and rte_errno is set.
1182  */
1183 static int
1184 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1185                    const size_t flow_size, struct rte_flow_error *error)
1186 {
1187         const struct rte_flow_item_tcp *spec = item->spec;
1188         const struct rte_flow_item_tcp *mask = item->mask;
1189         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1190         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1191         struct ibv_flow_spec_tcp_udp tcp = {
1192                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1193                 .size = size,
1194         };
1195         int ret;
1196
1197         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1198                 return rte_flow_error_set(error, ENOTSUP,
1199                                           RTE_FLOW_ERROR_TYPE_ITEM,
1200                                           item,
1201                                           "protocol filtering not compatible"
1202                                           " with TCP layer");
1203         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1204                               MLX5_FLOW_LAYER_OUTER_L3)))
1205                 return rte_flow_error_set(error, ENOTSUP,
1206                                           RTE_FLOW_ERROR_TYPE_ITEM,
1207                                           item,
1208                                           "L3 is mandatory to filter on L4");
1209         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1210                             MLX5_FLOW_LAYER_OUTER_L4))
1211                 return rte_flow_error_set(error, ENOTSUP,
1212                                           RTE_FLOW_ERROR_TYPE_ITEM,
1213                                           item,
1214                                           "L4 layer is already present");
1215         if (!mask)
1216                 mask = &rte_flow_item_tcp_mask;
1217         ret = mlx5_flow_item_acceptable
1218                 (item, (const uint8_t *)mask,
1219                  (const uint8_t *)&rte_flow_item_tcp_mask,
1220                  sizeof(struct rte_flow_item_tcp), error);
1221         if (ret < 0)
1222                 return ret;
1223         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1224                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1225         if (spec) {
1226                 tcp.val.dst_port = spec->hdr.dst_port;
1227                 tcp.val.src_port = spec->hdr.src_port;
1228                 tcp.mask.dst_port = mask->hdr.dst_port;
1229                 tcp.mask.src_port = mask->hdr.src_port;
1230                 /* Remove unwanted bits from values. */
1231                 tcp.val.src_port &= tcp.mask.src_port;
1232                 tcp.val.dst_port &= tcp.mask.dst_port;
1233         }
1234         if (size <= flow_size) {
1235                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1236                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1237                                                    IBV_RX_HASH_DST_PORT_TCP));
1238                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1239                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1240         }
1241         return size;
1242 }
1243
1244 /**
1245  * Convert the @p item into a Verbs specification after ensuring the NIC
1246  * will understand and process it correctly.
1247  * If the necessary size for the conversion is greater than the @p flow_size,
1248  * nothing is written in @p flow; the validation is still performed.
1249  *
1250  * @param[in] item
1251  *   Item specification.
1252  * @param[in, out] flow
1253  *   Pointer to flow structure.
1254  * @param[in] flow_size
1255  *   Size in bytes of the available space in @p flow, if too small, nothing is
1256  *   written.
1257  * @param[out] error
1258  *   Pointer to error structure.
1259  *
1260  * @return
1261  *   On success, the number of bytes consumed/necessary; if the returned
1262  *   value is less than or equal to @p flow_size, the @p item has been fully
1263  *   converted, otherwise another call with the returned size is required.
1264  *   On error, a negative errno value is returned and rte_errno is set.
1265  */
1266 static int
1267 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1268                      const size_t flow_size, struct rte_flow_error *error)
1269 {
1270         const struct rte_flow_item_vxlan *spec = item->spec;
1271         const struct rte_flow_item_vxlan *mask = item->mask;
1272         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1273         struct ibv_flow_spec_tunnel vxlan = {
1274                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1275                 .size = size,
1276         };
1277         int ret;
1278         union vni {
1279                 uint32_t vlan_id;
1280                 uint8_t vni[4];
1281         } id = { .vlan_id = 0, };
1282
1283         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1284                 return rte_flow_error_set(error, ENOTSUP,
1285                                           RTE_FLOW_ERROR_TYPE_ITEM,
1286                                           item,
1287                                           "a tunnel is already present");
1288         /*
1289          * Verify that only UDPv4 is present, as defined in
1290          * https://tools.ietf.org/html/rfc7348
1291          */
1292         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1293                 return rte_flow_error_set(error, ENOTSUP,
1294                                           RTE_FLOW_ERROR_TYPE_ITEM,
1295                                           item,
1296                                           "no outer UDP layer found");
1297         if (!mask)
1298                 mask = &rte_flow_item_vxlan_mask;
1299         ret = mlx5_flow_item_acceptable
1300                 (item, (const uint8_t *)mask,
1301                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1302                  sizeof(struct rte_flow_item_vxlan), error);
1303         if (ret < 0)
1304                 return ret;
1305         if (spec) {
1306                 memcpy(&id.vni[1], spec->vni, 3);
1307                 vxlan.val.tunnel_id = id.vlan_id;
1308                 memcpy(&id.vni[1], mask->vni, 3);
1309                 vxlan.mask.tunnel_id = id.vlan_id;
1310                 /* Remove unwanted bits from values. */
1311                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1312         }
1313         /*
1314          * A tunnel id of 0 is equivalent to not adding a VXLAN layer: if
1315          * this is the only layer defined in the Verbs specification, it
1316          * is interpreted as a wildcard and all packets will match this
1317          * rule; if it follows a full stack of layers (e.g. eth / ipv4 /
1318          * udp), all packets matching the preceding layers will also
1319          * match this rule.  To avoid such a situation, VNI 0 is
1320          * currently refused.
1321          */
1322         if (!vxlan.val.tunnel_id)
1323                 return rte_flow_error_set(error, EINVAL,
1324                                           RTE_FLOW_ERROR_TYPE_ITEM,
1325                                           item,
1326                                           "VXLAN vni cannot be 0");
1327         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1328                 return rte_flow_error_set(error, EINVAL,
1329                                           RTE_FLOW_ERROR_TYPE_ITEM,
1330                                           item,
1331                                           "VXLAN tunnel must be fully defined");
1332         if (size <= flow_size) {
1333                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1334                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1335         }
1336         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1337         return size;
1338 }
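
/*
 * Illustration only (not part of the upstream driver): how the 24-bit VNI
 * carried by the VXLAN item above ends up in the 32-bit Verbs tunnel_id.
 * The hypothetical helper below mirrors the union trick used in
 * mlx5_flow_item_vxlan(): byte 0 is left zero and the three VNI bytes are
 * copied into bytes 1..3.
 */
static uint32_t __rte_unused
example_vxlan_vni_to_tunnel_id(const uint8_t vni[3])
{
        union {
                uint32_t tunnel_id;
                uint8_t vni[4];
        } id = { .tunnel_id = 0, };

        /* Leave id.vni[0] zero, copy the 24-bit VNI into bytes 1..3. */
        memcpy(&id.vni[1], vni, 3);
        return id.tunnel_id;
}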
1339
1340 /**
1341  * Convert the @p item into a Verbs specification after ensuring the NIC
1342  * will understand and process it correctly.
1343  * If the necessary size for the conversion is greater than the @p flow_size,
1344  * nothing is written in @p flow, the validation is still performed.
1345  *
1346  * @param dev
1347  *   Pointer to Ethernet device.
1348  * @param[in] item
1349  *   Item specification.
1350  * @param[in, out] flow
1351  *   Pointer to flow structure.
1352  * @param[in] flow_size
1353  *   Size in bytes of the available space in @p flow, if too small, nothing is
1354  *   written.
1355  * @param[out] error
1356  *   Pointer to error structure.
1357  *
1358  * @return
1359  *   On success, the number of bytes consumed/necessary; if the returned
1360  *   value is less than or equal to @p flow_size, the @p item has fully been
1361  *   converted, otherwise another call with the returned size should be done.
1362  *   On error, a negative errno value is returned and rte_errno is set.
1363  */
1364 static int
1365 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
1366                          const struct rte_flow_item *item,
1367                          struct rte_flow *flow, const size_t flow_size,
1368                          struct rte_flow_error *error)
1369 {
1370         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1371         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1372         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1373         struct ibv_flow_spec_tunnel vxlan_gpe = {
1374                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1375                 .size = size,
1376         };
1377         int ret;
1378         union vni {
1379                 uint32_t vlan_id;
1380                 uint8_t vni[4];
1381         } id = { .vlan_id = 0, };
1382
1383         if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
1384                 return rte_flow_error_set(error, ENOTSUP,
1385                                           RTE_FLOW_ERROR_TYPE_ITEM,
1386                                           item,
1387                                           "L3 VXLAN is not enabled by device"
1388                                           " parameter and/or not configured in"
1389                                           " firmware");
1390         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1391                 return rte_flow_error_set(error, ENOTSUP,
1392                                           RTE_FLOW_ERROR_TYPE_ITEM,
1393                                           item,
1394                                           "a tunnel is already present");
1395         /*
1396          * Verify that only UDPv4 is present, as defined in
1397          * https://tools.ietf.org/html/rfc7348
1398          */
1399         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1400                 return rte_flow_error_set(error, ENOTSUP,
1401                                           RTE_FLOW_ERROR_TYPE_ITEM,
1402                                           item,
1403                                           "no outer UDP layer found");
1404         if (!mask)
1405                 mask = &rte_flow_item_vxlan_gpe_mask;
1406         ret = mlx5_flow_item_acceptable
1407                 (item, (const uint8_t *)mask,
1408                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1409                  sizeof(struct rte_flow_item_vxlan_gpe), error);
1410         if (ret < 0)
1411                 return ret;
1412         if (spec) {
1413                 memcpy(&id.vni[1], spec->vni, 3);
1414                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1415                 memcpy(&id.vni[1], mask->vni, 3);
1416                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1417                 if (spec->protocol)
1418                         return rte_flow_error_set
1419                                 (error, EINVAL,
1420                                  RTE_FLOW_ERROR_TYPE_ITEM,
1421                                  item,
1422                                  "VxLAN-GPE protocol not supported");
1423                 /* Remove unwanted bits from values. */
1424                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1425         }
1426         /*
1427          * A tunnel id of 0 is equivalent to not adding a VXLAN layer: if this
1428          * is the only layer defined in the Verbs specification, it is
1429          * interpreted as a wildcard and all packets will match this rule; if it
1430          * follows a full stack of layers (e.g. eth / ipv4 / udp), all packets
1431          * matching the preceding layers will also match this rule.  To avoid
1432          * such a situation, VNI 0 is currently refused.
1433          */
1434         if (!vxlan_gpe.val.tunnel_id)
1435                 return rte_flow_error_set(error, EINVAL,
1436                                           RTE_FLOW_ERROR_TYPE_ITEM,
1437                                           item,
1438                                           "VXLAN-GPE vni cannot be 0");
1439         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1440                 return rte_flow_error_set(error, EINVAL,
1441                                           RTE_FLOW_ERROR_TYPE_ITEM,
1442                                           item,
1443                                           "VXLAN-GPE tunnel must be fully"
1444                                           " defined");
1445         if (size <= flow_size) {
1446                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1447                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1448         }
1449         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1450         return size;
1451 }
1452
1453 /**
1454  * Update the protocol in Verbs IPv4/IPv6 spec.
1455  *
1456  * @param[in, out] attr
1457  *   Pointer to Verbs attributes structure.
1458  * @param[in] search
1459  *   Specification type to search in order to update the IP protocol.
1460  * @param[in] protocol
1461  *   Protocol value to set if none is present in the specification.
1462  */
1463 static void
1464 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1465                                       enum ibv_flow_spec_type search,
1466                                       uint8_t protocol)
1467 {
1468         unsigned int i;
1469         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
1470                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
1471
1472         if (!attr)
1473                 return;
1474         for (i = 0; i != attr->num_of_specs; ++i) {
1475                 if (hdr->type == search) {
1476                         union {
1477                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1478                                 struct ibv_flow_spec_ipv6 *ipv6;
1479                         } ip;
1480
1481                         switch (search) {
1482                         case IBV_FLOW_SPEC_IPV4_EXT:
1483                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1484                                 if (!ip.ipv4->val.proto) {
1485                                         ip.ipv4->val.proto = protocol;
1486                                         ip.ipv4->mask.proto = 0xff;
1487                                 }
1488                                 break;
1489                         case IBV_FLOW_SPEC_IPV6:
1490                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1491                                 if (!ip.ipv6->val.next_hdr) {
1492                                         ip.ipv6->val.next_hdr = protocol;
1493                                         ip.ipv6->mask.next_hdr = 0xff;
1494                                 }
1495                                 break;
1496                         default:
1497                                 break;
1498                         }
1499                         break;
1500                 }
1501                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1502         }
1503 }
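
/*
 * Illustration only: the walk above relies on Verbs specifications being laid
 * out back to back right after the ibv_flow_attr, each one starting with an
 * ibv_spec_header giving its type and size.  The hypothetical helper below
 * sketches how such a buffer grows; mlx5_flow_spec_verbs_add(), defined
 * earlier in this file, follows the same idea on the per-flow buffer.
 */
static void __rte_unused
example_verbs_spec_append(struct ibv_flow_attr *attr, size_t *offset,
                          const void *spec, unsigned int size)
{
        uint8_t *dst = (uint8_t *)attr + sizeof(*attr) + *offset;

        /* A spec always begins with an ibv_spec_header; copy and count it. */
        memcpy(dst, spec, size);
        attr->num_of_specs++;
        *offset += size;
}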
1504
1505 /**
1506  * Convert the @p item into a Verbs specification after ensuring the NIC
1507  * will understand and process it correctly.
1508  * It will also update the previous L3 layer with the protocol value matching
1509  * the GRE.
1510  * If the necessary size for the conversion is greater than the @p flow_size,
1511  * nothing is written in @p flow, the validation is still performed.
1512  *
1515  * @param[in] item
1516  *   Item specification.
1517  * @param[in, out] flow
1518  *   Pointer to flow structure.
1519  * @param[in] flow_size
1520  *   Size in bytes of the available space in @p flow, if too small, nothing is
1521  *   written.
1522  * @param[out] error
1523  *   Pointer to error structure.
1524  *
1525  * @return
1526  *   On success, the number of bytes consumed/necessary; if the returned
1527  *   value is less than or equal to @p flow_size, the @p item has fully been
1528  *   converted, otherwise another call with the returned size should be done.
1529  *   On error, a negative errno value is returned and rte_errno is set.
1530  */
1531 static int
1532 mlx5_flow_item_gre(const struct rte_flow_item *item,
1533                    struct rte_flow *flow, const size_t flow_size,
1534                    struct rte_flow_error *error)
1535 {
1536         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1537         const struct rte_flow_item_gre *spec = item->spec;
1538         const struct rte_flow_item_gre *mask = item->mask;
1539 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1540         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1541         struct ibv_flow_spec_gre tunnel = {
1542                 .type = IBV_FLOW_SPEC_GRE,
1543                 .size = size,
1544         };
1545 #else
1546         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1547         struct ibv_flow_spec_tunnel tunnel = {
1548                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1549                 .size = size,
1550         };
1551 #endif
1552         int ret;
1553
1554         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
1555                 return rte_flow_error_set(error, ENOTSUP,
1556                                           RTE_FLOW_ERROR_TYPE_ITEM,
1557                                           item,
1558                                           "protocol filtering not compatible"
1559                                           " with this GRE layer");
1560         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1561                 return rte_flow_error_set(error, ENOTSUP,
1562                                           RTE_FLOW_ERROR_TYPE_ITEM,
1563                                           item,
1564                                           "a tunnel is already present");
1565         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
1566                 return rte_flow_error_set(error, ENOTSUP,
1567                                           RTE_FLOW_ERROR_TYPE_ITEM,
1568                                           item,
1569                                           "L3 Layer is missing");
1570         if (!mask)
1571                 mask = &rte_flow_item_gre_mask;
1572         ret = mlx5_flow_item_acceptable
1573                 (item, (const uint8_t *)mask,
1574                  (const uint8_t *)&rte_flow_item_gre_mask,
1575                  sizeof(struct rte_flow_item_gre), error);
1576         if (ret < 0)
1577                 return ret;
1578 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1579         if (spec) {
1580                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1581                 tunnel.val.protocol = spec->protocol;
1582                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1583                 tunnel.mask.protocol = mask->protocol;
1584                 /* Remove unwanted bits from values. */
1585                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1586                 tunnel.val.protocol &= tunnel.mask.protocol;
1587                 tunnel.val.key &= tunnel.mask.key;
1588         }
1589 #else
1590         if (spec && (spec->protocol & mask->protocol))
1591                 return rte_flow_error_set(error, ENOTSUP,
1592                                           RTE_FLOW_ERROR_TYPE_ITEM,
1593                                           item,
1594                                           "without MPLS support the"
1595                                           " specification cannot be used for"
1596                                           " filtering");
1597 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1598         if (size <= flow_size) {
1599                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1600                         mlx5_flow_item_gre_ip_protocol_update
1601                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1602                                  MLX5_IP_PROTOCOL_GRE);
1603                 else
1604                         mlx5_flow_item_gre_ip_protocol_update
1605                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1606                                  MLX5_IP_PROTOCOL_GRE);
1607                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1608                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1609         }
1610         flow->layers |= MLX5_FLOW_LAYER_GRE;
1611         return size;
1612 }
1613
1614 /**
1615  * Convert the @p item into a Verbs specification after ensuring the NIC
1616  * will understand and process it correctly.
1617  * If the necessary size for the conversion is greater than the @p flow_size,
1618  * nothing is written in @p flow, the validation is still performed.
1619  *
1620  * @param[in] item
1621  *   Item specification.
1622  * @param[in, out] flow
1623  *   Pointer to flow structure.
1624  * @param[in] flow_size
1625  *   Size in bytes of the available space in @p flow, if too small, nothing is
1626  *   written.
1627  * @param[out] error
1628  *   Pointer to error structure.
1629  *
1630  * @return
1631  *   On success, the number of bytes consumed/necessary; if the returned
1632  *   value is less than or equal to @p flow_size, the @p item has fully been
1633  *   converted, otherwise another call with the returned size should be done.
1634  *   On error, a negative errno value is returned and rte_errno is set.
1635  */
1636 static int
1637 mlx5_flow_item_mpls(const struct rte_flow_item *item __rte_unused,
1638                     struct rte_flow *flow __rte_unused,
1639                     const size_t flow_size __rte_unused,
1640                     struct rte_flow_error *error)
1641 {
1642 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1643         const struct rte_flow_item_mpls *spec = item->spec;
1644         const struct rte_flow_item_mpls *mask = item->mask;
1645         unsigned int size = sizeof(struct ibv_flow_spec_mpls);
1646         struct ibv_flow_spec_mpls mpls = {
1647                 .type = IBV_FLOW_SPEC_MPLS,
1648                 .size = size,
1649         };
1650         int ret;
1651
1652         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_MPLS)
1653                 return rte_flow_error_set(error, ENOTSUP,
1654                                           RTE_FLOW_ERROR_TYPE_ITEM,
1655                                           item,
1656                                           "protocol filtering not compatible"
1657                                           " with MPLS layer");
1658         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1659                 return rte_flow_error_set(error, ENOTSUP,
1660                                           RTE_FLOW_ERROR_TYPE_ITEM,
1661                                           item,
1662                                           "a tunnel is already"
1663                                           " present");
1664         if (!mask)
1665                 mask = &rte_flow_item_mpls_mask;
1666         ret = mlx5_flow_item_acceptable
1667                 (item, (const uint8_t *)mask,
1668                  (const uint8_t *)&rte_flow_item_mpls_mask,
1669                  sizeof(struct rte_flow_item_mpls), error);
1670         if (ret < 0)
1671                 return ret;
1672         if (spec) {
1673                 memcpy(&mpls.val.label, spec, sizeof(mpls.val.label));
1674                 memcpy(&mpls.mask.label, mask, sizeof(mpls.mask.label));
1675                 /* Remove unwanted bits from values.  */
1676                 mpls.val.label &= mpls.mask.label;
1677         }
1678         if (size <= flow_size) {
1679                 mlx5_flow_spec_verbs_add(flow, &mpls, size);
1680                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1681         }
1682         flow->layers |= MLX5_FLOW_LAYER_MPLS;
1683         return size;
1684 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1685         return rte_flow_error_set(error, ENOTSUP,
1686                                   RTE_FLOW_ERROR_TYPE_ITEM,
1687                                   item,
1688                                   "MPLS is not supported by Verbs, please"
1689                                   " update.");
1690 }
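
/*
 * Illustration only: one pattern the MPLS item above can be part of, matching
 * MPLS-over-UDP traffic.  Specs and masks are left to their defaults; this is
 * hypothetical application usage, not driver code.
 */
static const struct rte_flow_item example_mpls_pattern[] __rte_unused = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_MPLS },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};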
1691
1692 /**
1693  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1694  * will understand and process it correctly.
1695  * The conversion is performed item by item; each of them is written into
1696  * the @p flow if its size is less than or equal to @p flow_size.
1697  * Validation and memory consumption computation are still performed until the
1698  * end of @p pattern, unless an error is encountered.
1699  *
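 * @param[in] dev
 *   Pointer to Ethernet device structure.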
1700  * @param[in] pattern
1701  *   Flow pattern.
1702  * @param[in, out] flow
1703  *   Pointer to the rte_flow structure.
1704  * @param[in] flow_size
1705  *   Size in bytes of the available space in @p flow, if too small some
1706  *   garbage may be present.
1707  * @param[out] error
1708  *   Pointer to error structure.
1709  *
1710  * @return
1711  *   On success, the number of bytes consumed/necessary; if the returned
1712  *   value is less than or equal to @p flow_size, the @p pattern has fully
1713  *   been converted, otherwise another call with the returned size should
1714  *   be done.
1715  *   On error, a negative errno value is returned and rte_errno is set.
1716  */
1717 static int
1718 mlx5_flow_items(struct rte_eth_dev *dev,
1719                 const struct rte_flow_item pattern[],
1720                 struct rte_flow *flow, const size_t flow_size,
1721                 struct rte_flow_error *error)
1722 {
1723         int remain = flow_size;
1724         size_t size = 0;
1725
1726         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1727                 int ret = 0;
1728
1729                 switch (pattern->type) {
1730                 case RTE_FLOW_ITEM_TYPE_VOID:
1731                         break;
1732                 case RTE_FLOW_ITEM_TYPE_ETH:
1733                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1734                         break;
1735                 case RTE_FLOW_ITEM_TYPE_VLAN:
1736                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1737                         break;
1738                 case RTE_FLOW_ITEM_TYPE_IPV4:
1739                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1740                         break;
1741                 case RTE_FLOW_ITEM_TYPE_IPV6:
1742                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1743                         break;
1744                 case RTE_FLOW_ITEM_TYPE_UDP:
1745                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1746                         break;
1747                 case RTE_FLOW_ITEM_TYPE_TCP:
1748                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1749                         break;
1750                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1751                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1752                                                    error);
1753                         break;
1754                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1755                         ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
1756                                                        remain, error);
1757                         break;
1758                 case RTE_FLOW_ITEM_TYPE_GRE:
1759                         ret = mlx5_flow_item_gre(pattern, flow, remain, error);
1760                         break;
1761                 case RTE_FLOW_ITEM_TYPE_MPLS:
1762                         ret = mlx5_flow_item_mpls(pattern, flow, remain, error);
1763                         break;
1764                 default:
1765                         return rte_flow_error_set(error, ENOTSUP,
1766                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1767                                                   pattern,
1768                                                   "item not supported");
1769                 }
1770                 if (ret < 0)
1771                         return ret;
1772                 if (remain > ret)
1773                         remain -= ret;
1774                 else
1775                         remain = 0;
1776                 size += ret;
1777         }
1778         if (!flow->layers) {
1779                 const struct rte_flow_item item = {
1780                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1781                 };
1782
1783                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1784         }
1785         return size;
1786 }
1787
1788 /**
1789  * Convert the @p action into a Verbs specification after ensuring the NIC
1790  * will understand and process it correctly.
1791  * If the necessary size for the conversion is greater than the @p flow_size,
1792  * nothing is written in @p flow, the validation is still performed.
1793  *
1794  * @param[in] action
1795  *   Action configuration.
1796  * @param[in, out] flow
1797  *   Pointer to flow structure.
1798  * @param[in] flow_size
1799  *   Size in bytes of the available space in @p flow, if too small, nothing is
1800  *   written.
1801  * @param[out] error
1802  *   Pointer to error structure.
1803  *
1804  * @return
1805  *   On success, the number of bytes consumed/necessary; if the returned
1806  *   value is less than or equal to @p flow_size, the @p action has fully
1807  *   been converted, otherwise another call with the returned size should
1808  *   be done.
1809  *   On error, a negative errno value is returned and rte_errno is set.
1810  */
1811 static int
1812 mlx5_flow_action_drop(const struct rte_flow_action *action,
1813                       struct rte_flow *flow, const size_t flow_size,
1814                       struct rte_flow_error *error)
1815 {
1816         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1817         struct ibv_flow_spec_action_drop drop = {
1818                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1819                         .size = size,
1820         };
1821
1822         if (flow->fate)
1823                 return rte_flow_error_set(error, ENOTSUP,
1824                                           RTE_FLOW_ERROR_TYPE_ACTION,
1825                                           action,
1826                                           "multiple fate actions are not"
1827                                           " supported");
1828         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1829                 return rte_flow_error_set(error, ENOTSUP,
1830                                           RTE_FLOW_ERROR_TYPE_ACTION,
1831                                           action,
1832                                           "drop is not compatible with"
1833                                           " flag/mark action");
1834         if (size <= flow_size)
1835                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1836         flow->fate |= MLX5_FLOW_FATE_DROP;
1837         return size;
1838 }
1839
1840 /**
1841  * Convert the @p action into @p flow after ensuring the NIC will understand
1842  * and process it correctly.
1843  *
1844  * @param[in] dev
1845  *   Pointer to Ethernet device structure.
1846  * @param[in] action
1847  *   Action configuration.
1848  * @param[in, out] flow
1849  *   Pointer to flow structure.
1850  * @param[out] error
1851  *   Pointer to error structure.
1852  *
1853  * @return
1854  *   0 on success, a negative errno value otherwise and rte_errno is set.
1855  */
1856 static int
1857 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1858                        const struct rte_flow_action *action,
1859                        struct rte_flow *flow,
1860                        struct rte_flow_error *error)
1861 {
1862         struct priv *priv = dev->data->dev_private;
1863         const struct rte_flow_action_queue *queue = action->conf;
1864
1865         if (flow->fate)
1866                 return rte_flow_error_set(error, ENOTSUP,
1867                                           RTE_FLOW_ERROR_TYPE_ACTION,
1868                                           action,
1869                                           "multiple fate actions are not"
1870                                           " supported");
1871         if (queue->index >= priv->rxqs_n)
1872                 return rte_flow_error_set(error, EINVAL,
1873                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1874                                           &queue->index,
1875                                           "queue index out of range");
1876         if (!(*priv->rxqs)[queue->index])
1877                 return rte_flow_error_set(error, EINVAL,
1878                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1879                                           &queue->index,
1880                                           "queue is not configured");
1881         if (flow->queue)
1882                 (*flow->queue)[0] = queue->index;
1883         flow->rss.queue_num = 1;
1884         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1885         return 0;
1886 }
1887
1888 /**
1889  * Ensure the @p action will be understood and used correctly by the NIC.
1890  *
1891  * @param dev
1892  *   Pointer to Ethernet device structure.
1893  * @param[in] action
1894  *   Action configuration.
1895  * @param[in, out] flow
1896  *   Pointer to the rte_flow structure.
1897  * @param[out] error
1898  *   Pointer to error structure.
1899  *
1900  * @return
1901  *   On success, the @p flow->queue array and @p flow->rss are filled and valid.
1902  *   On error, a negative errno value is returned and rte_errno is set.
1903  */
1904 static int
1905 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1906                      const struct rte_flow_action *action,
1907                      struct rte_flow *flow,
1908                      struct rte_flow_error *error)
1909 {
1910         struct priv *priv = dev->data->dev_private;
1911         const struct rte_flow_action_rss *rss = action->conf;
1912         unsigned int i;
1913
1914         if (flow->fate)
1915                 return rte_flow_error_set(error, ENOTSUP,
1916                                           RTE_FLOW_ERROR_TYPE_ACTION,
1917                                           action,
1918                                           "multiple fate actions are not"
1919                                           " supported");
1920         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1921             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1922                 return rte_flow_error_set(error, ENOTSUP,
1923                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1924                                           &rss->func,
1925                                           "RSS hash function not supported");
1926 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1927         if (rss->level > 2)
1928 #else
1929         if (rss->level > 1)
1930 #endif
1931                 return rte_flow_error_set(error, ENOTSUP,
1932                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1933                                           &rss->level,
1934                                           "tunnel RSS is not supported");
1935         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1936                 return rte_flow_error_set(error, ENOTSUP,
1937                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1938                                           &rss->key_len,
1939                                           "RSS hash key too small");
1940         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1941                 return rte_flow_error_set(error, ENOTSUP,
1942                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1943                                           &rss->key_len,
1944                                           "RSS hash key too large");
1945         if (rss->queue_num > priv->config.ind_table_max_size)
1946                 return rte_flow_error_set(error, ENOTSUP,
1947                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1948                                           &rss->queue_num,
1949                                           "number of queues too large");
1950         if (rss->types & MLX5_RSS_HF_MASK)
1951                 return rte_flow_error_set(error, ENOTSUP,
1952                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1953                                           &rss->types,
1954                                           "some RSS protocols are not"
1955                                           " supported");
1956         for (i = 0; i != rss->queue_num; ++i) {
1957                 if (!(*priv->rxqs)[rss->queue[i]])
1958                         return rte_flow_error_set
1959                                 (error, EINVAL,
1960                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1961                                  &rss->queue[i],
1962                                  "queue is not configured");
1963         }
1964         if (flow->queue)
1965                 memcpy((*flow->queue), rss->queue,
1966                        rss->queue_num * sizeof(uint16_t));
1967         flow->rss.queue_num = rss->queue_num;
1968         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1969         flow->rss.types = rss->types;
1970         flow->rss.level = rss->level;
1971         flow->fate |= MLX5_FLOW_FATE_RSS;
1972         return 0;
1973 }
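
/*
 * Illustration only: an RSS action configuration that satisfies the checks
 * above (Toeplitz hash, outermost level, a key of exactly
 * MLX5_RSS_HASH_KEY_LEN bytes and queues the application has configured).
 * The key, types and queue values are hypothetical.
 */
static const uint8_t example_rss_key[MLX5_RSS_HASH_KEY_LEN];
static const uint16_t example_rss_queues[] = { 0, 1, 2, 3 };
static const struct rte_flow_action_rss example_rss_conf __rte_unused = {
        .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
        .level = 1, /* Outermost encapsulation level only. */
        .types = ETH_RSS_IP | ETH_RSS_UDP,
        .key_len = MLX5_RSS_HASH_KEY_LEN,
        .key = example_rss_key,
        .queue_num = RTE_DIM(example_rss_queues),
        .queue = example_rss_queues,
};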
1974
1975 /**
1976  * Convert the @p action into a Verbs specification after ensuring the NIC
1977  * will understand and process it correctly.
1978  * If the necessary size for the conversion is greater than the @p flow_size,
1979  * nothing is written in @p flow, the validation is still performed.
1980  *
1981  * @param[in] action
1982  *   Action configuration.
1983  * @param[in, out] flow
1984  *   Pointer to flow structure.
1985  * @param[in] flow_size
1986  *   Size in bytes of the available space in @p flow, if too small, nothing is
1987  *   written.
1988  * @param[out] error
1989  *   Pointer to error structure.
1990  *
1991  * @return
1992  *   On success, the number of bytes consumed/necessary; if the returned
1993  *   value is less than or equal to @p flow_size, the @p action has fully
1994  *   been converted, otherwise another call with the returned size should
1995  *   be done.
1996  *   On error, a negative errno value is returned and rte_errno is set.
1997  */
1998 static int
1999 mlx5_flow_action_flag(const struct rte_flow_action *action,
2000                       struct rte_flow *flow, const size_t flow_size,
2001                       struct rte_flow_error *error)
2002 {
2003         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2004         struct ibv_flow_spec_action_tag tag = {
2005                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2006                 .size = size,
2007                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
2008         };
2009         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2010
2011         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
2012                 return rte_flow_error_set(error, ENOTSUP,
2013                                           RTE_FLOW_ERROR_TYPE_ACTION,
2014                                           action,
2015                                           "flag action already present");
2016         if (flow->fate & MLX5_FLOW_FATE_DROP)
2017                 return rte_flow_error_set(error, ENOTSUP,
2018                                           RTE_FLOW_ERROR_TYPE_ACTION,
2019                                           action,
2020                                           "flag is not compatible with drop"
2021                                           " action");
2022         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2023                 size = 0;
2024         else if (size <= flow_size && verbs)
2025                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2026         flow->modifier |= MLX5_FLOW_MOD_FLAG;
2027         return size;
2028 }
2029
2030 /**
2031  * Update verbs specification to modify the flag to mark.
2032  *
2033  * @param[in, out] verbs
2034  *   Pointer to the mlx5_flow_verbs structure.
2035  * @param[in] mark_id
2036  *   Mark identifier to replace the flag.
2037  */
2038 static void
2039 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
2040 {
2041         struct ibv_spec_header *hdr;
2042         int i;
2043
2044         if (!verbs)
2045                 return;
2046         /* Update Verbs specification. */
2047         hdr = (struct ibv_spec_header *)verbs->specs;
2048         if (!hdr)
2049                 return;
2050         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
2051                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
2052                         struct ibv_flow_spec_action_tag *t =
2053                                 (struct ibv_flow_spec_action_tag *)hdr;
2054
2055                         t->tag_id = mlx5_flow_mark_set(mark_id);
2056                 }
2057                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
2058         }
2059 }
2060
2061 /**
2062  * Convert the @p action into @p flow (or by updating the already present
2063  * Flag Verbs specification) after ensuring the NIC will understand and
2064  * process it correctly.
2065  * If the necessary size for the conversion is greater than the @p flow_size,
2066  * nothing is written in @p flow, the validation is still performed.
2067  *
2068  * @param[in] action
2069  *   Action configuration.
2070  * @param[in, out] flow
2071  *   Pointer to flow structure.
2072  * @param[in] flow_size
2073  *   Size in bytes of the available space in @p flow, if too small, nothing is
2074  *   written.
2075  * @param[out] error
2076  *   Pointer to error structure.
2077  *
2078  * @return
2079  *   On success, the number of bytes consumed/necessary; if the returned
2080  *   value is less than or equal to @p flow_size, the @p action has fully
2081  *   been converted, otherwise another call with the returned size should
2082  *   be done.
2083  *   On error, a negative errno value is returned and rte_errno is set.
2084  */
2085 static int
2086 mlx5_flow_action_mark(const struct rte_flow_action *action,
2087                       struct rte_flow *flow, const size_t flow_size,
2088                       struct rte_flow_error *error)
2089 {
2090         const struct rte_flow_action_mark *mark = action->conf;
2091         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
2092         struct ibv_flow_spec_action_tag tag = {
2093                 .type = IBV_FLOW_SPEC_ACTION_TAG,
2094                 .size = size,
2095         };
2096         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
2097
2098         if (!mark)
2099                 return rte_flow_error_set(error, EINVAL,
2100                                           RTE_FLOW_ERROR_TYPE_ACTION,
2101                                           action,
2102                                           "configuration cannot be null");
2103         if (mark->id >= MLX5_FLOW_MARK_MAX)
2104                 return rte_flow_error_set(error, EINVAL,
2105                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2106                                           &mark->id,
2107                                           "mark id must be in 0 <= id < "
2108                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2109         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2110                 return rte_flow_error_set(error, ENOTSUP,
2111                                           RTE_FLOW_ERROR_TYPE_ACTION,
2112                                           action,
2113                                           "mark action already present");
2114         if (flow->fate & MLX5_FLOW_FATE_DROP)
2115                 return rte_flow_error_set(error, ENOTSUP,
2116                                           RTE_FLOW_ERROR_TYPE_ACTION,
2117                                           action,
2118                                           "mark is not compatible with drop"
2119                                           " action");
2120         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
2121                 mlx5_flow_verbs_mark_update(verbs, mark->id);
2122                 size = 0;
2123         } else if (size <= flow_size) {
2124                 tag.tag_id = mlx5_flow_mark_set(mark->id);
2125                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2126         }
2127         flow->modifier |= MLX5_FLOW_MOD_MARK;
2128         return size;
2129 }
2130
2131 /**
2132  * Convert the @p action into @p flow after ensuring the NIC will understand
2133  * and process it correctly.
2134  * The conversion is performed action by action; each of them is written into
2135  * the @p flow if its size is less than or equal to @p flow_size.
2136  * Validation and memory consumption computation are still performed until the
2137  * end of @p actions, unless an error is encountered.
2138  *
2139  * @param[in] dev
2140  *   Pointer to Ethernet device structure.
2141  * @param[in] actions
2142  *   Pointer to flow actions array.
2143  * @param[in, out] flow
2144  *   Pointer to the rte_flow structure.
2145  * @param[in] flow_size
2146  *   Size in bytes of the available space in @p flow, if too small some
2147  *   garbage may be present.
2148  * @param[out] error
2149  *   Pointer to error structure.
2150  *
2151  * @return
2152  *   On success, the number of bytes consumed/necessary; if the returned
2153  *   value is less than or equal to @p flow_size, the @p actions have fully
2154  *   been converted, otherwise another call with the returned size should
2155  *   be done.
2156  *   On error, a negative errno value is returned and rte_errno is set.
2157  */
2158 static int
2159 mlx5_flow_actions(struct rte_eth_dev *dev,
2160                   const struct rte_flow_action actions[],
2161                   struct rte_flow *flow, const size_t flow_size,
2162                   struct rte_flow_error *error)
2163 {
2164         size_t size = 0;
2165         int remain = flow_size;
2166         int ret = 0;
2167
2168         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2169                 switch (actions->type) {
2170                 case RTE_FLOW_ACTION_TYPE_VOID:
2171                         break;
2172                 case RTE_FLOW_ACTION_TYPE_FLAG:
2173                         ret = mlx5_flow_action_flag(actions, flow, remain,
2174                                                     error);
2175                         break;
2176                 case RTE_FLOW_ACTION_TYPE_MARK:
2177                         ret = mlx5_flow_action_mark(actions, flow, remain,
2178                                                     error);
2179                         break;
2180                 case RTE_FLOW_ACTION_TYPE_DROP:
2181                         ret = mlx5_flow_action_drop(actions, flow, remain,
2182                                                     error);
2183                         break;
2184                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2185                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
2186                         break;
2187                 case RTE_FLOW_ACTION_TYPE_RSS:
2188                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
2189                         break;
2190                 default:
2191                         return rte_flow_error_set(error, ENOTSUP,
2192                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2193                                                   actions,
2194                                                   "action not supported");
2195                 }
2196                 if (ret < 0)
2197                         return ret;
2198                 if (remain > ret)
2199                         remain -= ret;
2200                 else
2201                         remain = 0;
2202                 size += ret;
2203         }
2204         if (!flow->fate)
2205                 return rte_flow_error_set(error, ENOTSUP,
2206                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2207                                           NULL,
2208                                           "no fate action found");
2209         return size;
2210 }
2211
2212 /**
2213  * Convert the @p attributes, @p pattern and @p actions into a flow for the
2214  * NIC after ensuring the NIC will understand and process it correctly.
2215  * The conversion is performed item by item and action by action; each of
2216  * them is written into the @p flow if its size is less than or equal to @p
2217  * flow_size.
2218  * Validation and memory consumption computation are still performed until the
2219  * end, unless an error is encountered.
2220  *
2221  * @param[in] dev
2222  *   Pointer to Ethernet device.
2223  * @param[in, out] flow
2224  *   Pointer to flow structure.
2225  * @param[in] flow_size
2226  *   Size in bytes of the available space in @p flow, if too small some
2227  *   garbage may be present.
2228  * @param[in] attributes
2229  *   Flow rule attributes.
2230  * @param[in] pattern
2231  *   Pattern specification (list terminated by the END pattern item).
2232  * @param[in] actions
2233  *   Associated actions (list terminated by the END action).
2234  * @param[out] error
2235  *   Perform verbose error reporting if not NULL.
2236  *
2237  * @return
2238  *   On success, the number of bytes consumed/necessary; if the returned
2239  *   value is less than or equal to @p flow_size, the flow has fully been
2240  *   converted and can be applied, otherwise another call with the returned
2241  *   size should be done.
2242  *   On error, a negative errno value is returned and rte_errno is set.
2243  */
2244 static int
2245 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2246                 const size_t flow_size,
2247                 const struct rte_flow_attr *attributes,
2248                 const struct rte_flow_item pattern[],
2249                 const struct rte_flow_action actions[],
2250                 struct rte_flow_error *error)
2251 {
2252         struct rte_flow local_flow = { .layers = 0, };
2253         size_t size = sizeof(*flow);
2254         union {
2255                 struct rte_flow_expand_rss buf;
2256                 uint8_t buffer[2048];
2257         } expand_buffer;
2258         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2259         struct mlx5_flow_verbs *original_verbs = NULL;
2260         size_t original_verbs_size = 0;
2261         uint32_t original_layers = 0;
2262         int expanded_pattern_idx = 0;
2263         int ret;
2264         uint32_t i;
2265
2266         if (size > flow_size)
2267                 flow = &local_flow;
2268         ret = mlx5_flow_attributes(dev, attributes, flow, error);
2269         if (ret < 0)
2270                 return ret;
2271         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2272         if (ret < 0)
2273                 return ret;
2274         if (local_flow.rss.types) {
2275                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2276                                           pattern, local_flow.rss.types,
2277                                           mlx5_support_expansion,
2278                                           local_flow.rss.level < 2 ?
2279                                           MLX5_EXPANSION_ROOT :
2280                                           MLX5_EXPANSION_ROOT_OUTER);
2281                 assert(ret > 0 &&
2282                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2283         } else {
2284                 buf->entries = 1;
2285                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2286         }
2287         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2288                                sizeof(void *));
2289         if (size <= flow_size)
2290                 flow->queue = (void *)(flow + 1);
2291         LIST_INIT(&flow->verbs);
2292         flow->layers = 0;
2293         flow->modifier = 0;
2294         flow->fate = 0;
2295         for (i = 0; i != buf->entries; ++i) {
2296                 size_t off = size;
2297                 size_t off2;
2298
2299                 flow->layers = original_layers;
2300                 size += sizeof(struct ibv_flow_attr) +
2301                         sizeof(struct mlx5_flow_verbs);
2302                 off2 = size;
2303                 if (size < flow_size) {
2304                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2305                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2306                         flow->cur_verbs->specs =
2307                                 (void *)(flow->cur_verbs->attr + 1);
2308                 }
2309                 /* On the first iteration, convert the pattern into Verbs. */
2310                 if (i == 0) {
2311                         /* Actions don't need to be converted several times. */
2312                         ret = mlx5_flow_actions(dev, actions, flow,
2313                                                 (size < flow_size) ?
2314                                                 flow_size - size : 0,
2315                                                 error);
2316                         if (ret < 0)
2317                                 return ret;
2318                         size += ret;
2319                 } else {
2320                         /*
2321                          * Subsequent iterations mean the pattern has
2322                          * already been converted and an expansion is
2323                          * necessary to match the user RSS request.  Only
2324                          * the expanded items are converted; the part
2325                          * common with the user pattern is simply copied
2326                          * into the next buffer zone.
2327                          */
2328                         size += original_verbs_size;
2329                         if (size < flow_size) {
2330                                 rte_memcpy(flow->cur_verbs->attr,
2331                                            original_verbs->attr,
2332                                            original_verbs_size +
2333                                            sizeof(struct ibv_flow_attr));
2334                                 flow->cur_verbs->size = original_verbs_size;
2335                         }
2336                 }
2337                 ret = mlx5_flow_items
2338                         (dev,
2339                          (const struct rte_flow_item *)
2340                          &buf->entry[i].pattern[expanded_pattern_idx],
2341                          flow,
2342                          (size < flow_size) ? flow_size - size : 0, error);
2343                 if (ret < 0)
2344                         return ret;
2345                 size += ret;
2346                 if (size <= flow_size) {
2347                         mlx5_flow_adjust_priority(dev, flow);
2348                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2349                 }
2350                 /*
2351                  * Keep a pointer to the first Verbs conversion and the layers
2352                  * it has encountered.
2353                  */
2354                 if (i == 0) {
2355                         original_verbs = flow->cur_verbs;
2356                         original_verbs_size = size - off2;
2357                         original_layers = flow->layers;
2358                         /*
2359                          * move the index of the expanded pattern to the
2360                          * first item not addressed yet.
2361                          */
2362                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2363                                 expanded_pattern_idx++;
2364                         } else {
2365                                 const struct rte_flow_item *item = pattern;
2366
2367                                 for (item = pattern;
2368                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2369                                      ++item)
2370                                         expanded_pattern_idx++;
2371                         }
2372                 }
2373         }
2374         /* Restore the origin layers in the flow. */
2375         flow->layers = original_layers;
2376         return size;
2377 }
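
/*
 * Illustration only: how the two-pass size negotiation of mlx5_flow_merge()
 * is consumed by a caller.  The first call with a zero-length buffer only
 * validates and reports the required size, the second call fills the freshly
 * allocated flow.  The helper name below is hypothetical; the driver's actual
 * creation path (later in this file) follows the same pattern.
 */
static __rte_unused struct rte_flow *
example_flow_merge_two_pass(struct rte_eth_dev *dev,
                            const struct rte_flow_attr *attr,
                            const struct rte_flow_item items[],
                            const struct rte_flow_action actions[],
                            struct rte_flow_error *error)
{
        struct rte_flow *flow;
        int size;

        /* First pass: no buffer, only validate and compute the needed size. */
        size = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
        if (size < 0)
                return NULL;
        flow = rte_calloc(__func__, 1, size, 0);
        if (!flow) {
                rte_errno = ENOMEM;
                return NULL;
        }
        /* Second pass: the buffer is large enough, perform the conversion. */
        if (mlx5_flow_merge(dev, flow, size, attr, items, actions, error) < 0) {
                rte_free(flow);
                return NULL;
        }
        return flow;
}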
2378
2379 /**
2380  * Look up and set the tunnel ptype in the Rx queue data.  Only a single ptype
2381  * can be used; if several tunnel rules are used on this queue, the tunnel
2382  * ptype is cleared.
2383  *
2384  * @param rxq_ctrl
2385  *   Rx queue to update.
2386  */
2387 static void
2388 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2389 {
2390         unsigned int i;
2391         uint32_t tunnel_ptype = 0;
2392
2393         /* Look up for the ptype to use. */
2394         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2395                 if (!rxq_ctrl->flow_tunnels_n[i])
2396                         continue;
2397                 if (!tunnel_ptype) {
2398                         tunnel_ptype = tunnels_info[i].ptype;
2399                 } else {
2400                         tunnel_ptype = 0;
2401                         break;
2402                 }
2403         }
2404         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2405 }
2406
2407 /**
2408  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2409  *
2410  * @param[in] dev
2411  *   Pointer to Ethernet device.
2412  * @param[in] flow
2413  *   Pointer to flow structure.
2414  */
2415 static void
2416 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2417 {
2418         struct priv *priv = dev->data->dev_private;
2419         const int mark = !!(flow->modifier &
2420                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2421         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2422         unsigned int i;
2423
2424         for (i = 0; i != flow->rss.queue_num; ++i) {
2425                 int idx = (*flow->queue)[i];
2426                 struct mlx5_rxq_ctrl *rxq_ctrl =
2427                         container_of((*priv->rxqs)[idx],
2428                                      struct mlx5_rxq_ctrl, rxq);
2429
2430                 if (mark) {
2431                         rxq_ctrl->rxq.mark = 1;
2432                         rxq_ctrl->flow_mark_n++;
2433                 }
2434                 if (tunnel) {
2435                         unsigned int j;
2436
2437                         /* Increase the counter matching the flow. */
2438                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2439                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2440                                     tunnels_info[j].tunnel) {
2441                                         rxq_ctrl->flow_tunnels_n[j]++;
2442                                         break;
2443                                 }
2444                         }
2445                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2446                 }
2447         }
2448 }
2449
2450 /**
2451  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2452  * @p flow if no other flow uses it with the same kind of request.
2453  *
2454  * @param dev
2455  *   Pointer to Ethernet device.
2456  * @param[in] flow
2457  *   Pointer to the flow.
2458  */
2459 static void
2460 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2461 {
2462         struct priv *priv = dev->data->dev_private;
2463         const int mark = !!(flow->modifier &
2464                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2465         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2466         unsigned int i;
2467
2468         assert(dev->data->dev_started);
2469         for (i = 0; i != flow->rss.queue_num; ++i) {
2470                 int idx = (*flow->queue)[i];
2471                 struct mlx5_rxq_ctrl *rxq_ctrl =
2472                         container_of((*priv->rxqs)[idx],
2473                                      struct mlx5_rxq_ctrl, rxq);
2474
2475                 if (mark) {
2476                         rxq_ctrl->flow_mark_n--;
2477                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2478                 }
2479                 if (tunnel) {
2480                         unsigned int j;
2481
2482                         /* Decrease the counter matching the flow. */
2483                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2484                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2485                                     tunnels_info[j].tunnel) {
2486                                         rxq_ctrl->flow_tunnels_n[j]--;
2487                                         break;
2488                                 }
2489                         }
2490                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2491                 }
2492         }
2493 }
2494
2495 /**
2496  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2497  *
2498  * @param dev
2499  *   Pointer to Ethernet device.
2500  */
2501 static void
2502 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2503 {
2504         struct priv *priv = dev->data->dev_private;
2505         unsigned int i;
2507
2508         for (i = 0; i != priv->rxqs_n; ++i) {
2509                 struct mlx5_rxq_ctrl *rxq_ctrl;
2510                 unsigned int j;
2511
2512                 if (!(*priv->rxqs)[i])
2513                         continue;
2514                 rxq_ctrl = container_of((*priv->rxqs)[i],
2515                                         struct mlx5_rxq_ctrl, rxq);
2516                 rxq_ctrl->flow_mark_n = 0;
2517                 rxq_ctrl->rxq.mark = 0;
2518                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2519                         rxq_ctrl->flow_tunnels_n[j] = 0;
2520                 rxq_ctrl->rxq.tunnel = 0;
2522         }
2523 }
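
/*
 * Unlike mlx5_flow_rxq_flags_trim(), the counters are reset unconditionally
 * here; this is only meant to be used once all flows have been removed from
 * the device (see mlx5_flow_stop()).
 */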
2524
2525 /**
2526  * Validate a flow supported by the NIC.
2527  *
2528  * @see rte_flow_validate()
2529  * @see rte_flow_ops
2530  */
2531 int
2532 mlx5_flow_validate(struct rte_eth_dev *dev,
2533                    const struct rte_flow_attr *attr,
2534                    const struct rte_flow_item items[],
2535                    const struct rte_flow_action actions[],
2536                    struct rte_flow_error *error)
2537 {
2538         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2539
2540         if (ret < 0)
2541                 return ret;
2542         return 0;
2543 }
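
/*
 * Application-side sketch (illustrative only; port_id, attr, pattern and
 * actions are assumed to be set up by the caller): rules are usually
 * validated through the generic rte_flow API before being created, which
 * lands in mlx5_flow_validate()/mlx5_flow_create() via mlx5_flow_ops.
 *
 *     struct rte_flow_error err;
 *     struct rte_flow *f = NULL;
 *
 *     if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *             f = rte_flow_create(port_id, &attr, pattern, actions, &err);
 */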
2544
2545 /**
2546  * Remove the flow.
2547  *
2548  * @param[in] dev
2549  *   Pointer to Ethernet device.
2550  * @param[in, out] flow
2551  *   Pointer to flow structure.
2552  */
2553 static void
2554 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2555 {
2556         struct mlx5_flow_verbs *verbs;
2557
2558         LIST_FOREACH(verbs, &flow->verbs, next) {
2559                 if (verbs->flow) {
2560                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2561                         verbs->flow = NULL;
2562                 }
2563                 if (verbs->hrxq) {
2564                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2565                                 mlx5_hrxq_drop_release(dev);
2566                         else
2567                                 mlx5_hrxq_release(dev, verbs->hrxq);
2568                         verbs->hrxq = NULL;
2569                 }
2570         }
2571 }
2572
2573 /**
2574  * Apply the flow.
2575  *
2576  * @param[in] dev
2577  *   Pointer to Ethernet device structure.
2578  * @param[in, out] flow
2579  *   Pointer to flow structure.
2580  * @param[out] error
2581  *   Pointer to error structure.
2582  *
2583  * @return
2584  *   0 on success, a negative errno value otherwise and rte_errno is set.
2585  */
2586 static int
2587 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2588                 struct rte_flow_error *error)
2589 {
2590         struct mlx5_flow_verbs *verbs;
2591         int err;
2592
2593         LIST_FOREACH(verbs, &flow->verbs, next) {
2594                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2595                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2596                         if (!verbs->hrxq) {
2597                                 rte_flow_error_set
2598                                         (error, rte_errno,
2599                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2600                                          NULL,
2601                                          "cannot get drop hash queue");
2602                                 goto error;
2603                         }
2604                 } else {
2605                         struct mlx5_hrxq *hrxq;
2606
2607                         hrxq = mlx5_hrxq_get(dev, flow->key,
2608                                              MLX5_RSS_HASH_KEY_LEN,
2609                                              verbs->hash_fields,
2610                                              (*flow->queue),
2611                                              flow->rss.queue_num);
2612                         if (!hrxq)
2613                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2614                                                      MLX5_RSS_HASH_KEY_LEN,
2615                                                      verbs->hash_fields,
2616                                                      (*flow->queue),
2617                                                      flow->rss.queue_num);
2618                         if (!hrxq) {
2619                                 rte_flow_error_set
2620                                         (error, rte_errno,
2621                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2622                                          NULL,
2623                                          "cannot get hash queue");
2624                                 goto error;
2625                         }
2626                         verbs->hrxq = hrxq;
2627                 }
2628                 verbs->flow =
2629                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2630                 if (!verbs->flow) {
2631                         rte_flow_error_set(error, errno,
2632                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2633                                            NULL,
2634                                            "hardware refuses to create flow");
2635                         goto error;
2636                 }
2637         }
2638         return 0;
2639 error:
2640         err = rte_errno; /* Save rte_errno before cleanup. */
2641         LIST_FOREACH(verbs, &flow->verbs, next) {
2642                 if (verbs->hrxq) {
2643                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2644                                 mlx5_hrxq_drop_release(dev);
2645                         else
2646                                 mlx5_hrxq_release(dev, verbs->hrxq);
2647                         verbs->hrxq = NULL;
2648                 }
2649         }
2650         rte_errno = err; /* Restore rte_errno. */
2651         return -rte_errno;
2652 }
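
/*
 * For queue/RSS fates an existing hash Rx queue matching the RSS key, hash
 * fields and queue list is reused when possible (mlx5_hrxq_get()) and a new
 * one is created otherwise; the drop fate goes through the dedicated drop
 * hash Rx queue instead.
 */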
2653
2654 /**
2655  * Create a flow and add it to @p list.
2656  *
2657  * @param dev
2658  *   Pointer to Ethernet device.
2659  * @param list
2660  *   Pointer to a TAILQ flow list.
2661  * @param[in] attr
2662  *   Flow rule attributes.
2663  * @param[in] items
2664  *   Pattern specification (list terminated by the END pattern item).
2665  * @param[in] actions
2666  *   Associated actions (list terminated by the END action).
2667  * @param[out] error
2668  *   Perform verbose error reporting if not NULL.
2669  *
2670  * @return
2671  *   A flow on success, NULL otherwise and rte_errno is set.
2672  */
2673 static struct rte_flow *
2674 mlx5_flow_list_create(struct rte_eth_dev *dev,
2675                       struct mlx5_flows *list,
2676                       const struct rte_flow_attr *attr,
2677                       const struct rte_flow_item items[],
2678                       const struct rte_flow_action actions[],
2679                       struct rte_flow_error *error)
2680 {
2681         struct rte_flow *flow = NULL;
2682         size_t size = 0;
2683         int ret;
2684
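        /*
         * First pass: with a NULL flow and a zero size mlx5_flow_merge() only
         * computes the amount of memory needed to store the flow, which is
         * then allocated and filled by the second call below.
         */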
2685         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2686         if (ret < 0)
2687                 return NULL;
2688         size = ret;
2689         flow = rte_calloc(__func__, 1, size, 0);
2690         if (!flow) {
2691                 rte_flow_error_set(error, ENOMEM,
2692                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2693                                    NULL,
2694                                    "not enough memory to create flow");
2695                 return NULL;
2696         }
2697         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2698         if (ret < 0) {
2699                 rte_free(flow);
2700                 return NULL;
2701         }
2702         assert((size_t)ret == size);
2703         if (dev->data->dev_started) {
2704                 ret = mlx5_flow_apply(dev, flow, error);
2705                 if (ret < 0) {
2706                         ret = rte_errno; /* Save rte_errno before cleanup. */
2707                         if (flow) {
2708                                 mlx5_flow_remove(dev, flow);
2709                                 rte_free(flow);
2710                         }
2711                         rte_errno = ret; /* Restore rte_errno. */
2712                         return NULL;
2713                 }
2714         }
2715         TAILQ_INSERT_TAIL(list, flow, next);
2716         mlx5_flow_rxq_flags_set(dev, flow);
2717         return flow;
2718 }
2719
2720 /**
2721  * Create a flow.
2722  *
2723  * @see rte_flow_create()
2724  * @see rte_flow_ops
2725  */
2726 struct rte_flow *
2727 mlx5_flow_create(struct rte_eth_dev *dev,
2728                  const struct rte_flow_attr *attr,
2729                  const struct rte_flow_item items[],
2730                  const struct rte_flow_action actions[],
2731                  struct rte_flow_error *error)
2732 {
2733         return mlx5_flow_list_create
2734                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2735                  attr, items, actions, error);
2736 }
2737
2738 /**
2739  * Destroy a flow in a list.
2740  *
2741  * @param dev
2742  *   Pointer to Ethernet device.
2743  * @param list
2744  *   Pointer to a TAILQ flow list.
2745  * @param[in] flow
2746  *   Flow to destroy.
2747  */
2748 static void
2749 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2750                        struct rte_flow *flow)
2751 {
2752         mlx5_flow_remove(dev, flow);
2753         TAILQ_REMOVE(list, flow, next);
2754         /*
2755          * Update RX queue flags only if port is started, otherwise it is
2756          * already clean.
2757          */
2758         if (dev->data->dev_started)
2759                 mlx5_flow_rxq_flags_trim(dev, flow);
2760         rte_free(flow);
2761 }
2762
2763 /**
2764  * Destroy all flows.
2765  *
2766  * @param dev
2767  *   Pointer to Ethernet device.
2768  * @param list
2769  *   Pointer to a TAILQ flow list.
2770  */
2771 void
2772 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2773 {
2774         while (!TAILQ_EMPTY(list)) {
2775                 struct rte_flow *flow;
2776
2777                 flow = TAILQ_FIRST(list);
2778                 mlx5_flow_list_destroy(dev, list, flow);
2779         }
2780 }
2781
2782 /**
2783  * Remove all flows.
2784  *
2785  * @param dev
2786  *   Pointer to Ethernet device.
2787  * @param list
2788  *   Pointer to a TAILQ flow list.
2789  */
2790 void
2791 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2792 {
2793         struct rte_flow *flow;
2794
2795         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2796                 mlx5_flow_remove(dev, flow);
2797         mlx5_flow_rxq_flags_clear(dev);
2798 }
2799
2800 /**
2801  * Add all flows.
2802  *
2803  * @param dev
2804  *   Pointer to Ethernet device.
2805  * @param list
2806  *   Pointer to a TAILQ flow list.
2807  *
2808  * @return
2809  *   0 on success, a negative errno value otherwise and rte_errno is set.
2810  */
2811 int
2812 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2813 {
2814         struct rte_flow *flow;
2815         struct rte_flow_error error;
2816         int ret = 0;
2817
2818         TAILQ_FOREACH(flow, list, next) {
2819                 ret = mlx5_flow_apply(dev, flow, &error);
2820                 if (ret < 0)
2821                         goto error;
2822                 mlx5_flow_rxq_flags_set(dev, flow);
2823         }
2824         return 0;
2825 error:
2826         ret = rte_errno; /* Save rte_errno before cleanup. */
2827         mlx5_flow_stop(dev, list);
2828         rte_errno = ret; /* Restore rte_errno. */
2829         return -rte_errno;
2830 }
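
/*
 * If re-applying any flow fails, the whole list is rolled back through
 * mlx5_flow_stop() so the device is left in a consistent state and the
 * original rte_errno is reported to the caller.
 */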
2831
2832 /**
2833  * Verify the flow list is empty.
2834  *
2835  * @param dev
2836  *   Pointer to Ethernet device.
2837  *
2838  * @return the number of flows not released.
2839  */
2840 int
2841 mlx5_flow_verify(struct rte_eth_dev *dev)
2842 {
2843         struct priv *priv = dev->data->dev_private;
2844         struct rte_flow *flow;
2845         int ret = 0;
2846
2847         TAILQ_FOREACH(flow, &priv->flows, next) {
2848                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2849                         dev->data->port_id, (void *)flow);
2850                 ++ret;
2851         }
2852         return ret;
2853 }
2854
2855 /**
2856  * Enable a control flow configured from the control plane.
2857  *
2858  * @param dev
2859  *   Pointer to Ethernet device.
2860  * @param eth_spec
2861  *   An Ethernet flow spec to apply.
2862  * @param eth_mask
2863  *   An Ethernet flow mask to apply.
2864  * @param vlan_spec
2865  *   A VLAN flow spec to apply.
2866  * @param vlan_mask
2867  *   A VLAN flow mask to apply.
2868  *
2869  * @return
2870  *   0 on success, a negative errno value otherwise and rte_errno is set.
2871  */
2872 int
2873 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2874                     struct rte_flow_item_eth *eth_spec,
2875                     struct rte_flow_item_eth *eth_mask,
2876                     struct rte_flow_item_vlan *vlan_spec,
2877                     struct rte_flow_item_vlan *vlan_mask)
2878 {
2879         struct priv *priv = dev->data->dev_private;
2880         const struct rte_flow_attr attr = {
2881                 .ingress = 1,
2882                 .priority = MLX5_FLOW_PRIO_RSVD,
2883         };
2884         struct rte_flow_item items[] = {
2885                 {
2886                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2887                         .spec = eth_spec,
2888                         .last = NULL,
2889                         .mask = eth_mask,
2890                 },
2891                 {
2892                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2893                                 RTE_FLOW_ITEM_TYPE_END,
2894                         .spec = vlan_spec,
2895                         .last = NULL,
2896                         .mask = vlan_mask,
2897                 },
2898                 {
2899                         .type = RTE_FLOW_ITEM_TYPE_END,
2900                 },
2901         };
2902         uint16_t queue[priv->reta_idx_n];
2903         struct rte_flow_action_rss action_rss = {
2904                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2905                 .level = 0,
2906                 .types = priv->rss_conf.rss_hf,
2907                 .key_len = priv->rss_conf.rss_key_len,
2908                 .queue_num = priv->reta_idx_n,
2909                 .key = priv->rss_conf.rss_key,
2910                 .queue = queue,
2911         };
2912         struct rte_flow_action actions[] = {
2913                 {
2914                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2915                         .conf = &action_rss,
2916                 },
2917                 {
2918                         .type = RTE_FLOW_ACTION_TYPE_END,
2919                 },
2920         };
2921         struct rte_flow *flow;
2922         struct rte_flow_error error;
2923         unsigned int i;
2924
2925         if (!priv->reta_idx_n) {
2926                 rte_errno = EINVAL;
2927                 return -rte_errno;
2928         }
2929         for (i = 0; i != priv->reta_idx_n; ++i)
2930                 queue[i] = (*priv->reta_idx)[i];
2931         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2932                                      actions, &error);
2933         if (!flow)
2934                 return -rte_errno;
2935         return 0;
2936 }
2937
2938 /**
2939  * Enable a control flow configured from the control plane.
2940  *
2941  * @param dev
2942  *   Pointer to Ethernet device.
2943  * @param eth_spec
2944  *   An Ethernet flow spec to apply.
2945  * @param eth_mask
2946  *   An Ethernet flow mask to apply.
2947  *
2948  * @return
2949  *   0 on success, a negative errno value otherwise and rte_errno is set.
2950  */
2951 int
2952 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2953                struct rte_flow_item_eth *eth_spec,
2954                struct rte_flow_item_eth *eth_mask)
2955 {
2956         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2957 }
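
/*
 * Usage sketch (illustrative; the actual control flows are installed by the
 * traffic enabling code): accepting broadcast traffic boils down to matching
 * the broadcast destination MAC with a full mask.
 *
 *     struct rte_flow_item_eth bcast = {
 *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *
 *     mlx5_ctrl_flow(dev, &bcast, &bcast);
 */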
2958
2959 /**
2960  * Destroy a flow.
2961  *
2962  * @see rte_flow_destroy()
2963  * @see rte_flow_ops
2964  */
2965 int
2966 mlx5_flow_destroy(struct rte_eth_dev *dev,
2967                   struct rte_flow *flow,
2968                   struct rte_flow_error *error __rte_unused)
2969 {
2970         struct priv *priv = dev->data->dev_private;
2971
2972         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2973         return 0;
2974 }
2975
2976 /**
2977  * Destroy all flows.
2978  *
2979  * @see rte_flow_flush()
2980  * @see rte_flow_ops
2981  */
2982 int
2983 mlx5_flow_flush(struct rte_eth_dev *dev,
2984                 struct rte_flow_error *error __rte_unused)
2985 {
2986         struct priv *priv = dev->data->dev_private;
2987
2988         mlx5_flow_list_flush(dev, &priv->flows);
2989         return 0;
2990 }
2991
2992 /**
2993  * Isolated mode.
2994  *
2995  * @see rte_flow_isolate()
2996  * @see rte_flow_ops
2997  */
2998 int
2999 mlx5_flow_isolate(struct rte_eth_dev *dev,
3000                   int enable,
3001                   struct rte_flow_error *error)
3002 {
3003         struct priv *priv = dev->data->dev_private;
3004
3005         if (dev->data->dev_started) {
3006                 rte_flow_error_set(error, EBUSY,
3007                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
3008                                    NULL,
3009                                    "port must be stopped first");
3010                 return -rte_errno;
3011         }
3012         priv->isolated = !!enable;
3013         if (enable)
3014                 dev->dev_ops = &mlx5_dev_ops_isolate;
3015         else
3016                 dev->dev_ops = &mlx5_dev_ops;
3017         return 0;
3018 }
3019
3020 /**
3021  * Convert a flow director filter to a generic flow.
3022  *
3023  * @param dev
3024  *   Pointer to Ethernet device.
3025  * @param fdir_filter
3026  *   Flow director filter to add.
3027  * @param attributes
3028  *   Generic flow parameters structure.
3029  *
3030  * @return
3031  *   0 on success, a negative errno value otherwise and rte_errno is set.
3032  */
3033 static int
3034 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
3035                          const struct rte_eth_fdir_filter *fdir_filter,
3036                          struct mlx5_fdir *attributes)
3037 {
3038         struct priv *priv = dev->data->dev_private;
3039         const struct rte_eth_fdir_input *input = &fdir_filter->input;
3040         const struct rte_eth_fdir_masks *mask =
3041                 &dev->data->dev_conf.fdir_conf.mask;
3042
3043         /* Validate queue number. */
3044         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
3045                 DRV_LOG(ERR, "port %u invalid queue number %d",
3046                         dev->data->port_id, fdir_filter->action.rx_queue);
3047                 rte_errno = EINVAL;
3048                 return -rte_errno;
3049         }
3050         attributes->attr.ingress = 1;
3051         attributes->items[0] = (struct rte_flow_item) {
3052                 .type = RTE_FLOW_ITEM_TYPE_ETH,
3053                 .spec = &attributes->l2,
3054                 .mask = &attributes->l2_mask,
3055         };
3056         switch (fdir_filter->action.behavior) {
3057         case RTE_ETH_FDIR_ACCEPT:
3058                 attributes->actions[0] = (struct rte_flow_action){
3059                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
3060                         .conf = &attributes->queue,
3061                 };
3062                 break;
3063         case RTE_ETH_FDIR_REJECT:
3064                 attributes->actions[0] = (struct rte_flow_action){
3065                         .type = RTE_FLOW_ACTION_TYPE_DROP,
3066                 };
3067                 break;
3068         default:
3069                 DRV_LOG(ERR, "port %u invalid behavior %d",
3070                         dev->data->port_id,
3071                         fdir_filter->action.behavior);
3072                 rte_errno = ENOTSUP;
3073                 return -rte_errno;
3074         }
3075         attributes->queue.index = fdir_filter->action.rx_queue;
3076         /* Handle L3. */
3077         switch (fdir_filter->input.flow_type) {
3078         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3079         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3080         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3081                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
3082                         .src_addr = input->flow.ip4_flow.src_ip,
3083                         .dst_addr = input->flow.ip4_flow.dst_ip,
3084                         .time_to_live = input->flow.ip4_flow.ttl,
3085                         .type_of_service = input->flow.ip4_flow.tos,
3086                         .next_proto_id = input->flow.ip4_flow.proto,
3087                 };
3088                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
3089                         .src_addr = mask->ipv4_mask.src_ip,
3090                         .dst_addr = mask->ipv4_mask.dst_ip,
3091                         .time_to_live = mask->ipv4_mask.ttl,
3092                         .type_of_service = mask->ipv4_mask.tos,
3093                         .next_proto_id = mask->ipv4_mask.proto,
3094                 };
3095                 attributes->items[1] = (struct rte_flow_item){
3096                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
3097                         .spec = &attributes->l3,
3098                         .mask = &attributes->l3_mask,
3099                 };
3100                 break;
3101         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3102         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3103         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3104                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3105                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3106                         .proto = input->flow.ipv6_flow.proto,
3107                 };
3108
3109                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3110                        input->flow.ipv6_flow.src_ip,
3111                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3112                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3113                        input->flow.ipv6_flow.dst_ip,
3114                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3115                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3116                        mask->ipv6_mask.src_ip,
3117                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3118                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3119                        mask->ipv6_mask.dst_ip,
3120                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3121                 attributes->items[1] = (struct rte_flow_item){
3122                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3123                         .spec = &attributes->l3,
3124                         .mask = &attributes->l3_mask,
3125                 };
3126                 break;
3127         default:
3128                 DRV_LOG(ERR, "port %u invalid flow type %d",
3129                         dev->data->port_id, fdir_filter->input.flow_type);
3130                 rte_errno = ENOTSUP;
3131                 return -rte_errno;
3132         }
3133         /* Handle L4. */
3134         switch (fdir_filter->input.flow_type) {
3135         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3136                 attributes->l4.udp.hdr = (struct udp_hdr){
3137                         .src_port = input->flow.udp4_flow.src_port,
3138                         .dst_port = input->flow.udp4_flow.dst_port,
3139                 };
3140                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3141                         .src_port = mask->src_port_mask,
3142                         .dst_port = mask->dst_port_mask,
3143                 };
3144                 attributes->items[2] = (struct rte_flow_item){
3145                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3146                         .spec = &attributes->l4,
3147                         .mask = &attributes->l4_mask,
3148                 };
3149                 break;
3150         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3151                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3152                         .src_port = input->flow.tcp4_flow.src_port,
3153                         .dst_port = input->flow.tcp4_flow.dst_port,
3154                 };
3155                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3156                         .src_port = mask->src_port_mask,
3157                         .dst_port = mask->dst_port_mask,
3158                 };
3159                 attributes->items[2] = (struct rte_flow_item){
3160                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3161                         .spec = &attributes->l4,
3162                         .mask = &attributes->l4_mask,
3163                 };
3164                 break;
3165         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3166                 attributes->l4.udp.hdr = (struct udp_hdr){
3167                         .src_port = input->flow.udp6_flow.src_port,
3168                         .dst_port = input->flow.udp6_flow.dst_port,
3169                 };
3170                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3171                         .src_port = mask->src_port_mask,
3172                         .dst_port = mask->dst_port_mask,
3173                 };
3174                 attributes->items[2] = (struct rte_flow_item){
3175                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3176                         .spec = &attributes->l4,
3177                         .mask = &attributes->l4_mask,
3178                 };
3179                 break;
3180         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3181                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3182                         .src_port = input->flow.tcp6_flow.src_port,
3183                         .dst_port = input->flow.tcp6_flow.dst_port,
3184                 };
3185                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3186                         .src_port = mask->src_port_mask,
3187                         .dst_port = mask->dst_port_mask,
3188                 };
3189                 attributes->items[2] = (struct rte_flow_item){
3190                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3191                         .spec = &attributes->l4,
3192                         .mask = &attributes->l4_mask,
3193                 };
3194                 break;
3195         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3196         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3197                 break;
3198         default:
3199                 DRV_LOG(ERR, "port %u invalid flow type %d",
3200                         dev->data->port_id, fdir_filter->input.flow_type);
3201                 rte_errno = ENOTSUP;
3202                 return -rte_errno;
3203         }
3204         return 0;
3205 }
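
/*
 * For example, a perfect filter on IPv4/UDP with an "accept to queue N"
 * action becomes the generic rule ETH / IPV4 (addresses, TOS, TTL, proto) /
 * UDP (ports) with a QUEUE action on index N, which is then installed
 * through the regular mlx5_flow_list_create() path.
 */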
3206
3207 /**
3208  * Add a new flow director filter and store it in the list.
3209  *
3210  * @param dev
3211  *   Pointer to Ethernet device.
3212  * @param fdir_filter
3213  *   Flow director filter to add.
3214  *
3215  * @return
3216  *   0 on success, a negative errno value otherwise and rte_errno is set.
3217  */
3218 static int
3219 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3220                      const struct rte_eth_fdir_filter *fdir_filter)
3221 {
3222         struct priv *priv = dev->data->dev_private;
3223         struct mlx5_fdir attributes = {
3224                 .attr.group = 0,
3225                 .l2_mask = {
3226                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3227                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3228                         .type = 0,
3229                 },
3230         };
3231         struct rte_flow_error error;
3232         struct rte_flow *flow;
3233         int ret;
3234
3235         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3236         if (ret)
3237                 return ret;
3238         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3239                                      attributes.items, attributes.actions,
3240                                      &error);
3241         if (flow) {
3242                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3243                         (void *)flow);
3244                 return 0;
3245         }
3246         return -rte_errno;
3247 }
3248
3249 /**
3250  * Delete specific filter.
3251  *
3252  * @param dev
3253  *   Pointer to Ethernet device.
3254  * @param fdir_filter
3255  *   Filter to be deleted.
3256  *
3257  * @return
3258  *   0 on success, a negative errno value otherwise and rte_errno is set.
3259  */
3260 static int
3261 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
3262                         const struct rte_eth_fdir_filter *fdir_filter
3263                         __rte_unused)
3264 {
3265         rte_errno = ENOTSUP;
3266         return -rte_errno;
3267 }
3268
3269 /**
3270  * Update queue for specific filter.
3271  *
3272  * @param dev
3273  *   Pointer to Ethernet device.
3274  * @param fdir_filter
3275  *   Filter to be updated.
3276  *
3277  * @return
3278  *   0 on success, a negative errno value otherwise and rte_errno is set.
3279  */
3280 static int
3281 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3282                         const struct rte_eth_fdir_filter *fdir_filter)
3283 {
3284         int ret;
3285
3286         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3287         if (ret)
3288                 return ret;
3289         return mlx5_fdir_filter_add(dev, fdir_filter);
3290 }
3291
3292 /**
3293  * Flush all filters.
3294  *
3295  * @param dev
3296  *   Pointer to Ethernet device.
3297  */
3298 static void
3299 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3300 {
3301         struct priv *priv = dev->data->dev_private;
3302
3303         mlx5_flow_list_flush(dev, &priv->flows);
3304 }
3305
3306 /**
3307  * Get flow director information.
3308  *
3309  * @param dev
3310  *   Pointer to Ethernet device.
3311  * @param[out] fdir_info
3312  *   Resulting flow director information.
3313  */
3314 static void
3315 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3316 {
3317         struct rte_eth_fdir_masks *mask =
3318                 &dev->data->dev_conf.fdir_conf.mask;
3319
3320         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3321         fdir_info->guarant_spc = 0;
3322         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3323         fdir_info->max_flexpayload = 0;
3324         fdir_info->flow_types_mask[0] = 0;
3325         fdir_info->flex_payload_unit = 0;
3326         fdir_info->max_flex_payload_segment_num = 0;
3327         fdir_info->flex_payload_limit = 0;
3328         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3329 }
3330
3331 /**
3332  * Deal with flow director operations.
3333  *
3334  * @param dev
3335  *   Pointer to Ethernet device.
3336  * @param filter_op
3337  *   Operation to perform.
3338  * @param arg
3339  *   Pointer to operation-specific structure.
3340  *
3341  * @return
3342  *   0 on success, a negative errno value otherwise and rte_errno is set.
3343  */
3344 static int
3345 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3346                     void *arg)
3347 {
3348         enum rte_fdir_mode fdir_mode =
3349                 dev->data->dev_conf.fdir_conf.mode;
3350
3351         if (filter_op == RTE_ETH_FILTER_NOP)
3352                 return 0;
3353         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3354             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3355                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3356                         dev->data->port_id, fdir_mode);
3357                 rte_errno = EINVAL;
3358                 return -rte_errno;
3359         }
3360         switch (filter_op) {
3361         case RTE_ETH_FILTER_ADD:
3362                 return mlx5_fdir_filter_add(dev, arg);
3363         case RTE_ETH_FILTER_UPDATE:
3364                 return mlx5_fdir_filter_update(dev, arg);
3365         case RTE_ETH_FILTER_DELETE:
3366                 return mlx5_fdir_filter_delete(dev, arg);
3367         case RTE_ETH_FILTER_FLUSH:
3368                 mlx5_fdir_filter_flush(dev);
3369                 break;
3370         case RTE_ETH_FILTER_INFO:
3371                 mlx5_fdir_info_get(dev, arg);
3372                 break;
3373         default:
3374                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3375                         dev->data->port_id, filter_op);
3376                 rte_errno = EINVAL;
3377                 return -rte_errno;
3378         }
3379         return 0;
3380 }
3381
3382 /**
3383  * Manage filter operations.
3384  *
3385  * @param dev
3386  *   Pointer to Ethernet device structure.
3387  * @param filter_type
3388  *   Filter type.
3389  * @param filter_op
3390  *   Operation to perform.
3391  * @param arg
3392  *   Pointer to operation-specific structure.
3393  *
3394  * @return
3395  *   0 on success, a negative errno value otherwise and rte_errno is set.
3396  */
3397 int
3398 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3399                      enum rte_filter_type filter_type,
3400                      enum rte_filter_op filter_op,
3401                      void *arg)
3402 {
3403         switch (filter_type) {
3404         case RTE_ETH_FILTER_GENERIC:
3405                 if (filter_op != RTE_ETH_FILTER_GET) {
3406                         rte_errno = EINVAL;
3407                         return -rte_errno;
3408                 }
3409                 *(const void **)arg = &mlx5_flow_ops;
3410                 return 0;
3411         case RTE_ETH_FILTER_FDIR:
3412                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3413         default:
3414                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3415                         dev->data->port_id, filter_type);
3416                 rte_errno = ENOTSUP;
3417                 return -rte_errno;
3418         }
3419         return 0;
3420 }