net/mlx5: add flow GRE item
drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern outer Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45
46 /* Pattern inner Layer bits. */
47 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
48 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
50 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
51 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
52 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
53
54 /* Pattern tunnel Layer bits. */
55 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
56 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
57 #define MLX5_FLOW_LAYER_GRE (1u << 14)
58
59 /* Outer Masks. */
60 #define MLX5_FLOW_LAYER_OUTER_L3 \
61         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
62 #define MLX5_FLOW_LAYER_OUTER_L4 \
63         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
64 #define MLX5_FLOW_LAYER_OUTER \
65         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
66          MLX5_FLOW_LAYER_OUTER_L4)
67
68 /* Tunnel Masks. */
69 #define MLX5_FLOW_LAYER_TUNNEL \
70         (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE | \
71          MLX5_FLOW_LAYER_GRE)
72
73 /* Inner Masks. */
74 #define MLX5_FLOW_LAYER_INNER_L3 \
75         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
76 #define MLX5_FLOW_LAYER_INNER_L4 \
77         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
78 #define MLX5_FLOW_LAYER_INNER \
79         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
80          MLX5_FLOW_LAYER_INNER_L4)
81
82 /* Actions that modify the fate of matching traffic. */
83 #define MLX5_FLOW_FATE_DROP (1u << 0)
84 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
85 #define MLX5_FLOW_FATE_RSS (1u << 2)
86
87 /* Modify a packet. */
88 #define MLX5_FLOW_MOD_FLAG (1u << 0)
89 #define MLX5_FLOW_MOD_MARK (1u << 1)
90
91 /* Possible L3 layer protocols for filtering. */
92 #define MLX5_IP_PROTOCOL_TCP 6
93 #define MLX5_IP_PROTOCOL_UDP 17
94 #define MLX5_IP_PROTOCOL_GRE 47
95
96 /* Priority reserved for default flows. */
97 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
98
99 enum mlx5_expansion {
100         MLX5_EXPANSION_ROOT,
101         MLX5_EXPANSION_ROOT_OUTER,
102         MLX5_EXPANSION_OUTER_ETH,
103         MLX5_EXPANSION_OUTER_IPV4,
104         MLX5_EXPANSION_OUTER_IPV4_UDP,
105         MLX5_EXPANSION_OUTER_IPV4_TCP,
106         MLX5_EXPANSION_OUTER_IPV6,
107         MLX5_EXPANSION_OUTER_IPV6_UDP,
108         MLX5_EXPANSION_OUTER_IPV6_TCP,
109         MLX5_EXPANSION_VXLAN,
110         MLX5_EXPANSION_VXLAN_GPE,
111         MLX5_EXPANSION_GRE,
112         MLX5_EXPANSION_ETH,
113         MLX5_EXPANSION_IPV4,
114         MLX5_EXPANSION_IPV4_UDP,
115         MLX5_EXPANSION_IPV4_TCP,
116         MLX5_EXPANSION_IPV6,
117         MLX5_EXPANSION_IPV6_UDP,
118         MLX5_EXPANSION_IPV6_TCP,
119 };
120
121 /** Supported expansion of items. */
122 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
123         [MLX5_EXPANSION_ROOT] = {
124                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
125                                                  MLX5_EXPANSION_IPV4,
126                                                  MLX5_EXPANSION_IPV6),
127                 .type = RTE_FLOW_ITEM_TYPE_END,
128         },
129         [MLX5_EXPANSION_ROOT_OUTER] = {
130                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
131                                                  MLX5_EXPANSION_OUTER_IPV4,
132                                                  MLX5_EXPANSION_OUTER_IPV6),
133                 .type = RTE_FLOW_ITEM_TYPE_END,
134         },
135         [MLX5_EXPANSION_OUTER_ETH] = {
136                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
137                                                  MLX5_EXPANSION_OUTER_IPV6),
138                 .type = RTE_FLOW_ITEM_TYPE_ETH,
139                 .rss_types = 0,
140         },
141         [MLX5_EXPANSION_OUTER_IPV4] = {
142                 .next = RTE_FLOW_EXPAND_RSS_NEXT
143                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
144                          MLX5_EXPANSION_OUTER_IPV4_TCP,
145                          MLX5_EXPANSION_GRE),
146                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
147                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
148                         ETH_RSS_NONFRAG_IPV4_OTHER,
149         },
150         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
151                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
152                                                  MLX5_EXPANSION_VXLAN_GPE),
153                 .type = RTE_FLOW_ITEM_TYPE_UDP,
154                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
155         },
156         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
157                 .type = RTE_FLOW_ITEM_TYPE_TCP,
158                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
159         },
160         [MLX5_EXPANSION_OUTER_IPV6] = {
161                 .next = RTE_FLOW_EXPAND_RSS_NEXT
162                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
163                          MLX5_EXPANSION_OUTER_IPV6_TCP),
164                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
165                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
166                         ETH_RSS_NONFRAG_IPV6_OTHER,
167         },
168         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
169                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
170                                                  MLX5_EXPANSION_VXLAN_GPE),
171                 .type = RTE_FLOW_ITEM_TYPE_UDP,
172                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
173         },
174         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
175                 .type = RTE_FLOW_ITEM_TYPE_TCP,
176                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
177         },
178         [MLX5_EXPANSION_VXLAN] = {
179                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
180                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
181         },
182         [MLX5_EXPANSION_VXLAN_GPE] = {
183                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
184                                                  MLX5_EXPANSION_IPV4,
185                                                  MLX5_EXPANSION_IPV6),
186                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
187         },
188         [MLX5_EXPANSION_GRE] = {
189                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4),
190                 .type = RTE_FLOW_ITEM_TYPE_GRE,
191         },
192         [MLX5_EXPANSION_ETH] = {
193                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
194                                                  MLX5_EXPANSION_IPV6),
195                 .type = RTE_FLOW_ITEM_TYPE_ETH,
196         },
197         [MLX5_EXPANSION_IPV4] = {
198                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
199                                                  MLX5_EXPANSION_IPV4_TCP),
200                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
201                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
202                         ETH_RSS_NONFRAG_IPV4_OTHER,
203         },
204         [MLX5_EXPANSION_IPV4_UDP] = {
205                 .type = RTE_FLOW_ITEM_TYPE_UDP,
206                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
207         },
208         [MLX5_EXPANSION_IPV4_TCP] = {
209                 .type = RTE_FLOW_ITEM_TYPE_TCP,
210                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
211         },
212         [MLX5_EXPANSION_IPV6] = {
213                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
214                                                  MLX5_EXPANSION_IPV6_TCP),
215                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
216                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
217                         ETH_RSS_NONFRAG_IPV6_OTHER,
218         },
219         [MLX5_EXPANSION_IPV6_UDP] = {
220                 .type = RTE_FLOW_ITEM_TYPE_UDP,
221                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
222         },
223         [MLX5_EXPANSION_IPV6_TCP] = {
224                 .type = RTE_FLOW_ITEM_TYPE_TCP,
225                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
226         },
227 };
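/*
 * Informal example of how the graph above is meant to be used: a pattern
 * such as "eth / ipv4 / udp" combined with an RSS action requesting UDP
 * hashing can be completed along OUTER_ETH -> OUTER_IPV4 ->
 * OUTER_IPV4_UDP, and RSS types covering tunnels may additionally expand
 * through the VXLAN/VXLAN_GPE/GRE nodes down to the inner ETH/IPV4/IPV6
 * entries.  The .rss_types field of each node indicates which ETH_RSS_*
 * bits make that expansion relevant.
 */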
228
229 /** Verbs flow specification, attributes and related objects. */
230 struct mlx5_flow_verbs {
231         LIST_ENTRY(mlx5_flow_verbs) next;
232         unsigned int size; /**< Size of the attribute. */
233         struct {
234                 struct ibv_flow_attr *attr;
235                 /**< Pointer to the Specification buffer. */
236                 uint8_t *specs; /**< Pointer to the specifications. */
237         };
238         struct ibv_flow *flow; /**< Verbs flow pointer. */
239         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
240         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
241 };
242
243 /* Flow structure. */
244 struct rte_flow {
245         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
246         struct rte_flow_attr attributes; /**< User flow attribute. */
247         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
248         uint32_t layers;
249         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
250         uint32_t modifier;
251         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
252         uint32_t fate;
253         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
254         uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
255         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
256         struct mlx5_flow_verbs *cur_verbs;
257         /**< Current Verbs flow structure being filled. */
258         struct rte_flow_action_rss rss;/**< RSS context. */
259         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
260         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
261 };
262
263 static const struct rte_flow_ops mlx5_flow_ops = {
264         .validate = mlx5_flow_validate,
265         .create = mlx5_flow_create,
266         .destroy = mlx5_flow_destroy,
267         .flush = mlx5_flow_flush,
268         .isolate = mlx5_flow_isolate,
269 };
270
271 /* Convert FDIR request to Generic flow. */
272 struct mlx5_fdir {
273         struct rte_flow_attr attr;
274         struct rte_flow_action actions[2];
275         struct rte_flow_item items[4];
276         struct rte_flow_item_eth l2;
277         struct rte_flow_item_eth l2_mask;
278         union {
279                 struct rte_flow_item_ipv4 ipv4;
280                 struct rte_flow_item_ipv6 ipv6;
281         } l3;
282         union {
283                 struct rte_flow_item_ipv4 ipv4;
284                 struct rte_flow_item_ipv6 ipv6;
285         } l3_mask;
286         union {
287                 struct rte_flow_item_udp udp;
288                 struct rte_flow_item_tcp tcp;
289         } l4;
290         union {
291                 struct rte_flow_item_udp udp;
292                 struct rte_flow_item_tcp tcp;
293         } l4_mask;
294         struct rte_flow_action_queue queue;
295 };
296
297 /* Verbs specification header. */
298 struct ibv_spec_header {
299         enum ibv_flow_spec_type type;
300         uint16_t size;
301 };
302
303 /*
304  * Number of sub-priorities.
305  * For each kind of pattern matching (i.e. L2, L3, L4) to match correctly
306  * on the NIC (firmware dependent), L4 must have the highest priority,
307  * followed by L3 and finally L2.
308  */
309 #define MLX5_PRIORITY_MAP_L2 2
310 #define MLX5_PRIORITY_MAP_L3 1
311 #define MLX5_PRIORITY_MAP_L4 0
312 #define MLX5_PRIORITY_MAP_MAX 3
313
314 /* Flow to Verbs priority map when 8 Verbs priorities are available. */
315 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
316         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
317 };
318
319 /* Flow to Verbs priority map when 16 Verbs priorities are available. */
320 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
321         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
322         { 9, 10, 11 }, { 12, 13, 14 },
323 };
324
325 /* Tunnel information. */
326 struct mlx5_flow_tunnel_info {
327         uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
328         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
329 };
330
331 static struct mlx5_flow_tunnel_info tunnels_info[] = {
332         {
333                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
334                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
335         },
336         {
337                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
338                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
339         },
340         {
341                 .tunnel = MLX5_FLOW_LAYER_GRE,
342                 .ptype = RTE_PTYPE_TUNNEL_GRE,
343         },
344 };
345
346 /**
347  * Discover the maximum number of priorities available.
348  *
349  * @param[in] dev
350  *   Pointer to Ethernet device.
351  *
352  * @return
353  *   Number of supported flow priorities on success, a negative errno
354  *   value otherwise and rte_errno is set.
355  */
356 int
357 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
358 {
359         struct {
360                 struct ibv_flow_attr attr;
361                 struct ibv_flow_spec_eth eth;
362                 struct ibv_flow_spec_action_drop drop;
363         } flow_attr = {
364                 .attr = {
365                         .num_of_specs = 2,
366                 },
367                 .eth = {
368                         .type = IBV_FLOW_SPEC_ETH,
369                         .size = sizeof(struct ibv_flow_spec_eth),
370                 },
371                 .drop = {
372                         .size = sizeof(struct ibv_flow_spec_action_drop),
373                         .type = IBV_FLOW_SPEC_ACTION_DROP,
374                 },
375         };
376         struct ibv_flow *flow;
377         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
378         uint16_t vprio[] = { 8, 16 };
379         int i;
380         int priority = 0;
381
382         if (!drop) {
383                 rte_errno = ENOTSUP;
384                 return -rte_errno;
385         }
386         for (i = 0; i != RTE_DIM(vprio); i++) {
387                 flow_attr.attr.priority = vprio[i] - 1;
388                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
389                 if (!flow)
390                         break;
391                 claim_zero(mlx5_glue->destroy_flow(flow));
392                 priority = vprio[i];
393         }
394         switch (priority) {
395         case 8:
396                 priority = RTE_DIM(priority_map_3);
397                 break;
398         case 16:
399                 priority = RTE_DIM(priority_map_5);
400                 break;
401         default:
402                 rte_errno = ENOTSUP;
403                 DRV_LOG(ERR,
404                         "port %u verbs maximum priority: %d expected 8/16",
405                         dev->data->port_id, vprio[i]);
406                 return -rte_errno;
407         }
408         mlx5_hrxq_drop_release(dev);
409         DRV_LOG(INFO, "port %u flow maximum priority: %d",
410                 dev->data->port_id, priority);
411         return priority;
412 }
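/*
 * For instance, if creating a drop flow at Verbs priority 15 succeeds,
 * the device exposes 16 Verbs priorities and priority_map_5 (5 flow
 * priorities) is selected; if only priority 7 works, priority_map_3
 * (3 flow priorities) is used instead.
 */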
413
414 /**
415  * Adjust flow priority.
416  *
417  * @param dev
418  *   Pointer to Ethernet device.
419  * @param flow
420  *   Pointer to an rte flow.
421  */
422 static void
423 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
424 {
425         struct priv *priv = dev->data->dev_private;
426         uint32_t priority = flow->attributes.priority;
427         uint32_t subpriority = flow->cur_verbs->attr->priority;
428
429         switch (priv->config.flow_prio) {
430         case RTE_DIM(priority_map_3):
431                 priority = priority_map_3[priority][subpriority];
432                 break;
433         case RTE_DIM(priority_map_5):
434                 priority = priority_map_5[priority][subpriority];
435                 break;
436         }
437         flow->cur_verbs->attr->priority = priority;
438 }
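/*
 * Worked example: on a device with 16 Verbs priorities (priority_map_5),
 * a flow whose attribute priority is 1 and whose last matched layer is
 * L3 (sub-priority MLX5_PRIORITY_MAP_L3 == 1) is given Verbs priority
 * priority_map_5[1][1] == 4.
 */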
439
440 /**
441  * Verify the @p attributes will be correctly understood by the NIC and store
442  * them in the @p flow if everything is correct.
443  *
444  * @param[in] dev
445  *   Pointer to Ethernet device.
446  * @param[in] attributes
447  *   Pointer to flow attributes
448  * @param[in, out] flow
449  *   Pointer to the rte_flow structure.
450  * @param[out] error
451  *   Pointer to error structure.
452  *
453  * @return
454  *   0 on success, a negative errno value otherwise and rte_errno is set.
455  */
456 static int
457 mlx5_flow_attributes(struct rte_eth_dev *dev,
458                      const struct rte_flow_attr *attributes,
459                      struct rte_flow *flow,
460                      struct rte_flow_error *error)
461 {
462         uint32_t priority_max =
463                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
464
465         if (attributes->group)
466                 return rte_flow_error_set(error, ENOTSUP,
467                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
468                                           NULL,
469                                           "groups are not supported");
470         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
471             attributes->priority >= priority_max)
472                 return rte_flow_error_set(error, ENOTSUP,
473                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
474                                           NULL,
475                                           "priority out of range");
476         if (attributes->egress)
477                 return rte_flow_error_set(error, ENOTSUP,
478                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
479                                           NULL,
480                                           "egress is not supported");
481         if (attributes->transfer)
482                 return rte_flow_error_set(error, ENOTSUP,
483                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
484                                           NULL,
485                                           "transfer is not supported");
486         if (!attributes->ingress)
487                 return rte_flow_error_set(error, ENOTSUP,
488                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
489                                           NULL,
490                                           "ingress attribute is mandatory");
491         flow->attributes = *attributes;
492         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
493                 flow->attributes.priority = priority_max;
494         return 0;
495 }
496
497 /**
498  * Verify the @p item specifications (spec, last, mask) are compatible with the
499  * NIC capabilities.
500  *
501  * @param[in] item
502  *   Item specification.
503  * @param[in] mask
504  *   @p item->mask or flow default bit-masks.
505  * @param[in] nic_mask
506  *   Bit-masks covering supported fields by the NIC to compare with user mask.
507  * @param[in] size
508  *   Bit-masks size in bytes.
509  * @param[out] error
510  *   Pointer to error structure.
511  *
512  * @return
513  *   0 on success, a negative errno value otherwise and rte_errno is set.
514  */
515 static int
516 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
517                           const uint8_t *mask,
518                           const uint8_t *nic_mask,
519                           unsigned int size,
520                           struct rte_flow_error *error)
521 {
522         unsigned int i;
523
524         assert(nic_mask);
525         for (i = 0; i < size; ++i)
526                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
527                         return rte_flow_error_set(error, ENOTSUP,
528                                                   RTE_FLOW_ERROR_TYPE_ITEM,
529                                                   item,
530                                                   "mask enables non supported"
531                                                   " bits");
532         if (!item->spec && (item->mask || item->last))
533                 return rte_flow_error_set(error, EINVAL,
534                                           RTE_FLOW_ERROR_TYPE_ITEM,
535                                           item,
536                                           "mask/last without a spec is not"
537                                           " supported");
538         if (item->spec && item->last) {
539                 uint8_t spec[size];
540                 uint8_t last[size];
541                 unsigned int i;
542                 int ret;
543
544                 for (i = 0; i < size; ++i) {
545                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
546                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
547                 }
548                 ret = memcmp(spec, last, size);
549                 if (ret != 0)
550                         return rte_flow_error_set(error, ENOTSUP,
551                                                   RTE_FLOW_ERROR_TYPE_ITEM,
552                                                   item,
553                                                   "range is not supported");
554         }
555         return 0;
556 }
557
558 /**
559  * Add a verbs item specification into @p flow.
560  *
561  * @param[in, out] flow
562  *   Pointer to flow structure.
563  *   Pointer to the specification to add.
564  *   Create specification.
565  * @param[in] size
566  *   Size in bytes of the specification to copy.
567  */
568 static void
569 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
570 {
571         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
572
573         if (verbs->specs) {
574                 void *dst;
575
576                 dst = (void *)(verbs->specs + verbs->size);
577                 memcpy(dst, src, size);
578                 ++verbs->attr->num_of_specs;
579         }
580         verbs->size += size;
581 }
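/*
 * Note: while no specification buffer is attached (verbs->specs == NULL),
 * only verbs->size is accumulated, which presumably serves the size-query
 * pass described in the converters' @return paragraphs; once a buffer is
 * present the specification is also copied and num_of_specs incremented.
 */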
582
583 /**
584  * Adjust verbs hash fields according to the @p flow information.
585  *
586  * @param[in, out] flow
587  *   Pointer to flow structure.
588  * @param[in] tunnel
589  *   1 when the hash field is for a tunnel item.
590  * @param[in] layer_types
591  *   ETH_RSS_* types.
592  * @param[in] hash_fields
593  *   Item hash fields.
594  */
595 static void
596 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
597                                   int tunnel __rte_unused,
598                                   uint32_t layer_types, uint64_t hash_fields)
599 {
600 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
601         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
602         if (flow->rss.level == 2 && !tunnel)
603                 hash_fields = 0;
604         else if (flow->rss.level < 2 && tunnel)
605                 hash_fields = 0;
606 #endif
607         if (!(flow->rss.types & layer_types))
608                 hash_fields = 0;
609         flow->cur_verbs->hash_fields |= hash_fields;
610 }
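/*
 * E.g. when HAVE_IBV_DEVICE_TUNNEL_SUPPORT is defined and rss.level == 2
 * (inner RSS), only hash fields coming from tunnel (inner) items are kept
 * and tagged with IBV_RX_HASH_INNER, while with a lower level the inner
 * ones are dropped; in both cases the fields are also cleared when the
 * layer is not part of flow->rss.types.
 */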
611
612 /**
613  * Convert the @p item into a Verbs specification after ensuring the NIC
614  * will understand and process it correctly.
615  * If the necessary size for the conversion is greater than the @p flow_size,
616  * nothing is written in @p flow, but the validation is still performed.
617  *
618  * @param[in] item
619  *   Item specification.
620  * @param[in, out] flow
621  *   Pointer to flow structure.
622  * @param[in] flow_size
623  *   Size in bytes of the available space in @p flow, if too small, nothing is
624  *   written.
625  * @param[out] error
626  *   Pointer to error structure.
627  *
628  * @return
629  *   On success, the number of bytes consumed/necessary. If the returned
630  *   value is less than or equal to @p flow_size, the @p item has been fully
631  *   converted; otherwise another call with the returned size should be made.
632  *   On error, a negative errno value is returned and rte_errno is set.
633  */
634 static int
635 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
636                    const size_t flow_size, struct rte_flow_error *error)
637 {
638         const struct rte_flow_item_eth *spec = item->spec;
639         const struct rte_flow_item_eth *mask = item->mask;
640         const struct rte_flow_item_eth nic_mask = {
641                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
642                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
643                 .type = RTE_BE16(0xffff),
644         };
645         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
646         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
647         struct ibv_flow_spec_eth eth = {
648                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
649                 .size = size,
650         };
651         int ret;
652
653         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
654                             MLX5_FLOW_LAYER_OUTER_L2))
655                 return rte_flow_error_set(error, ENOTSUP,
656                                           RTE_FLOW_ERROR_TYPE_ITEM,
657                                           item,
658                                           "L2 layers already configured");
659         if (!mask)
660                 mask = &rte_flow_item_eth_mask;
661         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
662                                         (const uint8_t *)&nic_mask,
663                                         sizeof(struct rte_flow_item_eth),
664                                         error);
665         if (ret)
666                 return ret;
667         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
668                 MLX5_FLOW_LAYER_OUTER_L2;
669         if (size > flow_size)
670                 return size;
671         if (spec) {
672                 unsigned int i;
673
674                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
675                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
676                 eth.val.ether_type = spec->type;
677                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
678                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
679                 eth.mask.ether_type = mask->type;
680                 /* Remove unwanted bits from values. */
681                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
682                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
683                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
684                 }
685                 eth.val.ether_type &= eth.mask.ether_type;
686         }
687         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
688         mlx5_flow_spec_verbs_add(flow, &eth, size);
689         return size;
690 }
691
692 /**
693  * Update the VLAN tag in the Verbs Ethernet specification.
694  *
695  * @param[in, out] attr
696  *   Pointer to Verbs attributes structure.
697  * @param[in] eth
698  *   Verbs structure containing the VLAN information to copy.
699  */
700 static void
701 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
702                            struct ibv_flow_spec_eth *eth)
703 {
704         unsigned int i;
705         const enum ibv_flow_spec_type search = eth->type;
706         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
707                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
708
709         for (i = 0; i != attr->num_of_specs; ++i) {
710                 if (hdr->type == search) {
711                         struct ibv_flow_spec_eth *e =
712                                 (struct ibv_flow_spec_eth *)hdr;
713
714                         e->val.vlan_tag = eth->val.vlan_tag;
715                         e->mask.vlan_tag = eth->mask.vlan_tag;
716                         e->val.ether_type = eth->val.ether_type;
717                         e->mask.ether_type = eth->mask.ether_type;
718                         break;
719                 }
720                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
721         }
722 }
723
724 /**
725  * Convert the @p item into @p flow (or by updating the already present
726  * Ethernet Verbs) specification after ensuring the NIC will understand and
727  * process it correctly.
728  * If the necessary size for the conversion is greater than the @p flow_size,
729  * nothing is written in @p flow, but the validation is still performed.
730  *
731  * @param[in] item
732  *   Item specification.
733  * @param[in, out] flow
734  *   Pointer to flow structure.
735  * @param[in] flow_size
736  *   Size in bytes of the available space in @p flow, if too small, nothing is
737  *   written.
738  * @param[out] error
739  *   Pointer to error structure.
740  *
741  * @return
742  *   On success, the number of bytes consumed/necessary. If the returned
743  *   value is less than or equal to @p flow_size, the @p item has been fully
744  *   converted; otherwise another call with the returned size should be made.
745  *   On error, a negative errno value is returned and rte_errno is set.
746  */
747 static int
748 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
749                     const size_t flow_size, struct rte_flow_error *error)
750 {
751         const struct rte_flow_item_vlan *spec = item->spec;
752         const struct rte_flow_item_vlan *mask = item->mask;
753         const struct rte_flow_item_vlan nic_mask = {
754                 .tci = RTE_BE16(0x0fff),
755                 .inner_type = RTE_BE16(0xffff),
756         };
757         unsigned int size = sizeof(struct ibv_flow_spec_eth);
758         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
759         struct ibv_flow_spec_eth eth = {
760                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
761                 .size = size,
762         };
763         int ret;
764         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
765                                         MLX5_FLOW_LAYER_INNER_L4) :
766                 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
767         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
768                 MLX5_FLOW_LAYER_OUTER_VLAN;
769         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
770                 MLX5_FLOW_LAYER_OUTER_L2;
771
772         if (flow->layers & vlanm)
773                 return rte_flow_error_set(error, ENOTSUP,
774                                           RTE_FLOW_ERROR_TYPE_ITEM,
775                                           item,
776                                           "VLAN layer already configured");
777         else if ((flow->layers & l34m) != 0)
778                 return rte_flow_error_set(error, ENOTSUP,
779                                           RTE_FLOW_ERROR_TYPE_ITEM,
780                                           item,
781                                           "L2 layer cannot follow L3/L4 layer");
782         if (!mask)
783                 mask = &rte_flow_item_vlan_mask;
784         ret = mlx5_flow_item_acceptable
785                 (item, (const uint8_t *)mask,
786                  (const uint8_t *)&nic_mask,
787                  sizeof(struct rte_flow_item_vlan), error);
788         if (ret)
789                 return ret;
790         if (spec) {
791                 eth.val.vlan_tag = spec->tci;
792                 eth.mask.vlan_tag = mask->tci;
793                 eth.val.vlan_tag &= eth.mask.vlan_tag;
794                 eth.val.ether_type = spec->inner_type;
795                 eth.mask.ether_type = mask->inner_type;
796                 eth.val.ether_type &= eth.mask.ether_type;
797         }
798         /*
799          * From verbs perspective an empty VLAN is equivalent
800          * to a packet without VLAN layer.
801          */
802         if (!eth.mask.vlan_tag)
803                 return rte_flow_error_set(error, EINVAL,
804                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
805                                           item->spec,
806                                           "VLAN cannot be empty");
807         if (!(flow->layers & l2m)) {
808                 if (size <= flow_size) {
809                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
810                         mlx5_flow_spec_verbs_add(flow, &eth, size);
811                 }
812         } else {
813                 if (flow->cur_verbs)
814                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
815                                                    &eth);
816                 size = 0; /* Only an update is done in eth specification. */
817         }
818         flow->layers |= tunnel ?
819                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
820                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
821         return size;
822 }
823
824 /**
825  * Convert the @p item into a Verbs specification after ensuring the NIC
826  * will understand and process it correctly.
827  * If the necessary size for the conversion is greater than the @p flow_size,
828  * nothing is written in @p flow, but the validation is still performed.
829  *
830  * @param[in] item
831  *   Item specification.
832  * @param[in, out] flow
833  *   Pointer to flow structure.
834  * @param[in] flow_size
835  *   Size in bytes of the available space in @p flow, if too small, nothing is
836  *   written.
837  * @param[out] error
838  *   Pointer to error structure.
839  *
840  * @return
841  *   On success, the number of bytes consumed/necessary. If the returned
842  *   value is less than or equal to @p flow_size, the @p item has been fully
843  *   converted; otherwise another call with the returned size should be made.
844  *   On error, a negative errno value is returned and rte_errno is set.
845  */
846 static int
847 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
848                     const size_t flow_size, struct rte_flow_error *error)
849 {
850         const struct rte_flow_item_ipv4 *spec = item->spec;
851         const struct rte_flow_item_ipv4 *mask = item->mask;
852         const struct rte_flow_item_ipv4 nic_mask = {
853                 .hdr = {
854                         .src_addr = RTE_BE32(0xffffffff),
855                         .dst_addr = RTE_BE32(0xffffffff),
856                         .type_of_service = 0xff,
857                         .next_proto_id = 0xff,
858                 },
859         };
860         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
861         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
862         struct ibv_flow_spec_ipv4_ext ipv4 = {
863                 .type = IBV_FLOW_SPEC_IPV4_EXT |
864                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
865                 .size = size,
866         };
867         int ret;
868
869         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
870                             MLX5_FLOW_LAYER_OUTER_L3))
871                 return rte_flow_error_set(error, ENOTSUP,
872                                           RTE_FLOW_ERROR_TYPE_ITEM,
873                                           item,
874                                           "multiple L3 layers not supported");
875         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
876                                  MLX5_FLOW_LAYER_OUTER_L4))
877                 return rte_flow_error_set(error, ENOTSUP,
878                                           RTE_FLOW_ERROR_TYPE_ITEM,
879                                           item,
880                                           "L3 cannot follow an L4 layer.");
881         if (!mask)
882                 mask = &rte_flow_item_ipv4_mask;
883         ret = mlx5_flow_item_acceptable
884                 (item, (const uint8_t *)mask,
885                  (const uint8_t *)&nic_mask,
886                  sizeof(struct rte_flow_item_ipv4), error);
887         if (ret < 0)
888                 return ret;
889         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
890                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
891         if (spec) {
892                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
893                         .src_ip = spec->hdr.src_addr,
894                         .dst_ip = spec->hdr.dst_addr,
895                         .proto = spec->hdr.next_proto_id,
896                         .tos = spec->hdr.type_of_service,
897                 };
898                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
899                         .src_ip = mask->hdr.src_addr,
900                         .dst_ip = mask->hdr.dst_addr,
901                         .proto = mask->hdr.next_proto_id,
902                         .tos = mask->hdr.type_of_service,
903                 };
904                 /* Remove unwanted bits from values. */
905                 ipv4.val.src_ip &= ipv4.mask.src_ip;
906                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
907                 ipv4.val.proto &= ipv4.mask.proto;
908                 ipv4.val.tos &= ipv4.mask.tos;
909         }
910         flow->l3_protocol_en = !!ipv4.mask.proto;
911         flow->l3_protocol = ipv4.val.proto;
912         if (size <= flow_size) {
913                 mlx5_flow_verbs_hashfields_adjust
914                         (flow, tunnel,
915                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
916                           ETH_RSS_NONFRAG_IPV4_OTHER),
917                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
918                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
919                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
920         }
921         return size;
922 }
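/*
 * Example (informal): an IPv4 mask limited to hdr.src_addr, hdr.dst_addr,
 * hdr.type_of_service and hdr.next_proto_id is accepted, whereas masking
 * e.g. hdr.time_to_live would be rejected by mlx5_flow_item_acceptable()
 * as it enables bits outside the nic_mask above.
 */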
923
924 /**
925  * Convert the @p item into a Verbs specification after ensuring the NIC
926  * will understand and process it correctly.
927  * If the necessary size for the conversion is greater than the @p flow_size,
928  * nothing is written in @p flow, but the validation is still performed.
929  *
930  * @param[in] item
931  *   Item specification.
932  * @param[in, out] flow
933  *   Pointer to flow structure.
934  * @param[in] flow_size
935  *   Size in bytes of the available space in @p flow, if too small, nothing is
936  *   written.
937  * @param[out] error
938  *   Pointer to error structure.
939  *
940  * @return
941  *   On success, the number of bytes consumed/necessary. If the returned
942  *   value is less than or equal to @p flow_size, the @p item has been fully
943  *   converted; otherwise another call with the returned size should be made.
944  *   On error, a negative errno value is returned and rte_errno is set.
945  */
946 static int
947 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
948                     const size_t flow_size, struct rte_flow_error *error)
949 {
950         const struct rte_flow_item_ipv6 *spec = item->spec;
951         const struct rte_flow_item_ipv6 *mask = item->mask;
952         const struct rte_flow_item_ipv6 nic_mask = {
953                 .hdr = {
954                         .src_addr =
955                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
956                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
957                         .dst_addr =
958                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
959                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
960                         .vtc_flow = RTE_BE32(0xffffffff),
961                         .proto = 0xff,
962                         .hop_limits = 0xff,
963                 },
964         };
965         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
966         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
967         struct ibv_flow_spec_ipv6 ipv6 = {
968                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
969                 .size = size,
970         };
971         int ret;
972
973         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
974                             MLX5_FLOW_LAYER_OUTER_L3))
975                 return rte_flow_error_set(error, ENOTSUP,
976                                           RTE_FLOW_ERROR_TYPE_ITEM,
977                                           item,
978                                           "multiple L3 layers not supported");
979         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
980                                  MLX5_FLOW_LAYER_OUTER_L4))
981                 return rte_flow_error_set(error, ENOTSUP,
982                                           RTE_FLOW_ERROR_TYPE_ITEM,
983                                           item,
984                                           "L3 cannot follow an L4 layer.");
985         /*
986          * IPv6 is not recognised by the NIC inside a GRE tunnel.
987          * Such support has to be disabled as the rule would be accepted
988          * but would never match.  Issue reproduced with Mellanox OFED 4.3-3.0.2.1 and
989          * Mellanox OFED 4.4-1.0.0.0.
990          */
991         if (tunnel && flow->layers & MLX5_FLOW_LAYER_GRE)
992                 return rte_flow_error_set(error, ENOTSUP,
993                                           RTE_FLOW_ERROR_TYPE_ITEM,
994                                           item,
995                                           "IPv6 inside a GRE tunnel is"
996                                           " not recognised.");
997         if (!mask)
998                 mask = &rte_flow_item_ipv6_mask;
999         ret = mlx5_flow_item_acceptable
1000                 (item, (const uint8_t *)mask,
1001                  (const uint8_t *)&nic_mask,
1002                  sizeof(struct rte_flow_item_ipv6), error);
1003         if (ret < 0)
1004                 return ret;
1005         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
1006                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
1007         if (spec) {
1008                 unsigned int i;
1009                 uint32_t vtc_flow_val;
1010                 uint32_t vtc_flow_mask;
1011
1012                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1013                        RTE_DIM(ipv6.val.src_ip));
1014                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1015                        RTE_DIM(ipv6.val.dst_ip));
1016                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1017                        RTE_DIM(ipv6.mask.src_ip));
1018                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1019                        RTE_DIM(ipv6.mask.dst_ip));
1020                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
1021                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
1022                 ipv6.val.flow_label =
1023                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
1024                                          IPV6_HDR_FL_SHIFT);
1025                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1026                                          IPV6_HDR_TC_SHIFT;
1027                 ipv6.val.next_hdr = spec->hdr.proto;
1028                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1029                 ipv6.mask.flow_label =
1030                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1031                                          IPV6_HDR_FL_SHIFT);
1032                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1033                                           IPV6_HDR_TC_SHIFT;
1034                 ipv6.mask.next_hdr = mask->hdr.proto;
1035                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1036                 /* Remove unwanted bits from values. */
1037                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1038                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1039                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1040                 }
1041                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1042                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1043                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1044                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1045         }
1046         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
1047         flow->l3_protocol = ipv6.val.next_hdr;
1048         if (size <= flow_size) {
1049                 mlx5_flow_verbs_hashfields_adjust
1050                         (flow, tunnel,
1051                          (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
1052                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1053                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1054                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1055         }
1056         return size;
1057 }
1058
1059 /**
1060  * Convert the @p item into a Verbs specification after ensuring the NIC
1061  * will understand and process it correctly.
1062  * If the necessary size for the conversion is greater than the @p flow_size,
1063  * nothing is written in @p flow, but the validation is still performed.
1064  *
1065  * @param[in] item
1066  *   Item specification.
1067  * @param[in, out] flow
1068  *   Pointer to flow structure.
1069  * @param[in] flow_size
1070  *   Size in bytes of the available space in @p flow, if too small, nothing is
1071  *   written.
1072  * @param[out] error
1073  *   Pointer to error structure.
1074  *
1075  * @return
1076  *   On success, the number of bytes consumed/necessary. If the returned
1077  *   value is less than or equal to @p flow_size, the @p item has been fully
1078  *   converted; otherwise another call with the returned size should be made.
1079  *   On error, a negative errno value is returned and rte_errno is set.
1080  */
1081 static int
1082 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1083                    const size_t flow_size, struct rte_flow_error *error)
1084 {
1085         const struct rte_flow_item_udp *spec = item->spec;
1086         const struct rte_flow_item_udp *mask = item->mask;
1087         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1088         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1089         struct ibv_flow_spec_tcp_udp udp = {
1090                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1091                 .size = size,
1092         };
1093         int ret;
1094
1095         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
1096                 return rte_flow_error_set(error, ENOTSUP,
1097                                           RTE_FLOW_ERROR_TYPE_ITEM,
1098                                           item,
1099                                           "protocol filtering not compatible"
1100                                           " with UDP layer");
1101         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1102                               MLX5_FLOW_LAYER_OUTER_L3)))
1103                 return rte_flow_error_set(error, ENOTSUP,
1104                                           RTE_FLOW_ERROR_TYPE_ITEM,
1105                                           item,
1106                                           "L3 is mandatory to filter"
1107                                           " on L4");
1108         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1109                             MLX5_FLOW_LAYER_OUTER_L4))
1110                 return rte_flow_error_set(error, ENOTSUP,
1111                                           RTE_FLOW_ERROR_TYPE_ITEM,
1112                                           item,
1113                                           "L4 layer is already"
1114                                           " present");
1115         if (!mask)
1116                 mask = &rte_flow_item_udp_mask;
1117         ret = mlx5_flow_item_acceptable
1118                 (item, (const uint8_t *)mask,
1119                  (const uint8_t *)&rte_flow_item_udp_mask,
1120                  sizeof(struct rte_flow_item_udp), error);
1121         if (ret < 0)
1122                 return ret;
1123         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1124                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1125         if (spec) {
1126                 udp.val.dst_port = spec->hdr.dst_port;
1127                 udp.val.src_port = spec->hdr.src_port;
1128                 udp.mask.dst_port = mask->hdr.dst_port;
1129                 udp.mask.src_port = mask->hdr.src_port;
1130                 /* Remove unwanted bits from values. */
1131                 udp.val.src_port &= udp.mask.src_port;
1132                 udp.val.dst_port &= udp.mask.dst_port;
1133         }
1134         if (size <= flow_size) {
1135                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1136                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1137                                                    IBV_RX_HASH_DST_PORT_UDP));
1138                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1139                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1140         }
1141         return size;
1142 }
1143
1144 /**
1145  * Convert the @p item into a Verbs specification after ensuring the NIC
1146  * will understand and process it correctly.
1147  * If the necessary size for the conversion is greater than the @p flow_size,
1148  * nothing is written in @p flow, but the validation is still performed.
1149  *
1150  * @param[in] item
1151  *   Item specification.
1152  * @param[in, out] flow
1153  *   Pointer to flow structure.
1154  * @param[in] flow_size
1155  *   Size in bytes of the available space in @p flow, if too small, nothing is
1156  *   written.
1157  * @param[out] error
1158  *   Pointer to error structure.
1159  *
1160  * @return
1161  *   On success, the number of bytes consumed/necessary. If the returned
1162  *   value is less than or equal to @p flow_size, the @p item has been fully
1163  *   converted; otherwise another call with the returned size should be made.
1164  *   On error, a negative errno value is returned and rte_errno is set.
1165  */
1166 static int
1167 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1168                    const size_t flow_size, struct rte_flow_error *error)
1169 {
1170         const struct rte_flow_item_tcp *spec = item->spec;
1171         const struct rte_flow_item_tcp *mask = item->mask;
1172         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1173         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1174         struct ibv_flow_spec_tcp_udp tcp = {
1175                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1176                 .size = size,
1177         };
1178         int ret;
1179
1180         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1181                 return rte_flow_error_set(error, ENOTSUP,
1182                                           RTE_FLOW_ERROR_TYPE_ITEM,
1183                                           item,
1184                                           "protocol filtering not compatible"
1185                                           " with TCP layer");
1186         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1187                               MLX5_FLOW_LAYER_OUTER_L3)))
1188                 return rte_flow_error_set(error, ENOTSUP,
1189                                           RTE_FLOW_ERROR_TYPE_ITEM,
1190                                           item,
1191                                           "L3 is mandatory to filter on L4");
1192         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1193                             MLX5_FLOW_LAYER_OUTER_L4))
1194                 return rte_flow_error_set(error, ENOTSUP,
1195                                           RTE_FLOW_ERROR_TYPE_ITEM,
1196                                           item,
1197                                           "L4 layer is already present");
1198         if (!mask)
1199                 mask = &rte_flow_item_tcp_mask;
1200         ret = mlx5_flow_item_acceptable
1201                 (item, (const uint8_t *)mask,
1202                  (const uint8_t *)&rte_flow_item_tcp_mask,
1203                  sizeof(struct rte_flow_item_tcp), error);
1204         if (ret < 0)
1205                 return ret;
1206         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1207                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1208         if (spec) {
1209                 tcp.val.dst_port = spec->hdr.dst_port;
1210                 tcp.val.src_port = spec->hdr.src_port;
1211                 tcp.mask.dst_port = mask->hdr.dst_port;
1212                 tcp.mask.src_port = mask->hdr.src_port;
1213                 /* Remove unwanted bits from values. */
1214                 tcp.val.src_port &= tcp.mask.src_port;
1215                 tcp.val.dst_port &= tcp.mask.dst_port;
1216         }
1217         if (size <= flow_size) {
1218                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1219                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1220                                                    IBV_RX_HASH_DST_PORT_TCP));
1221                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1222                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1223         }
1224         return size;
1225 }
1226
1227 /**
1228  * Convert the @p item into a Verbs specification after ensuring the NIC
1229  * will understand and process it correctly.
1230  * If the necessary size for the conversion is greater than the @p flow_size,
1231  * nothing is written in @p flow, but the validation is still performed.
1232  *
1233  * @param[in] item
1234  *   Item specification.
1235  * @param[in, out] flow
1236  *   Pointer to flow structure.
1237  * @param[in] flow_size
1238  *   Size in bytes of the available space in @p flow, if too small, nothing is
1239  *   written.
1240  * @param[out] error
1241  *   Pointer to error structure.
1242  *
1243  * @return
1244  *   On success, the number of bytes consumed/necessary. If the returned
1245  *   value is less than or equal to @p flow_size, the @p item has been fully
1246  *   converted; otherwise another call with the returned size should be made.
1247  *   On error, a negative errno value is returned and rte_errno is set.
1248  */
1249 static int
1250 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1251                      const size_t flow_size, struct rte_flow_error *error)
1252 {
1253         const struct rte_flow_item_vxlan *spec = item->spec;
1254         const struct rte_flow_item_vxlan *mask = item->mask;
1255         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1256         struct ibv_flow_spec_tunnel vxlan = {
1257                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1258                 .size = size,
1259         };
1260         int ret;
1261         union vni {
1262                 uint32_t vlan_id;
1263                 uint8_t vni[4];
1264         } id = { .vlan_id = 0, };
1265
1266         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1267                 return rte_flow_error_set(error, ENOTSUP,
1268                                           RTE_FLOW_ERROR_TYPE_ITEM,
1269                                           item,
1270                                           "a tunnel is already present");
1271         /*
1272          * Verify only UDPv4 is present as defined in
1273          * https://tools.ietf.org/html/rfc7348
1274          */
1275         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1276                 return rte_flow_error_set(error, ENOTSUP,
1277                                           RTE_FLOW_ERROR_TYPE_ITEM,
1278                                           item,
1279                                           "no outer UDP layer found");
1280         if (!mask)
1281                 mask = &rte_flow_item_vxlan_mask;
1282         ret = mlx5_flow_item_acceptable
1283                 (item, (const uint8_t *)mask,
1284                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1285                  sizeof(struct rte_flow_item_vxlan), error);
1286         if (ret < 0)
1287                 return ret;
1288         if (spec) {
1289                 memcpy(&id.vni[1], spec->vni, 3);
1290                 vxlan.val.tunnel_id = id.vlan_id;
1291                 memcpy(&id.vni[1], mask->vni, 3);
1292                 vxlan.mask.tunnel_id = id.vlan_id;
1293                 /* Remove unwanted bits from values. */
1294                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1295         }
1296         /*
1297          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
1298          * only this layer is defined in the Verbs specification, it is
1299          * interpreted as a wildcard and all packets will match this
1300          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
1301          * udp), all packets matching the preceding layers will also
1302          * match this rule.  To avoid such a situation, VNI 0 is
1303          * currently refused.
1304          */
1305         if (!vxlan.val.tunnel_id)
1306                 return rte_flow_error_set(error, EINVAL,
1307                                           RTE_FLOW_ERROR_TYPE_ITEM,
1308                                           item,
1309                                           "VXLAN vni cannot be 0");
1310         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1311                 return rte_flow_error_set(error, EINVAL,
1312                                           RTE_FLOW_ERROR_TYPE_ITEM,
1313                                           item,
1314                                           "VXLAN tunnel must be fully defined");
1315         if (size <= flow_size) {
1316                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1317                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1318         }
1319         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1320         return size;
1321 }
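
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES is a
 * hypothetical guard and the helper name is made up): how the 24-bit VXLAN
 * VNI is packed into the 32-bit Verbs tunnel_id above.  The three VNI bytes
 * are copied over the last three bytes of a zeroed 32-bit word, keeping the
 * VNI in network order with the lowest byte cleared.
 */
#ifdef MLX5_FLOW_EXAMPLES
static uint32_t
mlx5_flow_example_vni_to_tunnel_id(const uint8_t vni[3])
{
        union {
                uint32_t tunnel_id;
                uint8_t vni[4];
        } id = { .tunnel_id = 0, };

        memcpy(&id.vni[1], vni, 3);
        return id.tunnel_id;
}
#endif /* MLX5_FLOW_EXAMPLES */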
1322
1323 /**
1324  * Convert the @p item into a Verbs specification after ensuring the NIC
1325  * will understand and process it correctly.
1326  * If the necessary size for the conversion is greater than the @p flow_size,
1327  * nothing is written in @p flow, the validation is still performed.
1328  *
1329  * @param dev
1330  *   Pointer to Ethernet device.
1331  * @param[in] item
1332  *   Item specification.
1333  * @param[in, out] flow
1334  *   Pointer to flow structure.
1335  * @param[in] flow_size
1336  *   Size in bytes of the available space in @p flow, if too small, nothing is
1337  *   written.
1338  * @param[out] error
1339  *   Pointer to error structure.
1340  *
1341  * @return
1342  *   On success the number of bytes consumed/necessary, if the returned value
1343  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
1344  *   otherwise another call with this returned memory size should be done.
1345  *   On error, a negative errno value is returned and rte_errno is set.
1346  */
1347 static int
1348 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
1349                          const struct rte_flow_item *item,
1350                          struct rte_flow *flow, const size_t flow_size,
1351                          struct rte_flow_error *error)
1352 {
1353         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1354         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1355         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1356         struct ibv_flow_spec_tunnel vxlan_gpe = {
1357                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1358                 .size = size,
1359         };
1360         int ret;
1361         union vni {
1362                 uint32_t vlan_id;
1363                 uint8_t vni[4];
1364         } id = { .vlan_id = 0, };
1365
1366         if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
1367                 return rte_flow_error_set(error, ENOTSUP,
1368                                           RTE_FLOW_ERROR_TYPE_ITEM,
1369                                           item,
1370                                           "L3 VXLAN is not enabled by device"
1371                                           " parameter and/or not configured in"
1372                                           " firmware");
1373         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1374                 return rte_flow_error_set(error, ENOTSUP,
1375                                           RTE_FLOW_ERROR_TYPE_ITEM,
1376                                           item,
1377                                           "a tunnel is already present");
1378         /*
1379          * Verify an outer UDP layer is present as required by the
1380          * VXLAN-GPE specification (draft-ietf-nvo3-vxlan-gpe).
1381          */
1382         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1383                 return rte_flow_error_set(error, ENOTSUP,
1384                                           RTE_FLOW_ERROR_TYPE_ITEM,
1385                                           item,
1386                                           "no outer UDP layer found");
1387         if (!mask)
1388                 mask = &rte_flow_item_vxlan_gpe_mask;
1389         ret = mlx5_flow_item_acceptable
1390                 (item, (const uint8_t *)mask,
1391                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1392                  sizeof(struct rte_flow_item_vxlan_gpe), error);
1393         if (ret < 0)
1394                 return ret;
1395         if (spec) {
1396                 memcpy(&id.vni[1], spec->vni, 3);
1397                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1398                 memcpy(&id.vni[1], mask->vni, 3);
1399                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1400                 if (spec->protocol)
1401                         return rte_flow_error_set
1402                                 (error, EINVAL,
1403                                  RTE_FLOW_ERROR_TYPE_ITEM,
1404                                  item,
1405                                  "VXLAN-GPE protocol not supported");
1406                 /* Remove unwanted bits from values. */
1407                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1408         }
1409         /*
1410          * Tunnel id 0 is equivalent to not adding a VXLAN-GPE layer: if only
1411          * this layer is defined in the Verbs specification, it is interpreted
1412          * as a wildcard and all packets will match this rule; if it follows a
1413          * full stack layer (e.g. eth / ipv4 / udp), all packets matching the
1414          * preceding layers will also match this rule.  To avoid such a
1415          * situation, VNI 0 is currently refused.
1416          */
1417         if (!vxlan_gpe.val.tunnel_id)
1418                 return rte_flow_error_set(error, EINVAL,
1419                                           RTE_FLOW_ERROR_TYPE_ITEM,
1420                                           item,
1421                                           "VXLAN-GPE vni cannot be 0");
1422         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1423                 return rte_flow_error_set(error, EINVAL,
1424                                           RTE_FLOW_ERROR_TYPE_ITEM,
1425                                           item,
1426                                           "VXLAN-GPE tunnel must be fully"
1427                                           " defined");
1428         if (size <= flow_size) {
1429                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1430                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1431         }
1432         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1433         return size;
1434 }
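
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES and
 * the mlx5_flow_example_* names are made up, the VNI value is arbitrary): a
 * VXLAN-GPE pattern that passes the checks above.  It needs a fully defined
 * outer stack up to UDP, a non-zero VNI, no protocol match and L3 VXLAN
 * support enabled on the device.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item_vxlan_gpe mlx5_flow_example_gpe_spec = {
        .vni = { 0, 0, 42 }, /* 24-bit VNI in network byte order. */
};
static const struct rte_flow_item mlx5_flow_example_gpe_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
          .spec = &mlx5_flow_example_gpe_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */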
1435
1436 /**
1437  * Update the protocol in Verbs IPv4/IPv6 spec.
1438  *
1439  * @param[in, out] attr
1440  *   Pointer to Verbs attributes structure.
1441  * @param[in] search
1442  *   Specification type to search in order to update the IP protocol.
1443  * @param[in] protocol
1444  *   Protocol value to set if none is present in the specification.
1445  */
1446 static void
1447 mlx5_flow_item_gre_ip_protocol_update(struct ibv_flow_attr *attr,
1448                                       enum ibv_flow_spec_type search,
1449                                       uint8_t protocol)
1450 {
1451         unsigned int i;
1452         struct ibv_spec_header *hdr;
1453
1454         if (!attr)
1455                 return;
1456         hdr = (struct ibv_spec_header *)((uint8_t *)attr + sizeof(*attr));
1457         for (i = 0; i != attr->num_of_specs; ++i) {
1458                 if (hdr->type == search) {
1459                         union {
1460                                 struct ibv_flow_spec_ipv4_ext *ipv4;
1461                                 struct ibv_flow_spec_ipv6 *ipv6;
1462                         } ip;
1463
1464                         switch (search) {
1465                         case IBV_FLOW_SPEC_IPV4_EXT:
1466                                 ip.ipv4 = (struct ibv_flow_spec_ipv4_ext *)hdr;
1467                                 if (!ip.ipv4->val.proto) {
1468                                         ip.ipv4->val.proto = protocol;
1469                                         ip.ipv4->mask.proto = 0xff;
1470                                 }
1471                                 break;
1472                         case IBV_FLOW_SPEC_IPV6:
1473                                 ip.ipv6 = (struct ibv_flow_spec_ipv6 *)hdr;
1474                                 if (!ip.ipv6->val.next_hdr) {
1475                                         ip.ipv6->val.next_hdr = protocol;
1476                                         ip.ipv6->mask.next_hdr = 0xff;
1477                                 }
1478                                 break;
1479                         default:
1480                                 break;
1481                         }
1482                         break;
1483                 }
1484                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
1485         }
1486 }
1487
1488 /**
1489  * Convert the @p item into a Verbs specification after ensuring the NIC
1490  * will understand and process it correctly.
1491  * It will also update the previous L3 layer with the protocol value matching
1492  * the GRE.
1493  * If the necessary size for the conversion is greater than the @p flow_size,
1494  * nothing is written in @p flow, the validation is still performed.
1495  *
1498  * @param[in] item
1499  *   Item specification.
1500  * @param[in, out] flow
1501  *   Pointer to flow structure.
1502  * @param[in] flow_size
1503  *   Size in bytes of the available space in @p flow, if too small, nothing is
1504  *   written.
1505  * @param[out] error
1506  *   Pointer to error structure.
1507  *
1508  * @return
1509  *   On success the number of bytes consumed/necessary, if the returned value
1510  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
1511  *   otherwise another call with this returned memory size should be done.
1512  *   On error, a negative errno value is returned and rte_errno is set.
1513  */
1514 static int
1515 mlx5_flow_item_gre(const struct rte_flow_item *item,
1516                    struct rte_flow *flow, const size_t flow_size,
1517                    struct rte_flow_error *error)
1518 {
1519         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1520         const struct rte_flow_item_gre *spec = item->spec;
1521         const struct rte_flow_item_gre *mask = item->mask;
1522 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1523         unsigned int size = sizeof(struct ibv_flow_spec_gre);
1524         struct ibv_flow_spec_gre tunnel = {
1525                 .type = IBV_FLOW_SPEC_GRE,
1526                 .size = size,
1527         };
1528 #else
1529         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1530         struct ibv_flow_spec_tunnel tunnel = {
1531                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1532                 .size = size,
1533         };
1534 #endif
1535         int ret;
1536
1537         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_GRE)
1538                 return rte_flow_error_set(error, ENOTSUP,
1539                                           RTE_FLOW_ERROR_TYPE_ITEM,
1540                                           item,
1541                                           "protocol filtering not compatible"
1542                                           " with this GRE layer");
1543         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1544                 return rte_flow_error_set(error, ENOTSUP,
1545                                           RTE_FLOW_ERROR_TYPE_ITEM,
1546                                           item,
1547                                           "a tunnel is already present");
1548         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
1549                 return rte_flow_error_set(error, ENOTSUP,
1550                                           RTE_FLOW_ERROR_TYPE_ITEM,
1551                                           item,
1552                                           "L3 Layer is missing");
1553         if (!mask)
1554                 mask = &rte_flow_item_gre_mask;
1555         ret = mlx5_flow_item_acceptable
1556                 (item, (const uint8_t *)mask,
1557                  (const uint8_t *)&rte_flow_item_gre_mask,
1558                  sizeof(struct rte_flow_item_gre), error);
1559         if (ret < 0)
1560                 return ret;
1561 #ifdef HAVE_IBV_DEVICE_MPLS_SUPPORT
1562         if (spec) {
1563                 tunnel.val.c_ks_res0_ver = spec->c_rsvd0_ver;
1564                 tunnel.val.protocol = spec->protocol;
1565                 tunnel.mask.c_ks_res0_ver = mask->c_rsvd0_ver;
1566                 tunnel.mask.protocol = mask->protocol;
1567                 /* Remove unwanted bits from values. */
1568                 tunnel.val.c_ks_res0_ver &= tunnel.mask.c_ks_res0_ver;
1569                 tunnel.val.protocol &= tunnel.mask.protocol;
1570                 tunnel.val.key &= tunnel.mask.key;
1571         }
1572 #else
1573         if (spec && (spec->protocol & mask->protocol))
1574                 return rte_flow_error_set(error, ENOTSUP,
1575                                           RTE_FLOW_ERROR_TYPE_ITEM,
1576                                           item,
1577                                           "without MPLS support the"
1578                                           " specification cannot be used for"
1579                                           " filtering");
1580 #endif /* !HAVE_IBV_DEVICE_MPLS_SUPPORT */
1581         if (size <= flow_size) {
1582                 if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3_IPV4)
1583                         mlx5_flow_item_gre_ip_protocol_update
1584                                 (verbs->attr, IBV_FLOW_SPEC_IPV4_EXT,
1585                                  MLX5_IP_PROTOCOL_GRE);
1586                 else
1587                         mlx5_flow_item_gre_ip_protocol_update
1588                                 (verbs->attr, IBV_FLOW_SPEC_IPV6,
1589                                  MLX5_IP_PROTOCOL_GRE);
1590                 mlx5_flow_spec_verbs_add(flow, &tunnel, size);
1591                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1592         }
1593         flow->layers |= MLX5_FLOW_LAYER_GRE;
1594         return size;
1595 }
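
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES and
 * the mlx5_flow_example_* name are made up): a GRE pattern accepted by
 * mlx5_flow_item_gre() above.  The outer L3 item is mandatory and, when the
 * application leaves its protocol field unspecified, it is forced to
 * MLX5_IP_PROTOCOL_GRE by mlx5_flow_item_gre_ip_protocol_update().  The
 * equivalent testpmd rule would look like:
 *   flow create 0 ingress pattern eth / ipv4 / gre / end
 *        actions queue index 0 / end
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item mlx5_flow_example_gre_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_GRE },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */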
1596
1597 /**
1598  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1599  * will understand and process it correctly.
1600  * The conversion is performed item per item, each of them is written into
1601  * the @p flow if its size is lesser or equal to @p flow_size.
1602  * Validation and memory consumption computation are still performed until the
1603  * end of @p pattern, unless an error is encountered.
1604  *
1605  * @param[in] pattern
1606  *   Flow pattern.
1607  * @param[in, out] flow
1608  *   Pointer to the rte_flow structure.
1609  * @param[in] flow_size
1610  *   Size in bytes of the available space in @p flow, if too small some
1611  *   garbage may be present.
1612  * @param[out] error
1613  *   Pointer to error structure.
1614  *
1615  * @return
1616  *   On success the number of bytes consumed/necessary, if the returned value
1617  *   is lesser or equal to @p flow_size, the @p pattern has fully been
1618  *   converted, otherwise another call with this returned memory size should
1619  *   be done.
1620  *   On error, a negative errno value is returned and rte_errno is set.
1621  */
1622 static int
1623 mlx5_flow_items(struct rte_eth_dev *dev,
1624                 const struct rte_flow_item pattern[],
1625                 struct rte_flow *flow, const size_t flow_size,
1626                 struct rte_flow_error *error)
1627 {
1628         int remain = flow_size;
1629         size_t size = 0;
1630
1631         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1632                 int ret = 0;
1633
1634                 switch (pattern->type) {
1635                 case RTE_FLOW_ITEM_TYPE_VOID:
1636                         break;
1637                 case RTE_FLOW_ITEM_TYPE_ETH:
1638                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1639                         break;
1640                 case RTE_FLOW_ITEM_TYPE_VLAN:
1641                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1642                         break;
1643                 case RTE_FLOW_ITEM_TYPE_IPV4:
1644                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1645                         break;
1646                 case RTE_FLOW_ITEM_TYPE_IPV6:
1647                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1648                         break;
1649                 case RTE_FLOW_ITEM_TYPE_UDP:
1650                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1651                         break;
1652                 case RTE_FLOW_ITEM_TYPE_TCP:
1653                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1654                         break;
1655                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1656                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1657                                                    error);
1658                         break;
1659                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1660                         ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
1661                                                        remain, error);
1662                         break;
1663                 case RTE_FLOW_ITEM_TYPE_GRE:
1664                         ret = mlx5_flow_item_gre(pattern, flow, remain, error);
1665                         break;
1666                 default:
1667                         return rte_flow_error_set(error, ENOTSUP,
1668                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1669                                                   pattern,
1670                                                   "item not supported");
1671                 }
1672                 if (ret < 0)
1673                         return ret;
1674                 if (remain > ret)
1675                         remain -= ret;
1676                 else
1677                         remain = 0;
1678                 size += ret;
1679         }
1680         if (!flow->layers) {
1681                 const struct rte_flow_item item = {
1682                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1683                 };
1684
1685                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1686         }
1687         return size;
1688 }
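
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES and
 * the mlx5_flow_example_* name are made up): a pattern carrying no layer
 * item, e.g. only VOID items, is handled by mlx5_flow_items() above as if
 * the application had passed the wildcard Ethernet pattern below.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item mlx5_flow_example_wildcard_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */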
1689
1690 /**
1691  * Convert the @p action into a Verbs specification after ensuring the NIC
1692  * will understand and process it correctly.
1693  * If the necessary size for the conversion is greater than the @p flow_size,
1694  * nothing is written in @p flow, the validation is still performed.
1695  *
1696  * @param[in] action
1697  *   Action configuration.
1698  * @param[in, out] flow
1699  *   Pointer to flow structure.
1700  * @param[in] flow_size
1701  *   Size in bytes of the available space in @p flow, if too small, nothing is
1702  *   written.
1703  * @param[out] error
1704  *   Pointer to error structure.
1705  *
1706  * @return
1707  *   On success the number of bytes consumed/necessary, if the returned value
1708  *   is lesser or equal to @p flow_size, the @p action has fully been
1709  *   converted, otherwise another call with this returned memory size should
1710  *   be done.
1711  *   On error, a negative errno value is returned and rte_errno is set.
1712  */
1713 static int
1714 mlx5_flow_action_drop(const struct rte_flow_action *action,
1715                       struct rte_flow *flow, const size_t flow_size,
1716                       struct rte_flow_error *error)
1717 {
1718         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1719         struct ibv_flow_spec_action_drop drop = {
1720                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1721                         .size = size,
1722         };
1723
1724         if (flow->fate)
1725                 return rte_flow_error_set(error, ENOTSUP,
1726                                           RTE_FLOW_ERROR_TYPE_ACTION,
1727                                           action,
1728                                           "multiple fate actions are not"
1729                                           " supported");
1730         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1731                 return rte_flow_error_set(error, ENOTSUP,
1732                                           RTE_FLOW_ERROR_TYPE_ACTION,
1733                                           action,
1734                                           "drop is not compatible with"
1735                                           " flag/mark action");
1736         if (size <= flow_size)
1737                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1738         flow->fate |= MLX5_FLOW_FATE_DROP;
1739         return size;
1740 }
1741
1742 /**
1743  * Convert the @p action into @p flow after ensuring the NIC will understand
1744  * and process it correctly.
1745  *
1746  * @param[in] dev
1747  *   Pointer to Ethernet device structure.
1748  * @param[in] action
1749  *   Action configuration.
1750  * @param[in, out] flow
1751  *   Pointer to flow structure.
1752  * @param[out] error
1753  *   Pointer to error structure.
1754  *
1755  * @return
1756  *   0 on success, a negative errno value otherwise and rte_errno is set.
1757  */
1758 static int
1759 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1760                        const struct rte_flow_action *action,
1761                        struct rte_flow *flow,
1762                        struct rte_flow_error *error)
1763 {
1764         struct priv *priv = dev->data->dev_private;
1765         const struct rte_flow_action_queue *queue = action->conf;
1766
1767         if (flow->fate)
1768                 return rte_flow_error_set(error, ENOTSUP,
1769                                           RTE_FLOW_ERROR_TYPE_ACTION,
1770                                           action,
1771                                           "multiple fate actions are not"
1772                                           " supported");
1773         if (queue->index >= priv->rxqs_n)
1774                 return rte_flow_error_set(error, EINVAL,
1775                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1776                                           &queue->index,
1777                                           "queue index out of range");
1778         if (!(*priv->rxqs)[queue->index])
1779                 return rte_flow_error_set(error, EINVAL,
1780                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1781                                           &queue->index,
1782                                           "queue is not configured");
1783         if (flow->queue)
1784                 (*flow->queue)[0] = queue->index;
1785         flow->rss.queue_num = 1;
1786         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1787         return 0;
1788 }
1789
1790 /**
1791  * Ensure the @p action will be understood and used correctly by the NIC.
1792  *
1793  * @param dev
1794  *   Pointer to Ethernet device structure.
1795  * @param[in] action
1796  *   Pointer to flow actions array.
1797  * @param[in, out] flow
1798  *   Pointer to the rte_flow structure.
1799  * @param[in, out] error
1800  *   Pointer to error structure.
1801  *
1802  * @return
1803  *   0 on success, @p flow->queue and @p flow->rss are filled and valid.
1804  *   On error, a negative errno value is returned and rte_errno is set.
1805  */
1806 static int
1807 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1808                      const struct rte_flow_action *action,
1809                      struct rte_flow *flow,
1810                      struct rte_flow_error *error)
1811 {
1812         struct priv *priv = dev->data->dev_private;
1813         const struct rte_flow_action_rss *rss = action->conf;
1814         unsigned int i;
1815
1816         if (flow->fate)
1817                 return rte_flow_error_set(error, ENOTSUP,
1818                                           RTE_FLOW_ERROR_TYPE_ACTION,
1819                                           action,
1820                                           "multiple fate actions are not"
1821                                           " supported");
1822         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1823             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1824                 return rte_flow_error_set(error, ENOTSUP,
1825                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1826                                           &rss->func,
1827                                           "RSS hash function not supported");
1828 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1829         if (rss->level > 2)
1830 #else
1831         if (rss->level > 1)
1832 #endif
1833                 return rte_flow_error_set(error, ENOTSUP,
1834                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1835                                           &rss->level,
1836                                           "tunnel RSS is not supported");
1837         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1838                 return rte_flow_error_set(error, ENOTSUP,
1839                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1840                                           &rss->key_len,
1841                                           "RSS hash key too small");
1842         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1843                 return rte_flow_error_set(error, ENOTSUP,
1844                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1845                                           &rss->key_len,
1846                                           "RSS hash key too large");
1847         if (rss->queue_num > priv->config.ind_table_max_size)
1848                 return rte_flow_error_set(error, ENOTSUP,
1849                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1850                                           &rss->queue_num,
1851                                           "number of queues too large");
1852         if (rss->types & MLX5_RSS_HF_MASK)
1853                 return rte_flow_error_set(error, ENOTSUP,
1854                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1855                                           &rss->types,
1856                                           "some RSS protocols are not"
1857                                           " supported");
1858         for (i = 0; i != rss->queue_num; ++i) {
1859                 if (!(*priv->rxqs)[rss->queue[i]])
1860                         return rte_flow_error_set
1861                                 (error, EINVAL,
1862                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1863                                  &rss->queue[i],
1864                                  "queue is not configured");
1865         }
1866         if (flow->queue)
1867                 memcpy((*flow->queue), rss->queue,
1868                        rss->queue_num * sizeof(uint16_t));
1869         flow->rss.queue_num = rss->queue_num;
1870         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1871         flow->rss.types = rss->types;
1872         flow->rss.level = rss->level;
1873         flow->fate |= MLX5_FLOW_FATE_RSS;
1874         return 0;
1875 }
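
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES and
 * the mlx5_flow_example_* names are made up, queue and type values are
 * arbitrary): an RSS action configuration satisfying the checks above, i.e.
 * default hash function, outer level, a MLX5_RSS_HASH_KEY_LEN byte key,
 * supported protocols and a queue list the application has configured.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const uint16_t mlx5_flow_example_rss_queues[] = { 0, 1, 2, 3 };
static const uint8_t mlx5_flow_example_rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
static const struct rte_flow_action_rss mlx5_flow_example_rss_conf = {
        .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
        .level = 1, /* Outer layers only. */
        .types = ETH_RSS_IP | ETH_RSS_UDP,
        .key_len = MLX5_RSS_HASH_KEY_LEN,
        .key = mlx5_flow_example_rss_key,
        .queue_num = RTE_DIM(mlx5_flow_example_rss_queues),
        .queue = mlx5_flow_example_rss_queues,
};
#endif /* MLX5_FLOW_EXAMPLES */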
1876
1877 /**
1878  * Convert the @p action into a Verbs specification after ensuring the NIC
1879  * will understand and process it correctly.
1880  * If the necessary size for the conversion is greater than the @p flow_size,
1881  * nothing is written in @p flow, the validation is still performed.
1882  *
1883  * @param[in] action
1884  *   Action configuration.
1885  * @param[in, out] flow
1886  *   Pointer to flow structure.
1887  * @param[in] flow_size
1888  *   Size in bytes of the available space in @p flow, if too small, nothing is
1889  *   written.
1890  * @param[out] error
1891  *   Pointer to error structure.
1892  *
1893  * @return
1894  *   On success the number of bytes consumed/necessary, if the returned value
1895  *   is lesser or equal to @p flow_size, the @p action has fully been
1896  *   converted, otherwise another call with this returned memory size should
1897  *   be done.
1898  *   On error, a negative errno value is returned and rte_errno is set.
1899  */
1900 static int
1901 mlx5_flow_action_flag(const struct rte_flow_action *action,
1902                       struct rte_flow *flow, const size_t flow_size,
1903                       struct rte_flow_error *error)
1904 {
1905         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1906         struct ibv_flow_spec_action_tag tag = {
1907                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1908                 .size = size,
1909                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1910         };
1911         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1912
1913         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1914                 return rte_flow_error_set(error, ENOTSUP,
1915                                           RTE_FLOW_ERROR_TYPE_ACTION,
1916                                           action,
1917                                           "flag action already present");
1918         if (flow->fate & MLX5_FLOW_FATE_DROP)
1919                 return rte_flow_error_set(error, ENOTSUP,
1920                                           RTE_FLOW_ERROR_TYPE_ACTION,
1921                                           action,
1922                                           "flag is not compatible with drop"
1923                                           " action");
1924         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1925                 size = 0;
1926         else if (size <= flow_size && verbs)
1927                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1928         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1929         return size;
1930 }
1931
1932 /**
1933  * Update verbs specification to modify the flag to mark.
1934  *
1935  * @param[in, out] verbs
1936  *   Pointer to the mlx5_flow_verbs structure.
1937  * @param[in] mark_id
1938  *   Mark identifier to replace the flag.
1939  */
1940 static void
1941 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1942 {
1943         struct ibv_spec_header *hdr;
1944         int i;
1945
1946         if (!verbs)
1947                 return;
1948         /* Update Verbs specification. */
1949         hdr = (struct ibv_spec_header *)verbs->specs;
1950         if (!hdr)
1951                 return;
1952         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1953                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1954                         struct ibv_flow_spec_action_tag *t =
1955                                 (struct ibv_flow_spec_action_tag *)hdr;
1956
1957                         t->tag_id = mlx5_flow_mark_set(mark_id);
1958                 }
1959                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1960         }
1961 }
1962
1963 /**
1964  * Convert the @p action into @p flow (or by updating the already present
1965  * Flag Verbs specification) after ensuring the NIC will understand and
1966  * process it correctly.
1967  * If the necessary size for the conversion is greater than the @p flow_size,
1968  * nothing is written in @p flow, the validation is still performed.
1969  *
1970  * @param[in] action
1971  *   Action configuration.
1972  * @param[in, out] flow
1973  *   Pointer to flow structure.
1974  * @param[in] flow_size
1975  *   Size in bytes of the available space in @p flow, if too small, nothing is
1976  *   written.
1977  * @param[out] error
1978  *   Pointer to error structure.
1979  *
1980  * @return
1981  *   On success the number of bytes consumed/necessary, if the returned value
1982  *   is lesser or equal to @p flow_size, the @p action has fully been
1983  *   converted, otherwise another call with this returned memory size should
1984  *   be done.
1985  *   On error, a negative errno value is returned and rte_errno is set.
1986  */
1987 static int
1988 mlx5_flow_action_mark(const struct rte_flow_action *action,
1989                       struct rte_flow *flow, const size_t flow_size,
1990                       struct rte_flow_error *error)
1991 {
1992         const struct rte_flow_action_mark *mark = action->conf;
1993         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1994         struct ibv_flow_spec_action_tag tag = {
1995                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1996                 .size = size,
1997         };
1998         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1999
2000         if (!mark)
2001                 return rte_flow_error_set(error, EINVAL,
2002                                           RTE_FLOW_ERROR_TYPE_ACTION,
2003                                           action,
2004                                           "configuration cannot be null");
2005         if (mark->id >= MLX5_FLOW_MARK_MAX)
2006                 return rte_flow_error_set(error, EINVAL,
2007                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
2008                                           &mark->id,
2009                                           "mark id must be in 0 <= id < "
2010                                           RTE_STR(MLX5_FLOW_MARK_MAX));
2011         if (flow->modifier & MLX5_FLOW_MOD_MARK)
2012                 return rte_flow_error_set(error, ENOTSUP,
2013                                           RTE_FLOW_ERROR_TYPE_ACTION,
2014                                           action,
2015                                           "mark action already present");
2016         if (flow->fate & MLX5_FLOW_FATE_DROP)
2017                 return rte_flow_error_set(error, ENOTSUP,
2018                                           RTE_FLOW_ERROR_TYPE_ACTION,
2019                                           action,
2020                                           "mark is not compatible with drop"
2021                                           " action");
2022         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
2023                 mlx5_flow_verbs_mark_update(verbs, mark->id);
2024                 size = 0;
2025         } else if (size <= flow_size) {
2026                 tag.tag_id = mlx5_flow_mark_set(mark->id);
2027                 mlx5_flow_spec_verbs_add(flow, &tag, size);
2028         }
2029         flow->modifier |= MLX5_FLOW_MOD_MARK;
2030         return size;
2031 }
2032
2033 /**
2034  * Convert the @p action into @p flow after ensuring the NIC will understand
2035  * and process it correctly.
2036  * The conversion is performed action per action, each of them is written into
2037  * the @p flow if its size is lesser or equal to @p flow_size.
2038  * Validation and memory consumption computation are still performed until the
2039  * end of @p actions, unless an error is encountered.
2040  *
2041  * @param[in] dev
2042  *   Pointer to Ethernet device structure.
2043  * @param[in] actions
2044  *   Pointer to flow actions array.
2045  * @param[in, out] flow
2046  *   Pointer to the rte_flow structure.
2047  * @param[in] flow_size
2048  *   Size in bytes of the available space in @p flow, if too small some
2049  *   garbage may be present.
2050  * @param[out] error
2051  *   Pointer to error structure.
2052  *
2053  * @return
2054  *   On success the number of bytes consumed/necessary, if the returned value
2055  *   is lesser or equal to @p flow_size, the @p actions has fully been
2056  *   converted, otherwise another call with this returned memory size should
2057  *   be done.
2058  *   On error, a negative errno value is returned and rte_errno is set.
2059  */
2060 static int
2061 mlx5_flow_actions(struct rte_eth_dev *dev,
2062                   const struct rte_flow_action actions[],
2063                   struct rte_flow *flow, const size_t flow_size,
2064                   struct rte_flow_error *error)
2065 {
2066         size_t size = 0;
2067         int remain = flow_size;
2068         int ret = 0;
2069
2070         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
2071                 switch (actions->type) {
2072                 case RTE_FLOW_ACTION_TYPE_VOID:
2073                         break;
2074                 case RTE_FLOW_ACTION_TYPE_FLAG:
2075                         ret = mlx5_flow_action_flag(actions, flow, remain,
2076                                                     error);
2077                         break;
2078                 case RTE_FLOW_ACTION_TYPE_MARK:
2079                         ret = mlx5_flow_action_mark(actions, flow, remain,
2080                                                     error);
2081                         break;
2082                 case RTE_FLOW_ACTION_TYPE_DROP:
2083                         ret = mlx5_flow_action_drop(actions, flow, remain,
2084                                                     error);
2085                         break;
2086                 case RTE_FLOW_ACTION_TYPE_QUEUE:
2087                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
2088                         break;
2089                 case RTE_FLOW_ACTION_TYPE_RSS:
2090                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
2091                         break;
2092                 default:
2093                         return rte_flow_error_set(error, ENOTSUP,
2094                                                   RTE_FLOW_ERROR_TYPE_ACTION,
2095                                                   actions,
2096                                                   "action not supported");
2097                 }
2098                 if (ret < 0)
2099                         return ret;
2100                 if (remain > ret)
2101                         remain -= ret;
2102                 else
2103                         remain = 0;
2104                 size += ret;
2105         }
2106         if (!flow->fate)
2107                 return rte_flow_error_set(error, ENOTSUP,
2108                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2109                                           NULL,
2110                                           "no fate action found");
2111         return size;
2112 }
2113
2114 /**
2115  * Convert the @p attributes, @p pattern and @p actions into a flow for the
2116  * NIC after ensuring the NIC will understand and process it correctly.
2117  * The conversion is performed item per item and action per action; each of
2118  * them is written into the @p flow if its size is lesser or equal to @p
2119  * flow_size.
2120  * Validation and memory consumption computation are still performed until the
2121  * end, unless an error is encountered.
2122  *
2123  * @param[in] dev
2124  *   Pointer to Ethernet device.
2125  * @param[in, out] flow
2126  *   Pointer to flow structure.
2127  * @param[in] flow_size
2128  *   Size in bytes of the available space in @p flow, if too small some
2129  *   garbage may be present.
2130  * @param[in] attributes
2131  *   Flow rule attributes.
2132  * @param[in] pattern
2133  *   Pattern specification (list terminated by the END pattern item).
2134  * @param[in] actions
2135  *   Associated actions (list terminated by the END action).
2136  * @param[out] error
2137  *   Perform verbose error reporting if not NULL.
2138  *
2139  * @return
2140  *   On success the number of bytes consumed/necessary, if the returned value
2141  *   is lesser or equal to @p flow_size, the flow has fully been converted and
2142  *   can be applied, otherwise another call with this returned memory size
2143  *   should be done.
2144  *   On error, a negative errno value is returned and rte_errno is set.
2145  */
2146 static int
2147 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
2148                 const size_t flow_size,
2149                 const struct rte_flow_attr *attributes,
2150                 const struct rte_flow_item pattern[],
2151                 const struct rte_flow_action actions[],
2152                 struct rte_flow_error *error)
2153 {
2154         struct rte_flow local_flow = { .layers = 0, };
2155         size_t size = sizeof(*flow);
2156         union {
2157                 struct rte_flow_expand_rss buf;
2158                 uint8_t buffer[2048];
2159         } expand_buffer;
2160         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
2161         struct mlx5_flow_verbs *original_verbs = NULL;
2162         size_t original_verbs_size = 0;
2163         uint32_t original_layers = 0;
2164         int expanded_pattern_idx = 0;
2165         int ret;
2166         uint32_t i;
2167
2168         if (size > flow_size)
2169                 flow = &local_flow;
2170         ret = mlx5_flow_attributes(dev, attributes, flow, error);
2171         if (ret < 0)
2172                 return ret;
2173         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
2174         if (ret < 0)
2175                 return ret;
2176         if (local_flow.rss.types) {
2177                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
2178                                           pattern, local_flow.rss.types,
2179                                           mlx5_support_expansion,
2180                                           local_flow.rss.level < 2 ?
2181                                           MLX5_EXPANSION_ROOT :
2182                                           MLX5_EXPANSION_ROOT_OUTER);
2183                 assert(ret > 0 &&
2184                        (unsigned int)ret < sizeof(expand_buffer.buffer));
2185         } else {
2186                 buf->entries = 1;
2187                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
2188         }
2189         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2190                                sizeof(void *));
2191         if (size <= flow_size)
2192                 flow->queue = (void *)(flow + 1);
2193         LIST_INIT(&flow->verbs);
2194         flow->layers = 0;
2195         flow->modifier = 0;
2196         flow->fate = 0;
2197         for (i = 0; i != buf->entries; ++i) {
2198                 size_t off = size;
2199                 size_t off2;
2200
2201                 flow->layers = original_layers;
2202                 size += sizeof(struct ibv_flow_attr) +
2203                         sizeof(struct mlx5_flow_verbs);
2204                 off2 = size;
2205                 if (size < flow_size) {
2206                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2207                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2208                         flow->cur_verbs->specs =
2209                                 (void *)(flow->cur_verbs->attr + 1);
2210                 }
2211                 /* First iteration convert the pattern into Verbs. */
2212                 if (i == 0) {
2213                         /* Actions don't need to be converted several times. */
2214                         ret = mlx5_flow_actions(dev, actions, flow,
2215                                                 (size < flow_size) ?
2216                                                 flow_size - size : 0,
2217                                                 error);
2218                         if (ret < 0)
2219                                 return ret;
2220                         size += ret;
2221                 } else {
2222                         /*
2223                          * Next iteration means the pattern has already been
2224                          * converted and an expansion is necessary to match
2225                          * the user RSS request.  For that only the expanded
2226                          * items will be converted, the common part with the
2227                          * user pattern are just copied into the next buffer
2228                          * zone.
2229                          */
2230                         size += original_verbs_size;
2231                         if (size < flow_size) {
2232                                 rte_memcpy(flow->cur_verbs->attr,
2233                                            original_verbs->attr,
2234                                            original_verbs_size +
2235                                            sizeof(struct ibv_flow_attr));
2236                                 flow->cur_verbs->size = original_verbs_size;
2237                         }
2238                 }
2239                 ret = mlx5_flow_items
2240                         (dev,
2241                          (const struct rte_flow_item *)
2242                          &buf->entry[i].pattern[expanded_pattern_idx],
2243                          flow,
2244                          (size < flow_size) ? flow_size - size : 0, error);
2245                 if (ret < 0)
2246                         return ret;
2247                 size += ret;
2248                 if (size <= flow_size) {
2249                         mlx5_flow_adjust_priority(dev, flow);
2250                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2251                 }
2252                 /*
2253                  * Keep a pointer of the first verbs conversion and the layers
2254                  * it has encountered.
2255                  */
2256                 if (i == 0) {
2257                         original_verbs = flow->cur_verbs;
2258                         original_verbs_size = size - off2;
2259                         original_layers = flow->layers;
2260                         /*
2261                          * move the index of the expanded pattern to the
2262                          * first item not addressed yet.
2263                          */
2264                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2265                                 expanded_pattern_idx++;
2266                         } else {
2267                                 const struct rte_flow_item *item;
2268
2269                                 for (item = pattern;
2270                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2271                                      ++item)
2272                                         expanded_pattern_idx++;
2273                         }
2274                 }
2275         }
2276         /* Restore the origin layers in the flow. */
2277         flow->layers = original_layers;
2278         return size;
2279 }
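
/*
 * Illustrative sketch, not compiled into the driver (MLX5_FLOW_EXAMPLES and
 * the helper name are made up): how a caller is expected to use the two-pass
 * contract of mlx5_flow_merge() above.  A first call without a buffer
 * returns the required size, the flow is then allocated and converted for
 * real.  Error handling is kept minimal.
 */
#ifdef MLX5_FLOW_EXAMPLES
static struct rte_flow *
mlx5_flow_example_two_pass_merge(struct rte_eth_dev *dev,
                                 const struct rte_flow_attr *attr,
                                 const struct rte_flow_item pattern[],
                                 const struct rte_flow_action actions[],
                                 struct rte_flow_error *error)
{
        struct rte_flow *flow;
        int size;

        /* First pass: validate and compute the required amount of memory. */
        size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);
        if (size < 0)
                return NULL;
        flow = rte_calloc(__func__, 1, size, 0);
        if (!flow)
                return NULL;
        /* Second pass: the buffer is large enough, fill it for real. */
        if (mlx5_flow_merge(dev, flow, size, attr, pattern, actions,
                            error) < 0) {
                rte_free(flow);
                return NULL;
        }
        return flow;
}
#endif /* MLX5_FLOW_EXAMPLES */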
2280
2281 /**
2282  * Look up and set the tunnel ptype in the Rx queue data.  A single ptype can
2283  * be used; if rules with several different tunnel types are used on this
2284  * queue, the tunnel ptype is cleared.
2285  *
2286  * @param rxq_ctrl
2287  *   Rx queue to update.
2288  */
2289 static void
2290 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2291 {
2292         unsigned int i;
2293         uint32_t tunnel_ptype = 0;
2294
2295         /* Look up the ptype to use. */
2296         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2297                 if (!rxq_ctrl->flow_tunnels_n[i])
2298                         continue;
2299                 if (!tunnel_ptype) {
2300                         tunnel_ptype = tunnels_info[i].ptype;
2301                 } else {
2302                         tunnel_ptype = 0;
2303                         break;
2304                 }
2305         }
2306         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2307 }
2308
2309 /**
2310  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2311  *
2312  * @param[in] dev
2313  *   Pointer to Ethernet device.
2314  * @param[in] flow
2315  *   Pointer to flow structure.
2316  */
2317 static void
2318 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2319 {
2320         struct priv *priv = dev->data->dev_private;
2321         const int mark = !!(flow->modifier &
2322                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2323         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2324         unsigned int i;
2325
2326         for (i = 0; i != flow->rss.queue_num; ++i) {
2327                 int idx = (*flow->queue)[i];
2328                 struct mlx5_rxq_ctrl *rxq_ctrl =
2329                         container_of((*priv->rxqs)[idx],
2330                                      struct mlx5_rxq_ctrl, rxq);
2331
2332                 if (mark) {
2333                         rxq_ctrl->rxq.mark = 1;
2334                         rxq_ctrl->flow_mark_n++;
2335                 }
2336                 if (tunnel) {
2337                         unsigned int j;
2338
2339                         /* Increase the counter matching the flow. */
2340                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2341                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2342                                     tunnels_info[j].tunnel) {
2343                                         rxq_ctrl->flow_tunnels_n[j]++;
2344                                         break;
2345                                 }
2346                         }
2347                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2348                 }
2349         }
2350 }
2351
2352 /**
2353  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2354  * @p flow if no other flow uses it with the same kind of request.
2355  *
2356  * @param dev
2357  *   Pointer to Ethernet device.
2358  * @param[in] flow
2359  *   Pointer to the flow.
2360  */
2361 static void
2362 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2363 {
2364         struct priv *priv = dev->data->dev_private;
2365         const int mark = !!(flow->modifier &
2366                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2367         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2368         unsigned int i;
2369
2370         assert(dev->data->dev_started);
2371         for (i = 0; i != flow->rss.queue_num; ++i) {
2372                 int idx = (*flow->queue)[i];
2373                 struct mlx5_rxq_ctrl *rxq_ctrl =
2374                         container_of((*priv->rxqs)[idx],
2375                                      struct mlx5_rxq_ctrl, rxq);
2376
2377                 if (mark) {
2378                         rxq_ctrl->flow_mark_n--;
2379                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2380                 }
2381                 if (tunnel) {
2382                         unsigned int j;
2383
2384                         /* Decrease the counter matching the flow. */
2385                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2386                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2387                                     tunnels_info[j].tunnel) {
2388                                         rxq_ctrl->flow_tunnels_n[j]--;
2389                                         break;
2390                                 }
2391                         }
2392                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2393                 }
2394         }
2395 }
2396
2397 /**
2398  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2399  *
2400  * @param dev
2401  *   Pointer to Ethernet device.
2402  */
2403 static void
2404 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2405 {
2406         struct priv *priv = dev->data->dev_private;
2407         unsigned int i;
2408         unsigned int idx;
2409
2410         for (idx = 0, i = 0; i != priv->rxqs_n; ++i) {
2411                 struct mlx5_rxq_ctrl *rxq_ctrl;
2412                 unsigned int j;
2413
2414                 if (!(*priv->rxqs)[i])
2415                         continue;
2416                 rxq_ctrl = container_of((*priv->rxqs)[i],
2417                                         struct mlx5_rxq_ctrl, rxq);
2418                 rxq_ctrl->flow_mark_n = 0;
2419                 rxq_ctrl->rxq.mark = 0;
2420                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2421                         rxq_ctrl->flow_tunnels_n[j] = 0;
2422                 rxq_ctrl->rxq.tunnel = 0;
2423                 ++idx;
2424         }
2425 }
2426
2427 /**
2428  * Validate a flow supported by the NIC.
2429  *
2430  * @see rte_flow_validate()
2431  * @see rte_flow_ops
2432  */
2433 int
2434 mlx5_flow_validate(struct rte_eth_dev *dev,
2435                    const struct rte_flow_attr *attr,
2436                    const struct rte_flow_item items[],
2437                    const struct rte_flow_action actions[],
2438                    struct rte_flow_error *error)
2439 {
2440         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2441
2442         if (ret < 0)
2443                 return ret;
2444         return 0;
2445 }
2446
2447 /**
2448  * Remove the flow.
2449  *
2450  * @param[in] dev
2451  *   Pointer to Ethernet device.
2452  * @param[in, out] flow
2453  *   Pointer to flow structure.
2454  */
2455 static void
2456 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2457 {
2458         struct mlx5_flow_verbs *verbs;
2459
2460         LIST_FOREACH(verbs, &flow->verbs, next) {
2461                 if (verbs->flow) {
2462                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2463                         verbs->flow = NULL;
2464                 }
2465                 if (verbs->hrxq) {
2466                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2467                                 mlx5_hrxq_drop_release(dev);
2468                         else
2469                                 mlx5_hrxq_release(dev, verbs->hrxq);
2470                         verbs->hrxq = NULL;
2471                 }
2472         }
2473 }
2474
2475 /**
2476  * Apply the flow.
2477  *
2478  * @param[in] dev
2479  *   Pointer to Ethernet device structure.
2480  * @param[in, out] flow
2481  *   Pointer to flow structure.
2482  * @param[out] error
2483  *   Pointer to error structure.
2484  *
2485  * @return
2486  *   0 on success, a negative errno value otherwise and rte_errno is set.
2487  */
2488 static int
2489 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2490                 struct rte_flow_error *error)
2491 {
2492         struct mlx5_flow_verbs *verbs;
2493         int err;
2494
2495         LIST_FOREACH(verbs, &flow->verbs, next) {
2496                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2497                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2498                         if (!verbs->hrxq) {
2499                                 rte_flow_error_set
2500                                         (error, rte_errno,
2501                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2502                                          NULL,
2503                                          "cannot get drop hash queue");
2504                                 goto error;
2505                         }
2506                 } else {
2507                         struct mlx5_hrxq *hrxq;
2508
2509                         hrxq = mlx5_hrxq_get(dev, flow->key,
2510                                              MLX5_RSS_HASH_KEY_LEN,
2511                                              verbs->hash_fields,
2512                                              (*flow->queue),
2513                                              flow->rss.queue_num);
2514                         if (!hrxq)
2515                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2516                                                      MLX5_RSS_HASH_KEY_LEN,
2517                                                      verbs->hash_fields,
2518                                                      (*flow->queue),
2519                                                      flow->rss.queue_num);
2520                         if (!hrxq) {
2521                                 rte_flow_error_set
2522                                         (error, rte_errno,
2523                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2524                                          NULL,
2525                                          "cannot get hash queue");
2526                                 goto error;
2527                         }
2528                         verbs->hrxq = hrxq;
2529                 }
2530                 verbs->flow =
2531                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2532                 if (!verbs->flow) {
2533                         rte_flow_error_set(error, errno,
2534                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2535                                            NULL,
2536                                            "hardware refuses to create flow");
2537                         goto error;
2538                 }
2539         }
2540         return 0;
2541 error:
2542         err = rte_errno; /* Save rte_errno before cleanup. */
2543         LIST_FOREACH(verbs, &flow->verbs, next) {
2544                 if (verbs->hrxq) {
2545                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2546                                 mlx5_hrxq_drop_release(dev);
2547                         else
2548                                 mlx5_hrxq_release(dev, verbs->hrxq);
2549                         verbs->hrxq = NULL;
2550                 }
2551         }
2552         rte_errno = err; /* Restore rte_errno. */
2553         return -rte_errno;
2554 }
2555
2556 /**
2557  * Create a flow and add it to @p list.
2558  *
2559  * @param dev
2560  *   Pointer to Ethernet device.
2561  * @param list
2562  *   Pointer to a TAILQ flow list.
2563  * @param[in] attr
2564  *   Flow rule attributes.
2565  * @param[in] items
2566  *   Pattern specification (list terminated by the END pattern item).
2567  * @param[in] actions
2568  *   Associated actions (list terminated by the END action).
2569  * @param[out] error
2570  *   Perform verbose error reporting if not NULL.
2571  *
2572  * @return
2573  *   A flow on success, NULL otherwise and rte_errno is set.
2574  */
2575 static struct rte_flow *
2576 mlx5_flow_list_create(struct rte_eth_dev *dev,
2577                       struct mlx5_flows *list,
2578                       const struct rte_flow_attr *attr,
2579                       const struct rte_flow_item items[],
2580                       const struct rte_flow_action actions[],
2581                       struct rte_flow_error *error)
2582 {
2583         struct rte_flow *flow = NULL;
2584         size_t size = 0;
2585         int ret;
2586
2587         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2588         if (ret < 0)
2589                 return NULL;
2590         size = ret;
2591         flow = rte_calloc(__func__, 1, size, 0);
2592         if (!flow) {
2593                 rte_flow_error_set(error, ENOMEM,
2594                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2595                                    NULL,
2596                                    "not enough memory to create flow");
2597                 return NULL;
2598         }
2599         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2600         if (ret < 0) {
2601                 rte_free(flow);
2602                 return NULL;
2603         }
2604         assert((size_t)ret == size);
2605         if (dev->data->dev_started) {
2606                 ret = mlx5_flow_apply(dev, flow, error);
2607                 if (ret < 0) {
2608                         ret = rte_errno; /* Save rte_errno before cleanup. */
2609                         if (flow) {
2610                                 mlx5_flow_remove(dev, flow);
2611                                 rte_free(flow);
2612                         }
2613                         rte_errno = ret; /* Restore rte_errno. */
2614                         return NULL;
2615                 }
2616         }
2617         TAILQ_INSERT_TAIL(list, flow, next);
2618         mlx5_flow_rxq_flags_set(dev, flow);
2619         return flow;
2620 }
2621
2622 /**
2623  * Create a flow.
2624  *
2625  * @see rte_flow_create()
2626  * @see rte_flow_ops
2627  */
2628 struct rte_flow *
2629 mlx5_flow_create(struct rte_eth_dev *dev,
2630                  const struct rte_flow_attr *attr,
2631                  const struct rte_flow_item items[],
2632                  const struct rte_flow_action actions[],
2633                  struct rte_flow_error *error)
2634 {
2635         return mlx5_flow_list_create
2636                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2637                  attr, items, actions, error);
2638 }
2639
2640 /**
2641  * Destroy a flow in a list.
2642  *
2643  * @param dev
2644  *   Pointer to Ethernet device.
2645  * @param list
2646  *   Pointer to a TAILQ flow list.
2647  * @param[in] flow
2648  *   Flow to destroy.
2649  */
2650 static void
2651 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2652                        struct rte_flow *flow)
2653 {
2654         mlx5_flow_remove(dev, flow);
2655         TAILQ_REMOVE(list, flow, next);
2656         /*
2657          * Update RX queue flags only if port is started, otherwise it is
2658          * already clean.
2659          */
2660         if (dev->data->dev_started)
2661                 mlx5_flow_rxq_flags_trim(dev, flow);
2662         rte_free(flow);
2663 }
2664
2665 /**
2666  * Destroy all flows.
2667  *
2668  * @param dev
2669  *   Pointer to Ethernet device.
2670  * @param list
2671  *   Pointer to a TAILQ flow list.
2672  */
2673 void
2674 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2675 {
2676         while (!TAILQ_EMPTY(list)) {
2677                 struct rte_flow *flow;
2678
2679                 flow = TAILQ_FIRST(list);
2680                 mlx5_flow_list_destroy(dev, list, flow);
2681         }
2682 }
2683
2684 /**
2685  * Remove all flows from the hardware, keeping them in the flow list.
2686  *
2687  * @param dev
2688  *   Pointer to Ethernet device.
2689  * @param list
2690  *   Pointer to a TAILQ flow list.
2691  */
2692 void
2693 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2694 {
2695         struct rte_flow *flow;
2696
2697         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2698                 mlx5_flow_remove(dev, flow);
2699         mlx5_flow_rxq_flags_clear(dev);
2700 }
2701
2702 /**
2703  * Apply all flows in the list to the hardware.
2704  *
2705  * @param dev
2706  *   Pointer to Ethernet device.
2707  * @param list
2708  *   Pointer to a TAILQ flow list.
2709  *
2710  * @return
2711  *   0 on success, a negative errno value otherwise and rte_errno is set.
2712  */
2713 int
2714 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2715 {
2716         struct rte_flow *flow;
2717         struct rte_flow_error error;
2718         int ret = 0;
2719
2720         TAILQ_FOREACH(flow, list, next) {
2721                 ret = mlx5_flow_apply(dev, flow, &error);
2722                 if (ret < 0)
2723                         goto error;
2724                 mlx5_flow_rxq_flags_set(dev, flow);
2725         }
2726         return 0;
2727 error:
2728         ret = rte_errno; /* Save rte_errno before cleanup. */
2729         mlx5_flow_stop(dev, list);
2730         rte_errno = ret; /* Restore rte_errno. */
2731         return -rte_errno;
2732 }
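
/*
 * Illustrative sketch (an assumption about the callers, cf. the port
 * start/stop handlers in mlx5_trigger.c): hardware rules are dropped on
 * stop and re-applied on start without losing the flow list.
 *
 *	mlx5_flow_stop(dev, &priv->flows);
 *	...
 *	if (mlx5_flow_start(dev, &priv->flows)) {
 *		DRV_LOG(DEBUG, "port %u failed to re-apply flows",
 *			dev->data->port_id);
 *		goto error;
 *	}
 */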
2733
2734 /**
2735  * Verify the flow list is empty.
2736  *
2737  * @param dev
2738  *   Pointer to Ethernet device.
2739  *
2740  * @return the number of flows not released.
2741  */
2742 int
2743 mlx5_flow_verify(struct rte_eth_dev *dev)
2744 {
2745         struct priv *priv = dev->data->dev_private;
2746         struct rte_flow *flow;
2747         int ret = 0;
2748
2749         TAILQ_FOREACH(flow, &priv->flows, next) {
2750                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2751                         dev->data->port_id, (void *)flow);
2752                 ++ret;
2753         }
2754         return ret;
2755 }
2756
2757 /**
2758  * Enable a control flow configured from the control plane.
2759  *
2760  * @param dev
2761  *   Pointer to Ethernet device.
2762  * @param eth_spec
2763  *   An Ethernet flow spec to apply.
2764  * @param eth_mask
2765  *   An Ethernet flow mask to apply.
2766  * @param vlan_spec
2767  *   A VLAN flow spec to apply.
2768  * @param vlan_mask
2769  *   A VLAN flow mask to apply.
2770  *
2771  * @return
2772  *   0 on success, a negative errno value otherwise and rte_errno is set.
2773  */
2774 int
2775 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2776                     struct rte_flow_item_eth *eth_spec,
2777                     struct rte_flow_item_eth *eth_mask,
2778                     struct rte_flow_item_vlan *vlan_spec,
2779                     struct rte_flow_item_vlan *vlan_mask)
2780 {
2781         struct priv *priv = dev->data->dev_private;
2782         const struct rte_flow_attr attr = {
2783                 .ingress = 1,
2784                 .priority = MLX5_FLOW_PRIO_RSVD,
2785         };
2786         struct rte_flow_item items[] = {
2787                 {
2788                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2789                         .spec = eth_spec,
2790                         .last = NULL,
2791                         .mask = eth_mask,
2792                 },
2793                 {
2794                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2795                                 RTE_FLOW_ITEM_TYPE_END,
2796                         .spec = vlan_spec,
2797                         .last = NULL,
2798                         .mask = vlan_mask,
2799                 },
2800                 {
2801                         .type = RTE_FLOW_ITEM_TYPE_END,
2802                 },
2803         };
2804         uint16_t queue[priv->reta_idx_n];
2805         struct rte_flow_action_rss action_rss = {
2806                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2807                 .level = 0,
2808                 .types = priv->rss_conf.rss_hf,
2809                 .key_len = priv->rss_conf.rss_key_len,
2810                 .queue_num = priv->reta_idx_n,
2811                 .key = priv->rss_conf.rss_key,
2812                 .queue = queue,
2813         };
2814         struct rte_flow_action actions[] = {
2815                 {
2816                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2817                         .conf = &action_rss,
2818                 },
2819                 {
2820                         .type = RTE_FLOW_ACTION_TYPE_END,
2821                 },
2822         };
2823         struct rte_flow *flow;
2824         struct rte_flow_error error;
2825         unsigned int i;
2826
2827         if (!priv->reta_idx_n) {
2828                 rte_errno = EINVAL;
2829                 return -rte_errno;
2830         }
2831         for (i = 0; i != priv->reta_idx_n; ++i)
2832                 queue[i] = (*priv->reta_idx)[i];
2833         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2834                                      actions, &error);
2835         if (!flow)
2836                 return -rte_errno;
2837         return 0;
2838 }
2839
2840 /**
2841  * Enable a control flow configured from the control plane.
2842  *
2843  * @param dev
2844  *   Pointer to Ethernet device.
2845  * @param eth_spec
2846  *   An Ethernet flow spec to apply.
2847  * @param eth_mask
2848  *   An Ethernet flow mask to apply.
2849  *
2850  * @return
2851  *   0 on success, a negative errno value otherwise and rte_errno is set.
2852  */
2853 int
2854 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2855                struct rte_flow_item_eth *eth_spec,
2856                struct rte_flow_item_eth *eth_mask)
2857 {
2858         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2859 }
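
/*
 * Example (a minimal sketch, not lifted verbatim from the driver): enabling
 * reception of broadcast traffic through a control flow, similar in spirit
 * to what the driver's traffic enable path does for the addresses it must
 * accept.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		DRV_LOG(ERR, "port %u cannot enable broadcast traffic",
 *			dev->data->port_id);
 */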
2860
2861 /**
2862  * Destroy a flow.
2863  *
2864  * @see rte_flow_destroy()
2865  * @see rte_flow_ops
2866  */
2867 int
2868 mlx5_flow_destroy(struct rte_eth_dev *dev,
2869                   struct rte_flow *flow,
2870                   struct rte_flow_error *error __rte_unused)
2871 {
2872         struct priv *priv = dev->data->dev_private;
2873
2874         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2875         return 0;
2876 }
2877
2878 /**
2879  * Destroy all flows.
2880  *
2881  * @see rte_flow_flush()
2882  * @see rte_flow_ops
2883  */
2884 int
2885 mlx5_flow_flush(struct rte_eth_dev *dev,
2886                 struct rte_flow_error *error __rte_unused)
2887 {
2888         struct priv *priv = dev->data->dev_private;
2889
2890         mlx5_flow_list_flush(dev, &priv->flows);
2891         return 0;
2892 }
2893
2894 /**
2895  * Enable or disable isolated mode.
2896  *
2897  * @see rte_flow_isolate()
2898  * @see rte_flow_ops
2899  */
2900 int
2901 mlx5_flow_isolate(struct rte_eth_dev *dev,
2902                   int enable,
2903                   struct rte_flow_error *error)
2904 {
2905         struct priv *priv = dev->data->dev_private;
2906
2907         if (dev->data->dev_started) {
2908                 rte_flow_error_set(error, EBUSY,
2909                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2910                                    NULL,
2911                                    "port must be stopped first");
2912                 return -rte_errno;
2913         }
2914         priv->isolated = !!enable;
2915         if (enable)
2916                 dev->dev_ops = &mlx5_dev_ops_isolate;
2917         else
2918                 dev->dev_ops = &mlx5_dev_ops;
2919         return 0;
2920 }
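
/*
 * Example (application side, illustrative only): isolated mode has to be
 * requested while the port is stopped, typically between
 * rte_eth_dev_configure() and rte_eth_dev_start().  Port 0 is a
 * placeholder.
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(0, 1, &err))
 *		printf("cannot enable isolated mode: %s\n",
 *		       err.message ? err.message : "(no reason given)");
 */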
2921
2922 /**
2923  * Convert a flow director filter to a generic flow.
2924  *
2925  * @param dev
2926  *   Pointer to Ethernet device.
2927  * @param fdir_filter
2928  *   Flow director filter to add.
2929  * @param attributes
2930  *   Generic flow parameters structure.
2931  *
2932  * @return
2933  *   0 on success, a negative errno value otherwise and rte_errno is set.
2934  */
2935 static int
2936 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2937                          const struct rte_eth_fdir_filter *fdir_filter,
2938                          struct mlx5_fdir *attributes)
2939 {
2940         struct priv *priv = dev->data->dev_private;
2941         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2942         const struct rte_eth_fdir_masks *mask =
2943                 &dev->data->dev_conf.fdir_conf.mask;
2944
2945         /* Validate queue number. */
2946         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2947                 DRV_LOG(ERR, "port %u invalid queue number %d",
2948                         dev->data->port_id, fdir_filter->action.rx_queue);
2949                 rte_errno = EINVAL;
2950                 return -rte_errno;
2951         }
2952         attributes->attr.ingress = 1;
2953         attributes->items[0] = (struct rte_flow_item) {
2954                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2955                 .spec = &attributes->l2,
2956                 .mask = &attributes->l2_mask,
2957         };
2958         switch (fdir_filter->action.behavior) {
2959         case RTE_ETH_FDIR_ACCEPT:
2960                 attributes->actions[0] = (struct rte_flow_action){
2961                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2962                         .conf = &attributes->queue,
2963                 };
2964                 break;
2965         case RTE_ETH_FDIR_REJECT:
2966                 attributes->actions[0] = (struct rte_flow_action){
2967                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2968                 };
2969                 break;
2970         default:
2971                 DRV_LOG(ERR, "port %u invalid behavior %d",
2972                         dev->data->port_id,
2973                         fdir_filter->action.behavior);
2974                 rte_errno = ENOTSUP;
2975                 return -rte_errno;
2976         }
2977         attributes->queue.index = fdir_filter->action.rx_queue;
2978         /* Handle L3. */
2979         switch (fdir_filter->input.flow_type) {
2980         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2981         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2982         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2983                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2984                         .src_addr = input->flow.ip4_flow.src_ip,
2985                         .dst_addr = input->flow.ip4_flow.dst_ip,
2986                         .time_to_live = input->flow.ip4_flow.ttl,
2987                         .type_of_service = input->flow.ip4_flow.tos,
2988                         .next_proto_id = input->flow.ip4_flow.proto,
2989                 };
2990                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2991                         .src_addr = mask->ipv4_mask.src_ip,
2992                         .dst_addr = mask->ipv4_mask.dst_ip,
2993                         .time_to_live = mask->ipv4_mask.ttl,
2994                         .type_of_service = mask->ipv4_mask.tos,
2995                         .next_proto_id = mask->ipv4_mask.proto,
2996                 };
2997                 attributes->items[1] = (struct rte_flow_item){
2998                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2999                         .spec = &attributes->l3,
3000                         .mask = &attributes->l3_mask,
3001                 };
3002                 break;
3003         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3004         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3005         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3006                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
3007                         .hop_limits = input->flow.ipv6_flow.hop_limits,
3008                         .proto = input->flow.ipv6_flow.proto,
3009                 };
3010
3011                 memcpy(attributes->l3.ipv6.hdr.src_addr,
3012                        input->flow.ipv6_flow.src_ip,
3013                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
3014                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
3015                        input->flow.ipv6_flow.dst_ip,
3016                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
3017                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
3018                        mask->ipv6_mask.src_ip,
3019                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
3020                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
3021                        mask->ipv6_mask.dst_ip,
3022                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
3023                 attributes->items[1] = (struct rte_flow_item){
3024                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
3025                         .spec = &attributes->l3,
3026                         .mask = &attributes->l3_mask,
3027                 };
3028                 break;
3029         default:
3030                 DRV_LOG(ERR, "port %u invalid flow type %d",
3031                         dev->data->port_id, fdir_filter->input.flow_type);
3032                 rte_errno = ENOTSUP;
3033                 return -rte_errno;
3034         }
3035         /* Handle L4. */
3036         switch (fdir_filter->input.flow_type) {
3037         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
3038                 attributes->l4.udp.hdr = (struct udp_hdr){
3039                         .src_port = input->flow.udp4_flow.src_port,
3040                         .dst_port = input->flow.udp4_flow.dst_port,
3041                 };
3042                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3043                         .src_port = mask->src_port_mask,
3044                         .dst_port = mask->dst_port_mask,
3045                 };
3046                 attributes->items[2] = (struct rte_flow_item){
3047                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3048                         .spec = &attributes->l4,
3049                         .mask = &attributes->l4_mask,
3050                 };
3051                 break;
3052         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
3053                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3054                         .src_port = input->flow.tcp4_flow.src_port,
3055                         .dst_port = input->flow.tcp4_flow.dst_port,
3056                 };
3057                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3058                         .src_port = mask->src_port_mask,
3059                         .dst_port = mask->dst_port_mask,
3060                 };
3061                 attributes->items[2] = (struct rte_flow_item){
3062                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3063                         .spec = &attributes->l4,
3064                         .mask = &attributes->l4_mask,
3065                 };
3066                 break;
3067         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
3068                 attributes->l4.udp.hdr = (struct udp_hdr){
3069                         .src_port = input->flow.udp6_flow.src_port,
3070                         .dst_port = input->flow.udp6_flow.dst_port,
3071                 };
3072                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
3073                         .src_port = mask->src_port_mask,
3074                         .dst_port = mask->dst_port_mask,
3075                 };
3076                 attributes->items[2] = (struct rte_flow_item){
3077                         .type = RTE_FLOW_ITEM_TYPE_UDP,
3078                         .spec = &attributes->l4,
3079                         .mask = &attributes->l4_mask,
3080                 };
3081                 break;
3082         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
3083                 attributes->l4.tcp.hdr = (struct tcp_hdr){
3084                         .src_port = input->flow.tcp6_flow.src_port,
3085                         .dst_port = input->flow.tcp6_flow.dst_port,
3086                 };
3087                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
3088                         .src_port = mask->src_port_mask,
3089                         .dst_port = mask->dst_port_mask,
3090                 };
3091                 attributes->items[2] = (struct rte_flow_item){
3092                         .type = RTE_FLOW_ITEM_TYPE_TCP,
3093                         .spec = &attributes->l4,
3094                         .mask = &attributes->l4_mask,
3095                 };
3096                 break;
3097         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
3098         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
3099                 break;
3100         default:
3101                 DRV_LOG(ERR, "port %u invalid flow type %d",
3102                         dev->data->port_id, fdir_filter->input.flow_type);
3103                 rte_errno = ENOTSUP;
3104                 return -rte_errno;
3105         }
3106         return 0;
3107 }
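
/*
 * Example (illustrative only): a flow director filter this converter can
 * translate.  Addresses, ports and the queue index are placeholders and
 * are given in network byte order where applicable.
 *
 *	struct rte_eth_fdir_filter fdir = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(0xc0a80001),
 *				.ip.dst_ip = rte_cpu_to_be_32(0xc0a80002),
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(4321),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 1,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 *
 * It is turned into an ETH / IPV4 / UDP pattern with a QUEUE action by the
 * code above.
 */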
3108
3109 /**
3110  * Add new flow director filter and store it in list.
3111  *
3112  * @param dev
3113  *   Pointer to Ethernet device.
3114  * @param fdir_filter
3115  *   Flow director filter to add.
3116  *
3117  * @return
3118  *   0 on success, a negative errno value otherwise and rte_errno is set.
3119  */
3120 static int
3121 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
3122                      const struct rte_eth_fdir_filter *fdir_filter)
3123 {
3124         struct priv *priv = dev->data->dev_private;
3125         struct mlx5_fdir attributes = {
3126                 .attr.group = 0,
3127                 .l2_mask = {
3128                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3129                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
3130                         .type = 0,
3131                 },
3132         };
3133         struct rte_flow_error error;
3134         struct rte_flow *flow;
3135         int ret;
3136
3137         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
3138         if (ret)
3139                 return ret;
3140         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
3141                                      attributes.items, attributes.actions,
3142                                      &error);
3143         if (flow) {
3144                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
3145                         (void *)flow);
3146                 return 0;
3147         }
3148         return -rte_errno;
3149 }
3150
3151 /**
3152  * Delete specific filter.
3153  *
3154  * @param dev
3155  *   Pointer to Ethernet device.
3156  * @param fdir_filter
3157  *   Filter to be deleted.
3158  *
3159  * @return
3160  *   0 on success, a negative errno value otherwise and rte_errno is set.
3161  */
3162 static int
3163 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
3164                         const struct rte_eth_fdir_filter *fdir_filter
3165                         __rte_unused)
3166 {
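        /*
         * Removing a single flow director filter is not supported by this
         * implementation; RTE_ETH_FILTER_FLUSH, which drops every flow, is
         * the only way to get rid of them.
         */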
3167         rte_errno = ENOTSUP;
3168         return -rte_errno;
3169 }
3170
3171 /**
3172  * Update queue for specific filter.
3173  *
3174  * @param dev
3175  *   Pointer to Ethernet device.
3176  * @param fdir_filter
3177  *   Filter to be updated.
3178  *
3179  * @return
3180  *   0 on success, a negative errno value otherwise and rte_errno is set.
3181  */
3182 static int
3183 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
3184                         const struct rte_eth_fdir_filter *fdir_filter)
3185 {
3186         int ret;
3187
3188         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3189         if (ret)
3190                 return ret;
3191         return mlx5_fdir_filter_add(dev, fdir_filter);
3192 }
3193
3194 /**
3195  * Flush all filters.
3196  *
3197  * @param dev
3198  *   Pointer to Ethernet device.
3199  */
3200 static void
3201 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3202 {
3203         struct priv *priv = dev->data->dev_private;
3204
3205         mlx5_flow_list_flush(dev, &priv->flows);
3206 }
3207
3208 /**
3209  * Get flow director information.
3210  *
3211  * @param dev
3212  *   Pointer to Ethernet device.
3213  * @param[out] fdir_info
3214  *   Resulting flow director information.
3215  */
3216 static void
3217 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3218 {
3219         struct rte_eth_fdir_masks *mask =
3220                 &dev->data->dev_conf.fdir_conf.mask;
3221
3222         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3223         fdir_info->guarant_spc = 0;
3224         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3225         fdir_info->max_flexpayload = 0;
3226         fdir_info->flow_types_mask[0] = 0;
3227         fdir_info->flex_payload_unit = 0;
3228         fdir_info->max_flex_payload_segment_num = 0;
3229         fdir_info->flex_payload_limit = 0;
3230         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3231 }
3232
3233 /**
3234  * Deal with flow director operations.
3235  *
3236  * @param dev
3237  *   Pointer to Ethernet device.
3238  * @param filter_op
3239  *   Operation to perform.
3240  * @param arg
3241  *   Pointer to operation-specific structure.
3242  *
3243  * @return
3244  *   0 on success, a negative errno value otherwise and rte_errno is set.
3245  */
3246 static int
3247 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3248                     void *arg)
3249 {
3250         enum rte_fdir_mode fdir_mode =
3251                 dev->data->dev_conf.fdir_conf.mode;
3252
3253         if (filter_op == RTE_ETH_FILTER_NOP)
3254                 return 0;
3255         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3256             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3257                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3258                         dev->data->port_id, fdir_mode);
3259                 rte_errno = EINVAL;
3260                 return -rte_errno;
3261         }
3262         switch (filter_op) {
3263         case RTE_ETH_FILTER_ADD:
3264                 return mlx5_fdir_filter_add(dev, arg);
3265         case RTE_ETH_FILTER_UPDATE:
3266                 return mlx5_fdir_filter_update(dev, arg);
3267         case RTE_ETH_FILTER_DELETE:
3268                 return mlx5_fdir_filter_delete(dev, arg);
3269         case RTE_ETH_FILTER_FLUSH:
3270                 mlx5_fdir_filter_flush(dev);
3271                 break;
3272         case RTE_ETH_FILTER_INFO:
3273                 mlx5_fdir_info_get(dev, arg);
3274                 break;
3275         default:
3276                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3277                         dev->data->port_id, filter_op);
3278                 rte_errno = EINVAL;
3279                 return -rte_errno;
3280         }
3281         return 0;
3282 }
3283
3284 /**
3285  * Manage filter operations.
3286  *
3287  * @param dev
3288  *   Pointer to Ethernet device structure.
3289  * @param filter_type
3290  *   Filter type.
3291  * @param filter_op
3292  *   Operation to perform.
3293  * @param arg
3294  *   Pointer to operation-specific structure.
3295  *
3296  * @return
3297  *   0 on success, a negative errno value otherwise and rte_errno is set.
3298  */
3299 int
3300 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3301                      enum rte_filter_type filter_type,
3302                      enum rte_filter_op filter_op,
3303                      void *arg)
3304 {
3305         switch (filter_type) {
3306         case RTE_ETH_FILTER_GENERIC:
3307                 if (filter_op != RTE_ETH_FILTER_GET) {
3308                         rte_errno = EINVAL;
3309                         return -rte_errno;
3310                 }
3311                 *(const void **)arg = &mlx5_flow_ops;
3312                 return 0;
3313         case RTE_ETH_FILTER_FDIR:
3314                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3315         default:
3316                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3317                         dev->data->port_id, filter_type);
3318                 rte_errno = ENOTSUP;
3319                 return -rte_errno;
3320         }
3321         return 0;
3322 }
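
/*
 * Illustrative sketch (not part of the driver): how a caller can reach the
 * generic flow handlers through this entry point, much like what the
 * rte_flow layer does internally.  flow_ops_get() is a hypothetical helper
 * name; on success the returned pointer is &mlx5_flow_ops, stored by the
 * RTE_ETH_FILTER_GENERIC case above.
 *
 *	static const struct rte_flow_ops *
 *	flow_ops_get(uint16_t port_id)
 *	{
 *		const struct rte_flow_ops *ops = NULL;
 *
 *		if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *					    RTE_ETH_FILTER_GET, &ops))
 *			return NULL;
 *		return ops;
 *	}
 */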