net/mlx5: add flow VXLAN-GPE item
[dpdk.git] drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structures defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern outer Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45
46 /* Pattern inner Layer bits. */
47 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
48 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
50 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
51 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
52 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
53
54 /* Pattern tunnel Layer bits. */
55 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
56 #define MLX5_FLOW_LAYER_VXLAN_GPE (1u << 13)
57
58 /* Outer Masks. */
59 #define MLX5_FLOW_LAYER_OUTER_L3 \
60         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
61 #define MLX5_FLOW_LAYER_OUTER_L4 \
62         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
63 #define MLX5_FLOW_LAYER_OUTER \
64         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
65          MLX5_FLOW_LAYER_OUTER_L4)
66
67 /* Tunnel Masks. */
68 #define MLX5_FLOW_LAYER_TUNNEL \
69         (MLX5_FLOW_LAYER_VXLAN | MLX5_FLOW_LAYER_VXLAN_GPE)
70
71 /* Inner Masks. */
72 #define MLX5_FLOW_LAYER_INNER_L3 \
73         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
74 #define MLX5_FLOW_LAYER_INNER_L4 \
75         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
76 #define MLX5_FLOW_LAYER_INNER \
77         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
78          MLX5_FLOW_LAYER_INNER_L4)
79
80 /* Actions that modify the fate of matching traffic. */
81 #define MLX5_FLOW_FATE_DROP (1u << 0)
82 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
83 #define MLX5_FLOW_FATE_RSS (1u << 2)
84
85 /* Modify a packet. */
86 #define MLX5_FLOW_MOD_FLAG (1u << 0)
87 #define MLX5_FLOW_MOD_MARK (1u << 1)
88
89 /* IP protocol values for L3 protocol filtering. */
90 #define MLX5_IP_PROTOCOL_TCP 6
91 #define MLX5_IP_PROTOCOL_UDP 17
92
93 /* Priority reserved for default flows. */
94 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
95
96 enum mlx5_expansion {
97         MLX5_EXPANSION_ROOT,
98         MLX5_EXPANSION_ROOT_OUTER,
99         MLX5_EXPANSION_OUTER_ETH,
100         MLX5_EXPANSION_OUTER_IPV4,
101         MLX5_EXPANSION_OUTER_IPV4_UDP,
102         MLX5_EXPANSION_OUTER_IPV4_TCP,
103         MLX5_EXPANSION_OUTER_IPV6,
104         MLX5_EXPANSION_OUTER_IPV6_UDP,
105         MLX5_EXPANSION_OUTER_IPV6_TCP,
106         MLX5_EXPANSION_VXLAN,
107         MLX5_EXPANSION_VXLAN_GPE,
108         MLX5_EXPANSION_ETH,
109         MLX5_EXPANSION_IPV4,
110         MLX5_EXPANSION_IPV4_UDP,
111         MLX5_EXPANSION_IPV4_TCP,
112         MLX5_EXPANSION_IPV6,
113         MLX5_EXPANSION_IPV6_UDP,
114         MLX5_EXPANSION_IPV6_TCP,
115 };
116
117 /** Supported expansion of items. */
118 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
119         [MLX5_EXPANSION_ROOT] = {
120                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
121                                                  MLX5_EXPANSION_IPV4,
122                                                  MLX5_EXPANSION_IPV6),
123                 .type = RTE_FLOW_ITEM_TYPE_END,
124         },
125         [MLX5_EXPANSION_ROOT_OUTER] = {
126                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
127                                                  MLX5_EXPANSION_OUTER_IPV4,
128                                                  MLX5_EXPANSION_OUTER_IPV6),
129                 .type = RTE_FLOW_ITEM_TYPE_END,
130         },
131         [MLX5_EXPANSION_OUTER_ETH] = {
132                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
133                                                  MLX5_EXPANSION_OUTER_IPV6),
134                 .type = RTE_FLOW_ITEM_TYPE_ETH,
135                 .rss_types = 0,
136         },
137         [MLX5_EXPANSION_OUTER_IPV4] = {
138                 .next = RTE_FLOW_EXPAND_RSS_NEXT
139                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
140                          MLX5_EXPANSION_OUTER_IPV4_TCP),
141                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
142                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
143                         ETH_RSS_NONFRAG_IPV4_OTHER,
144         },
145         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
146                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
147                                                  MLX5_EXPANSION_VXLAN_GPE),
148                 .type = RTE_FLOW_ITEM_TYPE_UDP,
149                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
150         },
151         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
152                 .type = RTE_FLOW_ITEM_TYPE_TCP,
153                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
154         },
155         [MLX5_EXPANSION_OUTER_IPV6] = {
156                 .next = RTE_FLOW_EXPAND_RSS_NEXT
157                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
158                          MLX5_EXPANSION_OUTER_IPV6_TCP),
159                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
160                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
161                         ETH_RSS_NONFRAG_IPV6_OTHER,
162         },
163         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
164                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN,
165                                                  MLX5_EXPANSION_VXLAN_GPE),
166                 .type = RTE_FLOW_ITEM_TYPE_UDP,
167                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
168         },
169         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
170                 .type = RTE_FLOW_ITEM_TYPE_TCP,
171                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
172         },
173         [MLX5_EXPANSION_VXLAN] = {
174                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
175                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
176         },
177         [MLX5_EXPANSION_VXLAN_GPE] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
179                                                  MLX5_EXPANSION_IPV4,
180                                                  MLX5_EXPANSION_IPV6),
181                 .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE,
182         },
183         [MLX5_EXPANSION_ETH] = {
184                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
185                                                  MLX5_EXPANSION_IPV6),
186                 .type = RTE_FLOW_ITEM_TYPE_ETH,
187         },
188         [MLX5_EXPANSION_IPV4] = {
189                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
190                                                  MLX5_EXPANSION_IPV4_TCP),
191                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
192                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
193                         ETH_RSS_NONFRAG_IPV4_OTHER,
194         },
195         [MLX5_EXPANSION_IPV4_UDP] = {
196                 .type = RTE_FLOW_ITEM_TYPE_UDP,
197                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
198         },
199         [MLX5_EXPANSION_IPV4_TCP] = {
200                 .type = RTE_FLOW_ITEM_TYPE_TCP,
201                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
202         },
203         [MLX5_EXPANSION_IPV6] = {
204                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
205                                                  MLX5_EXPANSION_IPV6_TCP),
206                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
207                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
208                         ETH_RSS_NONFRAG_IPV6_OTHER,
209         },
210         [MLX5_EXPANSION_IPV6_UDP] = {
211                 .type = RTE_FLOW_ITEM_TYPE_UDP,
212                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
213         },
214         [MLX5_EXPANSION_IPV6_TCP] = {
215                 .type = RTE_FLOW_ITEM_TYPE_TCP,
216                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
217         },
218 };
219
220 /** Verbs flow specification and related resources. */
221 struct mlx5_flow_verbs {
222         LIST_ENTRY(mlx5_flow_verbs) next;
223         unsigned int size; /**< Size of the attribute. */
224         struct {
225                 struct ibv_flow_attr *attr;
226                 /**< Pointer to the Specification buffer. */
227                 uint8_t *specs; /**< Pointer to the specifications. */
228         };
229         struct ibv_flow *flow; /**< Verbs flow pointer. */
230         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
231         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
232 };
233
234 /* Flow structure. */
235 struct rte_flow {
236         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
237         struct rte_flow_attr attributes; /**< User flow attribute. */
238         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
239         uint32_t layers;
240         /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
241         uint32_t modifier;
242         /**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
243         uint32_t fate;
244         /**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
245         uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
246         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
247         struct mlx5_flow_verbs *cur_verbs;
248         /**< Current Verbs flow structure being filled. */
249         struct rte_flow_action_rss rss; /**< RSS context. */
250         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
251         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
252 };
253
254 static const struct rte_flow_ops mlx5_flow_ops = {
255         .validate = mlx5_flow_validate,
256         .create = mlx5_flow_create,
257         .destroy = mlx5_flow_destroy,
258         .flush = mlx5_flow_flush,
259         .isolate = mlx5_flow_isolate,
260 };
261
262 /* Convert FDIR request to Generic flow. */
263 struct mlx5_fdir {
264         struct rte_flow_attr attr;
265         struct rte_flow_action actions[2];
266         struct rte_flow_item items[4];
267         struct rte_flow_item_eth l2;
268         struct rte_flow_item_eth l2_mask;
269         union {
270                 struct rte_flow_item_ipv4 ipv4;
271                 struct rte_flow_item_ipv6 ipv6;
272         } l3;
273         union {
274                 struct rte_flow_item_ipv4 ipv4;
275                 struct rte_flow_item_ipv6 ipv6;
276         } l3_mask;
277         union {
278                 struct rte_flow_item_udp udp;
279                 struct rte_flow_item_tcp tcp;
280         } l4;
281         union {
282                 struct rte_flow_item_udp udp;
283                 struct rte_flow_item_tcp tcp;
284         } l4_mask;
285         struct rte_flow_action_queue queue;
286 };
287
288 /* Verbs specification header. */
289 struct ibv_spec_header {
290         enum ibv_flow_spec_type type;
291         uint16_t size;
292 };
293
294 /*
295  * Number of sub-priorities.
296  * For each kind of pattern matching, i.e. L2, L3 and L4, to get correct
297  * matching on the NIC (firmware dependent), L4 must have the highest
298  * priority, followed by L3 and finally L2.
299  */
300 #define MLX5_PRIORITY_MAP_L2 2
301 #define MLX5_PRIORITY_MAP_L3 1
302 #define MLX5_PRIORITY_MAP_L4 0
303 #define MLX5_PRIORITY_MAP_MAX 3
304
305 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
306 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
307         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
308 };
309
310 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
311 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
312         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
313         { 9, 10, 11 }, { 12, 13, 14 },
314 };
315
316 /* Tunnel information. */
317 struct mlx5_flow_tunnel_info {
318         uint32_t tunnel; /**< Tunnel bit (see MLX5_FLOW_*). */
319         uint32_t ptype; /**< Tunnel Ptype (see RTE_PTYPE_*). */
320 };
321
322 static struct mlx5_flow_tunnel_info tunnels_info[] = {
323         {
324                 .tunnel = MLX5_FLOW_LAYER_VXLAN,
325                 .ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP,
326         },
327         {
328                 .tunnel = MLX5_FLOW_LAYER_VXLAN_GPE,
329                 .ptype = RTE_PTYPE_TUNNEL_VXLAN_GPE | RTE_PTYPE_L4_UDP,
330         },
331 };
332
333 /**
334  * Discover the maximum number of priorities available.
335  *
336  * @param[in] dev
337  *   Pointer to Ethernet device.
338  *
339  * @return
340  *   number of supported flow priorities on success, a negative errno
341  *   value otherwise and rte_errno is set.
342  */
343 int
344 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
345 {
346         struct {
347                 struct ibv_flow_attr attr;
348                 struct ibv_flow_spec_eth eth;
349                 struct ibv_flow_spec_action_drop drop;
350         } flow_attr = {
351                 .attr = {
352                         .num_of_specs = 2,
353                 },
354                 .eth = {
355                         .type = IBV_FLOW_SPEC_ETH,
356                         .size = sizeof(struct ibv_flow_spec_eth),
357                 },
358                 .drop = {
359                         .size = sizeof(struct ibv_flow_spec_action_drop),
360                         .type = IBV_FLOW_SPEC_ACTION_DROP,
361                 },
362         };
363         struct ibv_flow *flow;
364         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
365         uint16_t vprio[] = { 8, 16 };
366         int i;
367         int priority = 0;
368
369         if (!drop) {
370                 rte_errno = ENOTSUP;
371                 return -rte_errno;
372         }
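        /*
         * Probe by creating a transient drop flow at the highest priority
         * each candidate implies; the last successful attempt tells how
         * many Verbs priorities the device supports.
         */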
373         for (i = 0; i != RTE_DIM(vprio); i++) {
374                 flow_attr.attr.priority = vprio[i] - 1;
375                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
376                 if (!flow)
377                         break;
378                 claim_zero(mlx5_glue->destroy_flow(flow));
379                 priority = vprio[i];
380         }
381         switch (priority) {
382         case 8:
383                 priority = RTE_DIM(priority_map_3);
384                 break;
385         case 16:
386                 priority = RTE_DIM(priority_map_5);
387                 break;
388         default:
389                 rte_errno = ENOTSUP;
390                 DRV_LOG(ERR,
391                         "port %u verbs maximum priority: %d expected 8/16",
392                         dev->data->port_id, vprio[i]);
393                 return -rte_errno;
394         }
395         mlx5_hrxq_drop_release(dev);
396         DRV_LOG(INFO, "port %u flow maximum priority: %d",
397                 dev->data->port_id, priority);
398         return priority;
399 }
400
401 /**
402  * Adjust flow priority.
403  *
404  * @param dev
405  *   Pointer to Ethernet device.
406  * @param flow
407  *   Pointer to an rte flow.
408  */
409 static void
410 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
411 {
412         struct priv *priv = dev->data->dev_private;
413         uint32_t priority = flow->attributes.priority;
414         uint32_t subpriority = flow->cur_verbs->attr->priority;
415
416         switch (priv->config.flow_prio) {
417         case RTE_DIM(priority_map_3):
418                 priority = priority_map_3[priority][subpriority];
419                 break;
420         case RTE_DIM(priority_map_5):
421                 priority = priority_map_5[priority][subpriority];
422                 break;
423         }
424         flow->cur_verbs->attr->priority = priority;
425 }
426
427 /**
428  * Verify the @p attributes will be correctly understood by the NIC and store
429  * them in the @p flow if everything is correct.
430  *
431  * @param[in] dev
432  *   Pointer to Ethernet device.
433  * @param[in] attributes
434  *   Pointer to flow attributes
435  * @param[in, out] flow
436  *   Pointer to the rte_flow structure.
437  * @param[out] error
438  *   Pointer to error structure.
439  *
440  * @return
441  *   0 on success, a negative errno value otherwise and rte_errno is set.
442  */
443 static int
444 mlx5_flow_attributes(struct rte_eth_dev *dev,
445                      const struct rte_flow_attr *attributes,
446                      struct rte_flow *flow,
447                      struct rte_flow_error *error)
448 {
449         uint32_t priority_max =
450                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
451
452         if (attributes->group)
453                 return rte_flow_error_set(error, ENOTSUP,
454                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
455                                           NULL,
456                                           "groups are not supported");
457         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
458             attributes->priority >= priority_max)
459                 return rte_flow_error_set(error, ENOTSUP,
460                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
461                                           NULL,
462                                           "priority out of range");
463         if (attributes->egress)
464                 return rte_flow_error_set(error, ENOTSUP,
465                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
466                                           NULL,
467                                           "egress is not supported");
468         if (attributes->transfer)
469                 return rte_flow_error_set(error, ENOTSUP,
470                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
471                                           NULL,
472                                           "transfer is not supported");
473         if (!attributes->ingress)
474                 return rte_flow_error_set(error, ENOTSUP,
475                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
476                                           NULL,
477                                           "ingress attribute is mandatory");
478         flow->attributes = *attributes;
479         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
480                 flow->attributes.priority = priority_max;
481         return 0;
482 }
483
484 /**
485  * Verify the @p item specifications (spec, last, mask) are compatible with the
486  * NIC capabilities.
487  *
488  * @param[in] item
489  *   Item specification.
490  * @param[in] mask
491  *   @p item->mask or flow default bit-masks.
492  * @param[in] nic_mask
493  *   Bit-masks covering supported fields by the NIC to compare with user mask.
494  * @param[in] size
495  *   Bit-mask size in bytes.
496  * @param[out] error
497  *   Pointer to error structure.
498  *
499  * @return
500  *   0 on success, a negative errno value otherwise and rte_errno is set.
501  */
502 static int
503 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
504                           const uint8_t *mask,
505                           const uint8_t *nic_mask,
506                           unsigned int size,
507                           struct rte_flow_error *error)
508 {
509         unsigned int i;
510
511         assert(nic_mask);
512         for (i = 0; i < size; ++i)
513                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
514                         return rte_flow_error_set(error, ENOTSUP,
515                                                   RTE_FLOW_ERROR_TYPE_ITEM,
516                                                   item,
517                                                   "mask enables unsupported"
518                                                   " bits");
519         if (!item->spec && (item->mask || item->last))
520                 return rte_flow_error_set(error, EINVAL,
521                                           RTE_FLOW_ERROR_TYPE_ITEM,
522                                           item,
523                                           "mask/last without a spec is not"
524                                           " supported");
525         if (item->spec && item->last) {
526                 uint8_t spec[size];
527                 uint8_t last[size];
528                 unsigned int i;
529                 int ret;
530
531                 for (i = 0; i < size; ++i) {
532                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
533                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
534                 }
535                 ret = memcmp(spec, last, size);
536                 if (ret != 0)
537                         return rte_flow_error_set(error, ENOTSUP,
538                                                   RTE_FLOW_ERROR_TYPE_ITEM,
539                                                   item,
540                                                   "range is not supported");
541         }
542         return 0;
543 }
544
545 /**
546  * Add a verbs item specification into @p flow.
547  *
548  * @param[in, out] flow
549  *   Pointer to flow structure.
550  * @param[in] src
551  *   Verbs specification to add.
552  * @param[in] size
553  *   Size in bytes of the specification to copy.
554  */
555 static void
556 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
557 {
558         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
559
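        /*
         * Without an attached specification buffer (size measurement pass),
         * only account for the space the specification would occupy.
         */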
560         if (verbs->specs) {
561                 void *dst;
562
563                 dst = (void *)(verbs->specs + verbs->size);
564                 memcpy(dst, src, size);
565                 ++verbs->attr->num_of_specs;
566         }
567         verbs->size += size;
568 }
569
570 /**
571  * Adjust verbs hash fields according to the @p flow information.
572  *
573  * @param[in, out] flow
574  *   Pointer to flow structure.
575  * @param[in] tunnel
576  *   1 when the hash field is for a tunnel item.
577  * @param[in] layer_types
578  *   ETH_RSS_* types.
579  * @param[in] hash_fields
580  *   Item hash fields.
581  */
582 static void
583 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
584                                   int tunnel __rte_unused,
585                                   uint32_t layer_types, uint64_t hash_fields)
586 {
587 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
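        /*
         * RSS level 2 requests hashing on inner headers only while levels
         * 0 and 1 use the outer ones; discard hash fields that do not match
         * the requested level.
         */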
588         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
589         if (flow->rss.level == 2 && !tunnel)
590                 hash_fields = 0;
591         else if (flow->rss.level < 2 && tunnel)
592                 hash_fields = 0;
593 #endif
594         if (!(flow->rss.types & layer_types))
595                 hash_fields = 0;
596         flow->cur_verbs->hash_fields |= hash_fields;
597 }
598
599 /**
600  * Convert the @p item into a Verbs specification after ensuring the NIC
601  * will understand and process it correctly.
602  * If the necessary size for the conversion is greater than the @p flow_size,
603  * nothing is written in @p flow; the validation is still performed.
604  *
605  * @param[in] item
606  *   Item specification.
607  * @param[in, out] flow
608  *   Pointer to flow structure.
609  * @param[in] flow_size
610  *   Size in bytes of the available space in @p flow, if too small, nothing is
611  *   written.
612  * @param[out] error
613  *   Pointer to error structure.
614  *
615  * @return
616  *   On success, the number of bytes consumed/necessary; if the returned value
617  *   is less than or equal to @p flow_size, the @p item has been fully converted,
618  *   otherwise another call with the returned memory size should be made.
619  *   On error, a negative errno value is returned and rte_errno is set.
620  */
621 static int
622 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
623                    const size_t flow_size, struct rte_flow_error *error)
624 {
625         const struct rte_flow_item_eth *spec = item->spec;
626         const struct rte_flow_item_eth *mask = item->mask;
627         const struct rte_flow_item_eth nic_mask = {
628                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
629                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
630                 .type = RTE_BE16(0xffff),
631         };
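        /* With a tunnel layer already matched, this item is the inner L2. */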
632         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
633         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
634         struct ibv_flow_spec_eth eth = {
635                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
636                 .size = size,
637         };
638         int ret;
639
640         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
641                             MLX5_FLOW_LAYER_OUTER_L2))
642                 return rte_flow_error_set(error, ENOTSUP,
643                                           RTE_FLOW_ERROR_TYPE_ITEM,
644                                           item,
645                                           "L2 layers already configured");
646         if (!mask)
647                 mask = &rte_flow_item_eth_mask;
648         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
649                                         (const uint8_t *)&nic_mask,
650                                         sizeof(struct rte_flow_item_eth),
651                                         error);
652         if (ret)
653                 return ret;
654         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
655                 MLX5_FLOW_LAYER_OUTER_L2;
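        /* Report the required size when the flow buffer is too small. */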
656         if (size > flow_size)
657                 return size;
658         if (spec) {
659                 unsigned int i;
660
661                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
662                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
663                 eth.val.ether_type = spec->type;
664                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
665                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
666                 eth.mask.ether_type = mask->type;
667                 /* Remove unwanted bits from values. */
668                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
669                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
670                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
671                 }
672                 eth.val.ether_type &= eth.mask.ether_type;
673         }
674         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
675         mlx5_flow_spec_verbs_add(flow, &eth, size);
676         return size;
677 }
678
679 /**
680  * Update the VLAN tag in the Verbs Ethernet specification.
681  *
682  * @param[in, out] attr
683  *   Pointer to Verbs attributes structure.
684  * @param[in] eth
685  *   Verbs structure containing the VLAN information to copy.
686  */
687 static void
688 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
689                            struct ibv_flow_spec_eth *eth)
690 {
691         unsigned int i;
692         const enum ibv_flow_spec_type search = eth->type;
693         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
694                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
695
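        /*
         * Walk the specifications stored after the attribute to locate the
         * Ethernet one and update its VLAN fields in place.
         */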
696         for (i = 0; i != attr->num_of_specs; ++i) {
697                 if (hdr->type == search) {
698                         struct ibv_flow_spec_eth *e =
699                                 (struct ibv_flow_spec_eth *)hdr;
700
701                         e->val.vlan_tag = eth->val.vlan_tag;
702                         e->mask.vlan_tag = eth->mask.vlan_tag;
703                         e->val.ether_type = eth->val.ether_type;
704                         e->mask.ether_type = eth->mask.ether_type;
705                         break;
706                 }
707                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
708         }
709 }
710
711 /**
712  * Convert the @p item into a Verbs specification in @p flow (or update the
713  * already present Ethernet Verbs specification) after ensuring the NIC will
714  * understand and process it correctly.
715  * If the necessary size for the conversion is greater than the @p flow_size,
716  * nothing is written in @p flow; the validation is still performed.
717  *
718  * @param[in] item
719  *   Item specification.
720  * @param[in, out] flow
721  *   Pointer to flow structure.
722  * @param[in] flow_size
723  *   Size in bytes of the available space in @p flow, if too small, nothing is
724  *   written.
725  * @param[out] error
726  *   Pointer to error structure.
727  *
728  * @return
729  *   On success, the number of bytes consumed/necessary; if the returned value
730  *   is less than or equal to @p flow_size, the @p item has been fully converted,
731  *   otherwise another call with the returned memory size should be made.
732  *   On error, a negative errno value is returned and rte_errno is set.
733  */
734 static int
735 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
736                     const size_t flow_size, struct rte_flow_error *error)
737 {
738         const struct rte_flow_item_vlan *spec = item->spec;
739         const struct rte_flow_item_vlan *mask = item->mask;
740         const struct rte_flow_item_vlan nic_mask = {
741                 .tci = RTE_BE16(0x0fff),
742                 .inner_type = RTE_BE16(0xffff),
743         };
744         unsigned int size = sizeof(struct ibv_flow_spec_eth);
745         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
746         struct ibv_flow_spec_eth eth = {
747                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
748                 .size = size,
749         };
750         int ret;
751         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
752                                         MLX5_FLOW_LAYER_INNER_L4) :
753                 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
754         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
755                 MLX5_FLOW_LAYER_OUTER_VLAN;
756         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
757                 MLX5_FLOW_LAYER_OUTER_L2;
758
759         if (flow->layers & vlanm)
760                 return rte_flow_error_set(error, ENOTSUP,
761                                           RTE_FLOW_ERROR_TYPE_ITEM,
762                                           item,
763                                           "VLAN layer already configured");
764         else if ((flow->layers & l34m) != 0)
765                 return rte_flow_error_set(error, ENOTSUP,
766                                           RTE_FLOW_ERROR_TYPE_ITEM,
767                                           item,
768                                           "L2 layer cannot follow L3/L4 layer");
769         if (!mask)
770                 mask = &rte_flow_item_vlan_mask;
771         ret = mlx5_flow_item_acceptable
772                 (item, (const uint8_t *)mask,
773                  (const uint8_t *)&nic_mask,
774                  sizeof(struct rte_flow_item_vlan), error);
775         if (ret)
776                 return ret;
777         if (spec) {
778                 eth.val.vlan_tag = spec->tci;
779                 eth.mask.vlan_tag = mask->tci;
780                 eth.val.vlan_tag &= eth.mask.vlan_tag;
781                 eth.val.ether_type = spec->inner_type;
782                 eth.mask.ether_type = mask->inner_type;
783                 eth.val.ether_type &= eth.mask.ether_type;
784         }
785         /*
786          * From the Verbs perspective, an empty VLAN is equivalent
787          * to a packet without a VLAN layer.
788          */
789         if (!eth.mask.vlan_tag)
790                 return rte_flow_error_set(error, EINVAL,
791                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
792                                           item->spec,
793                                           "VLAN cannot be empty");
794         if (!(flow->layers & l2m)) {
795                 if (size <= flow_size) {
796                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
797                         mlx5_flow_spec_verbs_add(flow, &eth, size);
798                 }
799         } else {
800                 if (flow->cur_verbs)
801                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
802                                                    &eth);
803                 size = 0; /* Only an update is done in eth specification. */
804         }
805         flow->layers |= tunnel ?
806                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
807                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
808         return size;
809 }
810
811 /**
812  * Convert the @p item into a Verbs specification after ensuring the NIC
813  * will understand and process it correctly.
814  * If the necessary size for the conversion is greater than the @p flow_size,
815  * nothing is written in @p flow; the validation is still performed.
816  *
817  * @param[in] item
818  *   Item specification.
819  * @param[in, out] flow
820  *   Pointer to flow structure.
821  * @param[in] flow_size
822  *   Size in bytes of the available space in @p flow, if too small, nothing is
823  *   written.
824  * @param[out] error
825  *   Pointer to error structure.
826  *
827  * @return
828  *   On success, the number of bytes consumed/necessary; if the returned value
829  *   is less than or equal to @p flow_size, the @p item has been fully converted,
830  *   otherwise another call with the returned memory size should be made.
831  *   On error, a negative errno value is returned and rte_errno is set.
832  */
833 static int
834 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
835                     const size_t flow_size, struct rte_flow_error *error)
836 {
837         const struct rte_flow_item_ipv4 *spec = item->spec;
838         const struct rte_flow_item_ipv4 *mask = item->mask;
839         const struct rte_flow_item_ipv4 nic_mask = {
840                 .hdr = {
841                         .src_addr = RTE_BE32(0xffffffff),
842                         .dst_addr = RTE_BE32(0xffffffff),
843                         .type_of_service = 0xff,
844                         .next_proto_id = 0xff,
845                 },
846         };
847         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
848         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
849         struct ibv_flow_spec_ipv4_ext ipv4 = {
850                 .type = IBV_FLOW_SPEC_IPV4_EXT |
851                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
852                 .size = size,
853         };
854         int ret;
855
856         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
857                             MLX5_FLOW_LAYER_OUTER_L3))
858                 return rte_flow_error_set(error, ENOTSUP,
859                                           RTE_FLOW_ERROR_TYPE_ITEM,
860                                           item,
861                                           "multiple L3 layers not supported");
862         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
863                                  MLX5_FLOW_LAYER_OUTER_L4))
864                 return rte_flow_error_set(error, ENOTSUP,
865                                           RTE_FLOW_ERROR_TYPE_ITEM,
866                                           item,
867                                           "L3 cannot follow an L4 layer.");
868         if (!mask)
869                 mask = &rte_flow_item_ipv4_mask;
870         ret = mlx5_flow_item_acceptable
871                 (item, (const uint8_t *)mask,
872                  (const uint8_t *)&nic_mask,
873                  sizeof(struct rte_flow_item_ipv4), error);
874         if (ret < 0)
875                 return ret;
876         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
877                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
878         if (spec) {
879                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
880                         .src_ip = spec->hdr.src_addr,
881                         .dst_ip = spec->hdr.dst_addr,
882                         .proto = spec->hdr.next_proto_id,
883                         .tos = spec->hdr.type_of_service,
884                 };
885                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
886                         .src_ip = mask->hdr.src_addr,
887                         .dst_ip = mask->hdr.dst_addr,
888                         .proto = mask->hdr.next_proto_id,
889                         .tos = mask->hdr.type_of_service,
890                 };
891                 /* Remove unwanted bits from values. */
892                 ipv4.val.src_ip &= ipv4.mask.src_ip;
893                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
894                 ipv4.val.proto &= ipv4.mask.proto;
895                 ipv4.val.tos &= ipv4.mask.tos;
896         }
897         flow->l3_protocol_en = !!ipv4.mask.proto;
898         flow->l3_protocol = ipv4.val.proto;
899         if (size <= flow_size) {
900                 mlx5_flow_verbs_hashfields_adjust
901                         (flow, tunnel,
902                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
903                           ETH_RSS_NONFRAG_IPV4_OTHER),
904                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
905                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
906                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
907         }
908         return size;
909 }
910
911 /**
912  * Convert the @p item into a Verbs specification after ensuring the NIC
913  * will understand and process it correctly.
914  * If the necessary size for the conversion is greater than the @p flow_size,
915  * nothing is written in @p flow; the validation is still performed.
916  *
917  * @param[in] item
918  *   Item specification.
919  * @param[in, out] flow
920  *   Pointer to flow structure.
921  * @param[in] flow_size
922  *   Size in bytes of the available space in @p flow, if too small, nothing is
923  *   written.
924  * @param[out] error
925  *   Pointer to error structure.
926  *
927  * @return
928  *   On success, the number of bytes consumed/necessary; if the returned value
929  *   is less than or equal to @p flow_size, the @p item has been fully converted,
930  *   otherwise another call with the returned memory size should be made.
931  *   On error, a negative errno value is returned and rte_errno is set.
932  */
933 static int
934 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
935                     const size_t flow_size, struct rte_flow_error *error)
936 {
937         const struct rte_flow_item_ipv6 *spec = item->spec;
938         const struct rte_flow_item_ipv6 *mask = item->mask;
939         const struct rte_flow_item_ipv6 nic_mask = {
940                 .hdr = {
941                         .src_addr =
942                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
943                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
944                         .dst_addr =
945                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
946                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
947                         .vtc_flow = RTE_BE32(0xffffffff),
948                         .proto = 0xff,
949                         .hop_limits = 0xff,
950                 },
951         };
952         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
953         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
954         struct ibv_flow_spec_ipv6 ipv6 = {
955                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
956                 .size = size,
957         };
958         int ret;
959
960         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
961                             MLX5_FLOW_LAYER_OUTER_L3))
962                 return rte_flow_error_set(error, ENOTSUP,
963                                           RTE_FLOW_ERROR_TYPE_ITEM,
964                                           item,
965                                           "multiple L3 layers not supported");
966         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
967                                  MLX5_FLOW_LAYER_OUTER_L4))
968                 return rte_flow_error_set(error, ENOTSUP,
969                                           RTE_FLOW_ERROR_TYPE_ITEM,
970                                           item,
971                                           "L3 cannot follow an L4 layer.");
972         if (!mask)
973                 mask = &rte_flow_item_ipv6_mask;
974         ret = mlx5_flow_item_acceptable
975                 (item, (const uint8_t *)mask,
976                  (const uint8_t *)&nic_mask,
977                  sizeof(struct rte_flow_item_ipv6), error);
978         if (ret < 0)
979                 return ret;
980         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
981                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
982         if (spec) {
983                 unsigned int i;
984                 uint32_t vtc_flow_val;
985                 uint32_t vtc_flow_mask;
986
987                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
988                        RTE_DIM(ipv6.val.src_ip));
989                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
990                        RTE_DIM(ipv6.val.dst_ip));
991                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
992                        RTE_DIM(ipv6.mask.src_ip));
993                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
994                        RTE_DIM(ipv6.mask.dst_ip));
995                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
996                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
997                 ipv6.val.flow_label =
998                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
999                                          IPV6_HDR_FL_SHIFT);
1000                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
1001                                          IPV6_HDR_TC_SHIFT;
1002                 ipv6.val.next_hdr = spec->hdr.proto;
1003                 ipv6.val.hop_limit = spec->hdr.hop_limits;
1004                 ipv6.mask.flow_label =
1005                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
1006                                          IPV6_HDR_FL_SHIFT);
1007                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
1008                                           IPV6_HDR_TC_SHIFT;
1009                 ipv6.mask.next_hdr = mask->hdr.proto;
1010                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1011                 /* Remove unwanted bits from values. */
1012                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1013                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1014                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1015                 }
1016                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1017                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
1018                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1019                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1020         }
1021         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
1022         flow->l3_protocol = ipv6.val.next_hdr;
1023         if (size <= flow_size) {
1024                 mlx5_flow_verbs_hashfields_adjust
1025                         (flow, tunnel,
1026                          (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
1027                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1028                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1029                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1030         }
1031         return size;
1032 }
1033
1034 /**
1035  * Convert the @p item into a Verbs specification after ensuring the NIC
1036  * will understand and process it correctly.
1037  * If the necessary size for the conversion is greater than the @p flow_size,
1038  * nothing is written in @p flow; the validation is still performed.
1039  *
1040  * @param[in] item
1041  *   Item specification.
1042  * @param[in, out] flow
1043  *   Pointer to flow structure.
1044  * @param[in] flow_size
1045  *   Size in bytes of the available space in @p flow, if too small, nothing is
1046  *   written.
1047  * @param[out] error
1048  *   Pointer to error structure.
1049  *
1050  * @return
1051  *   On success, the number of bytes consumed/necessary; if the returned value
1052  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1053  *   otherwise another call with the returned memory size should be made.
1054  *   On error, a negative errno value is returned and rte_errno is set.
1055  */
1056 static int
1057 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1058                    const size_t flow_size, struct rte_flow_error *error)
1059 {
1060         const struct rte_flow_item_udp *spec = item->spec;
1061         const struct rte_flow_item_udp *mask = item->mask;
1062         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1063         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1064         struct ibv_flow_spec_tcp_udp udp = {
1065                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1066                 .size = size,
1067         };
1068         int ret;
1069
1070         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
1071                 return rte_flow_error_set(error, ENOTSUP,
1072                                           RTE_FLOW_ERROR_TYPE_ITEM,
1073                                           item,
1074                                           "protocol filtering not compatible"
1075                                           " with UDP layer");
1076         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1077                               MLX5_FLOW_LAYER_OUTER_L3)))
1078                 return rte_flow_error_set(error, ENOTSUP,
1079                                           RTE_FLOW_ERROR_TYPE_ITEM,
1080                                           item,
1081                                           "L3 is mandatory to filter"
1082                                           " on L4");
1083         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1084                             MLX5_FLOW_LAYER_OUTER_L4))
1085                 return rte_flow_error_set(error, ENOTSUP,
1086                                           RTE_FLOW_ERROR_TYPE_ITEM,
1087                                           item,
1088                                           "L4 layer is already"
1089                                           " present");
1090         if (!mask)
1091                 mask = &rte_flow_item_udp_mask;
1092         ret = mlx5_flow_item_acceptable
1093                 (item, (const uint8_t *)mask,
1094                  (const uint8_t *)&rte_flow_item_udp_mask,
1095                  sizeof(struct rte_flow_item_udp), error);
1096         if (ret < 0)
1097                 return ret;
1098         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1099                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1100         if (spec) {
1101                 udp.val.dst_port = spec->hdr.dst_port;
1102                 udp.val.src_port = spec->hdr.src_port;
1103                 udp.mask.dst_port = mask->hdr.dst_port;
1104                 udp.mask.src_port = mask->hdr.src_port;
1105                 /* Remove unwanted bits from values. */
1106                 udp.val.src_port &= udp.mask.src_port;
1107                 udp.val.dst_port &= udp.mask.dst_port;
1108         }
1109         if (size <= flow_size) {
1110                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1111                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1112                                                    IBV_RX_HASH_DST_PORT_UDP));
1113                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1114                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1115         }
1116         return size;
1117 }
1118
1119 /**
1120  * Convert the @p item into a Verbs specification after ensuring the NIC
1121  * will understand and process it correctly.
1122  * If the necessary size for the conversion is greater than the @p flow_size,
1123  * nothing is written in @p flow; the validation is still performed.
1124  *
1125  * @param[in] item
1126  *   Item specification.
1127  * @param[in, out] flow
1128  *   Pointer to flow structure.
1129  * @param[in] flow_size
1130  *   Size in bytes of the available space in @p flow, if too small, nothing is
1131  *   written.
1132  * @param[out] error
1133  *   Pointer to error structure.
1134  *
1135  * @return
1136  *   On success, the number of bytes consumed/necessary; if the returned value
1137  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1138  *   otherwise another call with the returned memory size should be made.
1139  *   On error, a negative errno value is returned and rte_errno is set.
1140  */
1141 static int
1142 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1143                    const size_t flow_size, struct rte_flow_error *error)
1144 {
1145         const struct rte_flow_item_tcp *spec = item->spec;
1146         const struct rte_flow_item_tcp *mask = item->mask;
1147         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1148         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1149         struct ibv_flow_spec_tcp_udp tcp = {
1150                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1151                 .size = size,
1152         };
1153         int ret;
1154
1155         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1156                 return rte_flow_error_set(error, ENOTSUP,
1157                                           RTE_FLOW_ERROR_TYPE_ITEM,
1158                                           item,
1159                                           "protocol filtering not compatible"
1160                                           " with TCP layer");
1161         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1162                               MLX5_FLOW_LAYER_OUTER_L3)))
1163                 return rte_flow_error_set(error, ENOTSUP,
1164                                           RTE_FLOW_ERROR_TYPE_ITEM,
1165                                           item,
1166                                           "L3 is mandatory to filter on L4");
1167         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1168                             MLX5_FLOW_LAYER_OUTER_L4))
1169                 return rte_flow_error_set(error, ENOTSUP,
1170                                           RTE_FLOW_ERROR_TYPE_ITEM,
1171                                           item,
1172                                           "L4 layer is already present");
1173         if (!mask)
1174                 mask = &rte_flow_item_tcp_mask;
1175         ret = mlx5_flow_item_acceptable
1176                 (item, (const uint8_t *)mask,
1177                  (const uint8_t *)&rte_flow_item_tcp_mask,
1178                  sizeof(struct rte_flow_item_tcp), error);
1179         if (ret < 0)
1180                 return ret;
1181         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1182                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1183         if (spec) {
1184                 tcp.val.dst_port = spec->hdr.dst_port;
1185                 tcp.val.src_port = spec->hdr.src_port;
1186                 tcp.mask.dst_port = mask->hdr.dst_port;
1187                 tcp.mask.src_port = mask->hdr.src_port;
1188                 /* Remove unwanted bits from values. */
1189                 tcp.val.src_port &= tcp.mask.src_port;
1190                 tcp.val.dst_port &= tcp.mask.dst_port;
1191         }
1192         if (size <= flow_size) {
1193                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1194                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1195                                                    IBV_RX_HASH_DST_PORT_TCP));
1196                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1197                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1198         }
1199         return size;
1200 }
1201
1202 /**
1203  * Convert the @p item into a Verbs specification after ensuring the NIC
1204  * will understand and process it correctly.
1205  * If the necessary size for the conversion is greater than the @p flow_size,
1206  * nothing is written in @p flow; the validation is still performed.
1207  *
1208  * @param[in] item
1209  *   Item specification.
1210  * @param[in, out] flow
1211  *   Pointer to flow structure.
1212  * @param[in] flow_size
1213  *   Size in bytes of the available space in @p flow, if too small, nothing is
1214  *   written.
1215  * @param[out] error
1216  *   Pointer to error structure.
1217  *
1218  * @return
1219  *   On success, the number of bytes consumed/necessary; if the returned value
1220  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1221  *   otherwise another call with the returned memory size should be made.
1222  *   On error, a negative errno value is returned and rte_errno is set.
1223  */
1224 static int
1225 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1226                      const size_t flow_size, struct rte_flow_error *error)
1227 {
1228         const struct rte_flow_item_vxlan *spec = item->spec;
1229         const struct rte_flow_item_vxlan *mask = item->mask;
1230         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1231         struct ibv_flow_spec_tunnel vxlan = {
1232                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1233                 .size = size,
1234         };
1235         int ret;
1236         union vni {
1237                 uint32_t vlan_id;
1238                 uint8_t vni[4];
1239         } id = { .vlan_id = 0, };
1240
1241         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1242                 return rte_flow_error_set(error, ENOTSUP,
1243                                           RTE_FLOW_ERROR_TYPE_ITEM,
1244                                           item,
1245                                           "a tunnel is already present");
1246         /*
1247          * Verify an outer UDP layer is present as required by
1248          * https://tools.ietf.org/html/rfc7348
1249          */
1250         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1251                 return rte_flow_error_set(error, ENOTSUP,
1252                                           RTE_FLOW_ERROR_TYPE_ITEM,
1253                                           item,
1254                                           "no outer UDP layer found");
1255         if (!mask)
1256                 mask = &rte_flow_item_vxlan_mask;
1257         ret = mlx5_flow_item_acceptable
1258                 (item, (const uint8_t *)mask,
1259                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1260                  sizeof(struct rte_flow_item_vxlan), error);
1261         if (ret < 0)
1262                 return ret;
1263         if (spec) {
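                /*
                 * The 24-bit VNI is copied into bytes 1-3 of the 32-bit
                 * tunnel id, leaving byte 0 cleared.
                 */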
1264                 memcpy(&id.vni[1], spec->vni, 3);
1265                 vxlan.val.tunnel_id = id.vlan_id;
1266                 memcpy(&id.vni[1], mask->vni, 3);
1267                 vxlan.mask.tunnel_id = id.vlan_id;
1268                 /* Remove unwanted bits from values. */
1269                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1270         }
1271         /*
1272          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
1273          * only this layer is defined in the Verbs specification it is
1274          * interpreted as a wildcard and all packets will match this
1275          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
1276          * udp), all packets matching the preceding layers will also
1277          * match this rule.  To avoid such a situation, VNI 0 is
1278          * currently refused.
1279          */
1280         if (!vxlan.val.tunnel_id)
1281                 return rte_flow_error_set(error, EINVAL,
1282                                           RTE_FLOW_ERROR_TYPE_ITEM,
1283                                           item,
1284                                           "VXLAN vni cannot be 0");
1285         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1286                 return rte_flow_error_set(error, EINVAL,
1287                                           RTE_FLOW_ERROR_TYPE_ITEM,
1288                                           item,
1289                                           "VXLAN tunnel must be fully defined");
1290         if (size <= flow_size) {
1291                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1292                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1293         }
1294         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1295         return size;
1296 }
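
/*
 * Illustrative sketch for documentation purposes only: a minimal pattern
 * accepted by mlx5_flow_item_vxlan() above.  The VXLAN item must follow a
 * full outer eth / ipv4 / udp stack and carry a non-zero VNI.  Names and
 * values below are hypothetical.
 */
static const struct rte_flow_item_vxlan doc_vxlan_spec __rte_unused = {
        .vni = { 0x00, 0x00, 0x2a }, /* VNI 42, zero is refused above. */
};
static const struct rte_flow_item doc_vxlan_pattern[] __rte_unused = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &doc_vxlan_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};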
1297
1298 /**
1299  * Convert the @p item into a Verbs specification after ensuring the NIC
1300  * will understand and process it correctly.
1301  * If the necessary size for the conversion is greater than the @p flow_size,
1302  * nothing is written in @p flow, the validation is still performed.
1303  *
1304  * @param dev
1305  *   Pointer to Ethernet device.
1306  * @param[in] item
1307  *   Item specification.
1308  * @param[in, out] flow
1309  *   Pointer to flow structure.
1310  * @param[in] flow_size
1311  *   Size in bytes of the available space in @p flow, if too small, nothing is
1312  *   written.
1313  * @param[out] error
1314  *   Pointer to error structure.
1315  *
1316  * @return
1317  *   On success, the number of bytes consumed/necessary. If the returned value
1318  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1319  *   otherwise another call with the returned size should be done.
1320  *   On error, a negative errno value is returned and rte_errno is set.
1321  */
1322 static int
1323 mlx5_flow_item_vxlan_gpe(struct rte_eth_dev *dev,
1324                          const struct rte_flow_item *item,
1325                          struct rte_flow *flow, const size_t flow_size,
1326                          struct rte_flow_error *error)
1327 {
1328         const struct rte_flow_item_vxlan_gpe *spec = item->spec;
1329         const struct rte_flow_item_vxlan_gpe *mask = item->mask;
1330         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1331         struct ibv_flow_spec_tunnel vxlan_gpe = {
1332                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1333                 .size = size,
1334         };
1335         int ret;
1336         union vni {
1337                 uint32_t vlan_id;
1338                 uint8_t vni[4];
1339         } id = { .vlan_id = 0, };
1340
1341         if (!((struct priv *)dev->data->dev_private)->config.l3_vxlan_en)
1342                 return rte_flow_error_set(error, ENOTSUP,
1343                                           RTE_FLOW_ERROR_TYPE_ITEM,
1344                                           item,
1345                                           "L3 VXLAN is not enabled by device"
1346                                           " parameter and/or not configured in"
1347                                           " firmware");
1348         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1349                 return rte_flow_error_set(error, ENOTSUP,
1350                                           RTE_FLOW_ERROR_TYPE_ITEM,
1351                                           item,
1352                                           "a tunnel is already present");
1353         /*
1354          * Verify an outer UDP layer is present, as required by the
1355          * VXLAN-GPE encapsulation (draft-ietf-nvo3-vxlan-gpe).
1356          */
1357         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1358                 return rte_flow_error_set(error, ENOTSUP,
1359                                           RTE_FLOW_ERROR_TYPE_ITEM,
1360                                           item,
1361                                           "no outer UDP layer found");
1362         if (!mask)
1363                 mask = &rte_flow_item_vxlan_gpe_mask;
1364         ret = mlx5_flow_item_acceptable
1365                 (item, (const uint8_t *)mask,
1366                  (const uint8_t *)&rte_flow_item_vxlan_gpe_mask,
1367                  sizeof(struct rte_flow_item_vxlan_gpe), error);
1368         if (ret < 0)
1369                 return ret;
1370         if (spec) {
1371                 memcpy(&id.vni[1], spec->vni, 3);
1372                 vxlan_gpe.val.tunnel_id = id.vlan_id;
1373                 memcpy(&id.vni[1], mask->vni, 3);
1374                 vxlan_gpe.mask.tunnel_id = id.vlan_id;
1375                 if (spec->protocol)
1376                         return rte_flow_error_set
1377                                 (error, EINVAL,
1378                                  RTE_FLOW_ERROR_TYPE_ITEM,
1379                                  item,
1380                                  "VXLAN-GPE protocol not supported");
1381                 /* Remove unwanted bits from values. */
1382                 vxlan_gpe.val.tunnel_id &= vxlan_gpe.mask.tunnel_id;
1383         }
1384         /*
1385          * Tunnel id 0 is equivalent to not adding a VXLAN-GPE layer: if only this
1386          * layer is defined in the Verbs specification it is interpreted as a
1387          * wildcard and all packets will match this rule; if it follows a full
1388          * stack layer (e.g. eth / ipv4 / udp), all packets matching the preceding
1389          * layers will also match this rule.  To avoid such a situation, VNI 0
1390          * is currently refused.
1391          */
1392         if (!vxlan_gpe.val.tunnel_id)
1393                 return rte_flow_error_set(error, EINVAL,
1394                                           RTE_FLOW_ERROR_TYPE_ITEM,
1395                                           item,
1396                                           "VXLAN-GPE vni cannot be 0");
1397         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1398                 return rte_flow_error_set(error, EINVAL,
1399                                           RTE_FLOW_ERROR_TYPE_ITEM,
1400                                           item,
1401                                           "VXLAN-GPE tunnel must be fully"
1402                                           " defined");
1403         if (size <= flow_size) {
1404                 mlx5_flow_spec_verbs_add(flow, &vxlan_gpe, size);
1405                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1406         }
1407         flow->layers |= MLX5_FLOW_LAYER_VXLAN_GPE;
1408         return size;
1409 }
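
/*
 * Illustrative sketch for documentation purposes only: a VXLAN-GPE pattern
 * accepted by mlx5_flow_item_vxlan_gpe() above.  It additionally requires
 * the l3_vxlan_en device parameter to be enabled, and the protocol field
 * must be left at zero since matching on it is refused.  Names and values
 * below are hypothetical.
 */
static const struct rte_flow_item_vxlan_gpe doc_vxlan_gpe_spec __rte_unused = {
        .vni = { 0x00, 0x00, 0x2a }, /* VNI 42, zero is refused above. */
};
static const struct rte_flow_item doc_vxlan_gpe_pattern[] __rte_unused = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
        { .type = RTE_FLOW_ITEM_TYPE_UDP },
        { .type = RTE_FLOW_ITEM_TYPE_VXLAN_GPE, .spec = &doc_vxlan_gpe_spec },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};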
1410
1411 /**
1412  * Convert the @p pattern into a Verbs specifications after ensuring the NIC
1413  * will understand and process it correctly.
1414  * The conversion is performed item by item; each item is written into
1415  * the @p flow if its size is less than or equal to @p flow_size.
1416  * Validation and memory consumption computation are still performed until the
1417  * end of @p pattern, unless an error is encountered.
1418  *
1419  * @param[in] pattern
1420  *   Flow pattern.
1421  * @param[in, out] flow
1422  *   Pointer to the rte_flow structure.
1423  * @param[in] flow_size
1424  *   Size in bytes of the available space in @p flow, if too small some
1425  *   garbage may be present.
1426  * @param[out] error
1427  *   Pointer to error structure.
1428  *
1429  * @return
1430  *   On success, the number of bytes consumed/necessary. If the returned value
1431  *   is less than or equal to @p flow_size, the @p pattern has been fully
1432  *   converted, otherwise another call with the returned size should
1433  *   be done.
1434  *   On error, a negative errno value is returned and rte_errno is set.
1435  */
1436 static int
1437 mlx5_flow_items(struct rte_eth_dev *dev,
1438                 const struct rte_flow_item pattern[],
1439                 struct rte_flow *flow, const size_t flow_size,
1440                 struct rte_flow_error *error)
1441 {
1442         int remain = flow_size;
1443         size_t size = 0;
1444
1445         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1446                 int ret = 0;
1447
1448                 switch (pattern->type) {
1449                 case RTE_FLOW_ITEM_TYPE_VOID:
1450                         break;
1451                 case RTE_FLOW_ITEM_TYPE_ETH:
1452                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1453                         break;
1454                 case RTE_FLOW_ITEM_TYPE_VLAN:
1455                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1456                         break;
1457                 case RTE_FLOW_ITEM_TYPE_IPV4:
1458                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1459                         break;
1460                 case RTE_FLOW_ITEM_TYPE_IPV6:
1461                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1462                         break;
1463                 case RTE_FLOW_ITEM_TYPE_UDP:
1464                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1465                         break;
1466                 case RTE_FLOW_ITEM_TYPE_TCP:
1467                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1468                         break;
1469                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1470                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1471                                                    error);
1472                         break;
1473                 case RTE_FLOW_ITEM_TYPE_VXLAN_GPE:
1474                         ret = mlx5_flow_item_vxlan_gpe(dev, pattern, flow,
1475                                                        remain, error);
1476                         break;
1477                 default:
1478                         return rte_flow_error_set(error, ENOTSUP,
1479                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1480                                                   pattern,
1481                                                   "item not supported");
1482                 }
1483                 if (ret < 0)
1484                         return ret;
1485                 if (remain > ret)
1486                         remain -= ret;
1487                 else
1488                         remain = 0;
1489                 size += ret;
1490         }
1491         if (!flow->layers) {
1492                 const struct rte_flow_item item = {
1493                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1494                 };
1495
1496                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1497         }
1498         return size;
1499 }
1500
1501 /**
1502  * Convert the @p action into a Verbs specification after ensuring the NIC
1503  * will understand and process it correctly.
1504  * If the necessary size for the conversion is greater than the @p flow_size,
1505  * nothing is written in @p flow, the validation is still performed.
1506  *
1507  * @param[in] action
1508  *   Action configuration.
1509  * @param[in, out] flow
1510  *   Pointer to flow structure.
1511  * @param[in] flow_size
1512  *   Size in bytes of the available space in @p flow, if too small, nothing is
1513  *   written.
1514  * @param[out] error
1515  *   Pointer to error structure.
1516  *
1517  * @return
1518  *   On success, the number of bytes consumed/necessary. If the returned value
1519  *   is less than or equal to @p flow_size, the @p action has been fully
1520  *   converted, otherwise another call with the returned size should
1521  *   be done.
1522  *   On error, a negative errno value is returned and rte_errno is set.
1523  */
1524 static int
1525 mlx5_flow_action_drop(const struct rte_flow_action *action,
1526                       struct rte_flow *flow, const size_t flow_size,
1527                       struct rte_flow_error *error)
1528 {
1529         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1530         struct ibv_flow_spec_action_drop drop = {
1531                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1532                         .size = size,
1533         };
1534
1535         if (flow->fate)
1536                 return rte_flow_error_set(error, ENOTSUP,
1537                                           RTE_FLOW_ERROR_TYPE_ACTION,
1538                                           action,
1539                                           "multiple fate actions are not"
1540                                           " supported");
1541         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1542                 return rte_flow_error_set(error, ENOTSUP,
1543                                           RTE_FLOW_ERROR_TYPE_ACTION,
1544                                           action,
1545                                           "drop is not compatible with"
1546                                           " flag/mark action");
1547         if (size <= flow_size)
1548                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1549         flow->fate |= MLX5_FLOW_FATE_DROP;
1550         return size;
1551 }
1552
1553 /**
1554  * Convert the @p action into @p flow after ensuring the NIC will understand
1555  * and process it correctly.
1556  *
1557  * @param[in] dev
1558  *   Pointer to Ethernet device structure.
1559  * @param[in] action
1560  *   Action configuration.
1561  * @param[in, out] flow
1562  *   Pointer to flow structure.
1563  * @param[out] error
1564  *   Pointer to error structure.
1565  *
1566  * @return
1567  *   0 on success, a negative errno value otherwise and rte_errno is set.
1568  */
1569 static int
1570 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1571                        const struct rte_flow_action *action,
1572                        struct rte_flow *flow,
1573                        struct rte_flow_error *error)
1574 {
1575         struct priv *priv = dev->data->dev_private;
1576         const struct rte_flow_action_queue *queue = action->conf;
1577
1578         if (flow->fate)
1579                 return rte_flow_error_set(error, ENOTSUP,
1580                                           RTE_FLOW_ERROR_TYPE_ACTION,
1581                                           action,
1582                                           "multiple fate actions are not"
1583                                           " supported");
1584         if (queue->index >= priv->rxqs_n)
1585                 return rte_flow_error_set(error, EINVAL,
1586                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1587                                           &queue->index,
1588                                           "queue index out of range");
1589         if (!(*priv->rxqs)[queue->index])
1590                 return rte_flow_error_set(error, EINVAL,
1591                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1592                                           &queue->index,
1593                                           "queue is not configured");
1594         if (flow->queue)
1595                 (*flow->queue)[0] = queue->index;
1596         flow->rss.queue_num = 1;
1597         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1598         return 0;
1599 }
1600
1601 /**
1602  * Ensure the @p action will be understood and used correctly by the NIC.
1603  *
1604  * @param dev
1605  *   Pointer to Ethernet device structure.
1606  * @param[in] action
1607  *   Pointer to the RSS action.
1608  * @param[in, out] flow
1609  *   Pointer to the rte_flow structure.
1610  * @param[out] error
1611  *   Pointer to error structure.
1612  *
1613  * @return
1614  *   On success @p flow->queue array and @p flow->rss are filled and valid.
1615  *   On error, a negative errno value is returned and rte_errno is set.
1616  */
1617 static int
1618 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1619                      const struct rte_flow_action *action,
1620                      struct rte_flow *flow,
1621                      struct rte_flow_error *error)
1622 {
1623         struct priv *priv = dev->data->dev_private;
1624         const struct rte_flow_action_rss *rss = action->conf;
1625         unsigned int i;
1626
1627         if (flow->fate)
1628                 return rte_flow_error_set(error, ENOTSUP,
1629                                           RTE_FLOW_ERROR_TYPE_ACTION,
1630                                           action,
1631                                           "multiple fate actions are not"
1632                                           " supported");
1633         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1634             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1635                 return rte_flow_error_set(error, ENOTSUP,
1636                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1637                                           &rss->func,
1638                                           "RSS hash function not supported");
1639 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1640         if (rss->level > 2)
1641 #else
1642         if (rss->level > 1)
1643 #endif
1644                 return rte_flow_error_set(error, ENOTSUP,
1645                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1646                                           &rss->level,
1647                                           "tunnel RSS is not supported");
1648         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1649                 return rte_flow_error_set(error, ENOTSUP,
1650                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1651                                           &rss->key_len,
1652                                           "RSS hash key too small");
1653         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1654                 return rte_flow_error_set(error, ENOTSUP,
1655                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1656                                           &rss->key_len,
1657                                           "RSS hash key too large");
1658         if (rss->queue_num > priv->config.ind_table_max_size)
1659                 return rte_flow_error_set(error, ENOTSUP,
1660                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1661                                           &rss->queue_num,
1662                                           "number of queues too large");
1663         if (rss->types & MLX5_RSS_HF_MASK)
1664                 return rte_flow_error_set(error, ENOTSUP,
1665                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1666                                           &rss->types,
1667                                           "some RSS protocols are not"
1668                                           " supported");
1669         for (i = 0; i != rss->queue_num; ++i) {
1670                 if (!(*priv->rxqs)[rss->queue[i]])
1671                         return rte_flow_error_set
1672                                 (error, EINVAL,
1673                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1674                                  &rss->queue[i],
1675                                  "queue is not configured");
1676         }
1677         if (flow->queue)
1678                 memcpy((*flow->queue), rss->queue,
1679                        rss->queue_num * sizeof(uint16_t));
1680         flow->rss.queue_num = rss->queue_num;
1681         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1682         flow->rss.types = rss->types;
1683         flow->rss.level = rss->level;
1684         flow->fate |= MLX5_FLOW_FATE_RSS;
1685         return 0;
1686 }
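
/*
 * Illustrative sketch for documentation purposes only: an RSS action
 * configuration that passes the checks above -- default/Toeplitz hash
 * function, level 0, a key of exactly MLX5_RSS_HASH_KEY_LEN bytes and only
 * supported protocol types.  Key, queue list and values are hypothetical.
 */
static const uint8_t doc_rss_key[MLX5_RSS_HASH_KEY_LEN] __rte_unused = { 0x2c };
static const uint16_t doc_rss_queue[] __rte_unused = { 0, 1, 2, 3 };
static const struct rte_flow_action_rss doc_rss_conf __rte_unused = {
        .func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
        .level = 0,
        .types = ETH_RSS_IP | ETH_RSS_UDP,
        .key_len = MLX5_RSS_HASH_KEY_LEN,
        .queue_num = RTE_DIM(doc_rss_queue),
        .key = doc_rss_key,
        .queue = doc_rss_queue,
};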
1687
1688 /**
1689  * Convert the @p action into a Verbs specification after ensuring the NIC
1690  * will understand and process it correctly.
1691  * If the necessary size for the conversion is greater than the @p flow_size,
1692  * nothing is written in @p flow, the validation is still performed.
1693  *
1694  * @param[in] action
1695  *   Action configuration.
1696  * @param[in, out] flow
1697  *   Pointer to flow structure.
1698  * @param[in] flow_size
1699  *   Size in bytes of the available space in @p flow, if too small, nothing is
1700  *   written.
1701  * @param[out] error
1702  *   Pointer to error structure.
1703  *
1704  * @return
1705  *   On success, the number of bytes consumed/necessary. If the returned value
1706  *   is less than or equal to @p flow_size, the @p action has been fully
1707  *   converted, otherwise another call with the returned size should
1708  *   be done.
1709  *   On error, a negative errno value is returned and rte_errno is set.
1710  */
1711 static int
1712 mlx5_flow_action_flag(const struct rte_flow_action *action,
1713                       struct rte_flow *flow, const size_t flow_size,
1714                       struct rte_flow_error *error)
1715 {
1716         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1717         struct ibv_flow_spec_action_tag tag = {
1718                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1719                 .size = size,
1720                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1721         };
1722         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1723
1724         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1725                 return rte_flow_error_set(error, ENOTSUP,
1726                                           RTE_FLOW_ERROR_TYPE_ACTION,
1727                                           action,
1728                                           "flag action already present");
1729         if (flow->fate & MLX5_FLOW_FATE_DROP)
1730                 return rte_flow_error_set(error, ENOTSUP,
1731                                           RTE_FLOW_ERROR_TYPE_ACTION,
1732                                           action,
1733                                           "flag is not compatible with drop"
1734                                           " action");
1735         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1736                 size = 0;
1737         else if (size <= flow_size && verbs)
1738                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1739         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1740         return size;
1741 }
1742
1743 /**
1744  * Update the Verbs specification to turn the flag action into a mark action.
1745  *
1746  * @param[in, out] verbs
1747  *   Pointer to the mlx5_flow_verbs structure.
1748  * @param[in] mark_id
1749  *   Mark identifier to replace the flag.
1750  */
1751 static void
1752 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1753 {
1754         struct ibv_spec_header *hdr;
1755         int i;
1756
1757         if (!verbs)
1758                 return;
1759         /* Update Verbs specification. */
1760         hdr = (struct ibv_spec_header *)verbs->specs;
1761         if (!hdr)
1762                 return;
1763         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1764                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1765                         struct ibv_flow_spec_action_tag *t =
1766                                 (struct ibv_flow_spec_action_tag *)hdr;
1767
1768                         t->tag_id = mlx5_flow_mark_set(mark_id);
1769                 }
1770                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1771         }
1772 }
1773
1774 /**
1775  * Convert the @p action into @p flow (or update the already present
1776  * flag Verbs specification) after ensuring the NIC will understand and
1777  * process it correctly.
1778  * If the necessary size for the conversion is greater than the @p flow_size,
1779  * nothing is written in @p flow, the validation is still performed.
1780  *
1781  * @param[in] action
1782  *   Action configuration.
1783  * @param[in, out] flow
1784  *   Pointer to flow structure.
1785  * @param[in] flow_size
1786  *   Size in bytes of the available space in @p flow, if too small, nothing is
1787  *   written.
1788  * @param[out] error
1789  *   Pointer to error structure.
1790  *
1791  * @return
1792  *   On success, the number of bytes consumed/necessary. If the returned value
1793  *   is less than or equal to @p flow_size, the @p action has been fully
1794  *   converted, otherwise another call with the returned size should
1795  *   be done.
1796  *   On error, a negative errno value is returned and rte_errno is set.
1797  */
1798 static int
1799 mlx5_flow_action_mark(const struct rte_flow_action *action,
1800                       struct rte_flow *flow, const size_t flow_size,
1801                       struct rte_flow_error *error)
1802 {
1803         const struct rte_flow_action_mark *mark = action->conf;
1804         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1805         struct ibv_flow_spec_action_tag tag = {
1806                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1807                 .size = size,
1808         };
1809         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1810
1811         if (!mark)
1812                 return rte_flow_error_set(error, EINVAL,
1813                                           RTE_FLOW_ERROR_TYPE_ACTION,
1814                                           action,
1815                                           "configuration cannot be null");
1816         if (mark->id >= MLX5_FLOW_MARK_MAX)
1817                 return rte_flow_error_set(error, EINVAL,
1818                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1819                                           &mark->id,
1820                                           "mark id must be in 0 <= id < "
1821                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1822         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1823                 return rte_flow_error_set(error, ENOTSUP,
1824                                           RTE_FLOW_ERROR_TYPE_ACTION,
1825                                           action,
1826                                           "mark action already present");
1827         if (flow->fate & MLX5_FLOW_FATE_DROP)
1828                 return rte_flow_error_set(error, ENOTSUP,
1829                                           RTE_FLOW_ERROR_TYPE_ACTION,
1830                                           action,
1831                                           "mark is not compatible with drop"
1832                                           " action");
1833         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1834                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1835                 size = 0;
1836         } else if (size <= flow_size) {
1837                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1838                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1839         }
1840         flow->modifier |= MLX5_FLOW_MOD_MARK;
1841         return size;
1842 }
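
/*
 * Illustrative sketch for documentation purposes only: FLAG and MARK in the
 * same action list share a single Verbs tag specification; the MARK id
 * overwrites the default FLAG value through mlx5_flow_verbs_mark_update().
 * Names and values are hypothetical.
 */
static const struct rte_flow_action_mark doc_mark_conf __rte_unused = {
        .id = 42, /* Must be lower than MLX5_FLOW_MARK_MAX. */
};
static const struct rte_flow_action_queue doc_mark_queue __rte_unused = {
        .index = 0,
};
static const struct rte_flow_action doc_mark_actions[] __rte_unused = {
        { .type = RTE_FLOW_ACTION_TYPE_FLAG },
        { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &doc_mark_conf },
        { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &doc_mark_queue },
        { .type = RTE_FLOW_ACTION_TYPE_END },
};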
1843
1844 /**
1845  * Convert the @p action into @p flow after ensuring the NIC will understand
1846  * and process it correctly.
1847  * The conversion is performed action by action; each action is written into
1848  * the @p flow if its size is less than or equal to @p flow_size.
1849  * Validation and memory consumption computation are still performed until the
1850  * end of @p actions, unless an error is encountered.
1851  *
1852  * @param[in] dev
1853  *   Pointer to Ethernet device structure.
1854  * @param[in] actions
1855  *   Pointer to flow actions array.
1856  * @param[in, out] flow
1857  *   Pointer to the rte_flow structure.
1858  * @param[in] flow_size
1859  *   Size in bytes of the available space in @p flow, if too small some
1860  *   garbage may be present.
1861  * @param[out] error
1862  *   Pointer to error structure.
1863  *
1864  * @return
1865  *   On success, the number of bytes consumed/necessary. If the returned value
1866  *   is less than or equal to @p flow_size, the @p actions have been fully
1867  *   converted, otherwise another call with the returned size should
1868  *   be done.
1869  *   On error, a negative errno value is returned and rte_errno is set.
1870  */
1871 static int
1872 mlx5_flow_actions(struct rte_eth_dev *dev,
1873                   const struct rte_flow_action actions[],
1874                   struct rte_flow *flow, const size_t flow_size,
1875                   struct rte_flow_error *error)
1876 {
1877         size_t size = 0;
1878         int remain = flow_size;
1879         int ret = 0;
1880
1881         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1882                 switch (actions->type) {
1883                 case RTE_FLOW_ACTION_TYPE_VOID:
1884                         break;
1885                 case RTE_FLOW_ACTION_TYPE_FLAG:
1886                         ret = mlx5_flow_action_flag(actions, flow, remain,
1887                                                     error);
1888                         break;
1889                 case RTE_FLOW_ACTION_TYPE_MARK:
1890                         ret = mlx5_flow_action_mark(actions, flow, remain,
1891                                                     error);
1892                         break;
1893                 case RTE_FLOW_ACTION_TYPE_DROP:
1894                         ret = mlx5_flow_action_drop(actions, flow, remain,
1895                                                     error);
1896                         break;
1897                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1898                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1899                         break;
1900                 case RTE_FLOW_ACTION_TYPE_RSS:
1901                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
1902                         break;
1903                 default:
1904                         return rte_flow_error_set(error, ENOTSUP,
1905                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1906                                                   actions,
1907                                                   "action not supported");
1908                 }
1909                 if (ret < 0)
1910                         return ret;
1911                 if (remain > ret)
1912                         remain -= ret;
1913                 else
1914                         remain = 0;
1915                 size += ret;
1916         }
1917         if (!flow->fate)
1918                 return rte_flow_error_set(error, ENOTSUP,
1919                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1920                                           NULL,
1921                                           "no fate action found");
1922         return size;
1923 }
1924
1925 /**
1926  * Convert the @p attributes, @p pattern and @p actions into a flow for the NIC
1927  * after ensuring the NIC will understand and process it correctly.
1928  * The conversion is performed item by item and action by action; each of
1929  * them is written into the @p flow if its size is less than or equal to @p
1930  * flow_size.
1931  * Validation and memory consumption computation are still performed until the
1932  * end, unless an error is encountered.
1933  *
1934  * @param[in] dev
1935  *   Pointer to Ethernet device.
1936  * @param[in, out] flow
1937  *   Pointer to flow structure.
1938  * @param[in] flow_size
1939  *   Size in bytes of the available space in @p flow, if too small some
1940  *   garbage may be present.
1941  * @param[in] attributes
1942  *   Flow rule attributes.
1943  * @param[in] pattern
1944  *   Pattern specification (list terminated by the END pattern item).
1945  * @param[in] actions
1946  *   Associated actions (list terminated by the END action).
1947  * @param[out] error
1948  *   Perform verbose error reporting if not NULL.
1949  *
1950  * @return
1951  *   On success, the number of bytes consumed/necessary. If the returned value
1952  *   is less than or equal to @p flow_size, the flow has been fully converted and
1953  *   can be applied, otherwise another call with the returned size
1954  *   should be done.
1955  *   On error, a negative errno value is returned and rte_errno is set.
1956  */
1957 static int
1958 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1959                 const size_t flow_size,
1960                 const struct rte_flow_attr *attributes,
1961                 const struct rte_flow_item pattern[],
1962                 const struct rte_flow_action actions[],
1963                 struct rte_flow_error *error)
1964 {
1965         struct rte_flow local_flow = { .layers = 0, };
1966         size_t size = sizeof(*flow);
1967         union {
1968                 struct rte_flow_expand_rss buf;
1969                 uint8_t buffer[2048];
1970         } expand_buffer;
1971         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
1972         struct mlx5_flow_verbs *original_verbs = NULL;
1973         size_t original_verbs_size = 0;
1974         uint32_t original_layers = 0;
1975         int expanded_pattern_idx = 0;
1976         int ret;
1977         uint32_t i;
1978
1979         if (size > flow_size)
1980                 flow = &local_flow;
1981         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1982         if (ret < 0)
1983                 return ret;
1984         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
1985         if (ret < 0)
1986                 return ret;
1987         if (local_flow.rss.types) {
1988                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
1989                                           pattern, local_flow.rss.types,
1990                                           mlx5_support_expansion,
1991                                           local_flow.rss.level < 2 ?
1992                                           MLX5_EXPANSION_ROOT :
1993                                           MLX5_EXPANSION_ROOT_OUTER);
1994                 assert(ret > 0 &&
1995                        (unsigned int)ret < sizeof(expand_buffer.buffer));
1996         } else {
1997                 buf->entries = 1;
1998                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
1999         }
2000         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
2001                                sizeof(void *));
2002         if (size <= flow_size)
2003                 flow->queue = (void *)(flow + 1);
2004         LIST_INIT(&flow->verbs);
2005         flow->layers = 0;
2006         flow->modifier = 0;
2007         flow->fate = 0;
2008         for (i = 0; i != buf->entries; ++i) {
2009                 size_t off = size;
2010                 size_t off2;
2011
2012                 flow->layers = original_layers;
2013                 size += sizeof(struct ibv_flow_attr) +
2014                         sizeof(struct mlx5_flow_verbs);
2015                 off2 = size;
2016                 if (size < flow_size) {
2017                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
2018                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
2019                         flow->cur_verbs->specs =
2020                                 (void *)(flow->cur_verbs->attr + 1);
2021                 }
2022                 /* First iteration convert the pattern into Verbs. */
2023                 if (i == 0) {
2024                         /* Actions don't need to be converted several times. */
2025                         ret = mlx5_flow_actions(dev, actions, flow,
2026                                                 (size < flow_size) ?
2027                                                 flow_size - size : 0,
2028                                                 error);
2029                         if (ret < 0)
2030                                 return ret;
2031                         size += ret;
2032                 } else {
2033                         /*
2034                          * Subsequent iterations mean the pattern has already
2035                          * been converted and an expansion is necessary to
2036                          * match the user RSS request.  Only the expanded
2037                          * items are converted; the part shared with the
2038                          * user pattern is simply copied into the next
2039                          * buffer zone.
2040                          */
2041                         size += original_verbs_size;
2042                         if (size < flow_size) {
2043                                 rte_memcpy(flow->cur_verbs->attr,
2044                                            original_verbs->attr,
2045                                            original_verbs_size +
2046                                            sizeof(struct ibv_flow_attr));
2047                                 flow->cur_verbs->size = original_verbs_size;
2048                         }
2049                 }
2050                 ret = mlx5_flow_items
2051                         (dev,
2052                          (const struct rte_flow_item *)
2053                          &buf->entry[i].pattern[expanded_pattern_idx],
2054                          flow,
2055                          (size < flow_size) ? flow_size - size : 0, error);
2056                 if (ret < 0)
2057                         return ret;
2058                 size += ret;
2059                 if (size <= flow_size) {
2060                         mlx5_flow_adjust_priority(dev, flow);
2061                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
2062                 }
2063                 /*
2064                  * Keep a pointer to the first Verbs conversion and the layers
2065                  * it has encountered.
2066                  */
2067                 if (i == 0) {
2068                         original_verbs = flow->cur_verbs;
2069                         original_verbs_size = size - off2;
2070                         original_layers = flow->layers;
2071                         /*
2072                          * Move the index of the expanded pattern to the
2073                          * first item not addressed yet.
2074                          */
2075                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
2076                                 expanded_pattern_idx++;
2077                         } else {
2078                                 const struct rte_flow_item *item = pattern;
2079
2080                                 for (item = pattern;
2081                                      item->type != RTE_FLOW_ITEM_TYPE_END;
2082                                      ++item)
2083                                         expanded_pattern_idx++;
2084                         }
2085                 }
2086         }
2087         /* Restore the original layers in the flow. */
2088         flow->layers = original_layers;
2089         return size;
2090 }
2091
2092 /**
2093  * Look up and set the tunnel ptype in the Rx queue data.  A single ptype can
2094  * be used; if tunnel rules of several types are used on this queue, the tunnel
2095  * ptype is cleared.
2096  *
2097  * @param rxq_ctrl
2098  *   Rx queue to update.
2099  */
2100 static void
2101 mlx5_flow_rxq_tunnel_ptype_update(struct mlx5_rxq_ctrl *rxq_ctrl)
2102 {
2103         unsigned int i;
2104         uint32_t tunnel_ptype = 0;
2105
2106         /* Look up the ptype to use. */
2107         for (i = 0; i != MLX5_FLOW_TUNNEL; ++i) {
2108                 if (!rxq_ctrl->flow_tunnels_n[i])
2109                         continue;
2110                 if (!tunnel_ptype) {
2111                         tunnel_ptype = tunnels_info[i].ptype;
2112                 } else {
2113                         tunnel_ptype = 0;
2114                         break;
2115                 }
2116         }
2117         rxq_ctrl->rxq.tunnel = tunnel_ptype;
2118 }
2119
2120 /**
2121  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
2122  *
2123  * @param[in] dev
2124  *   Pointer to Ethernet device.
2125  * @param[in] flow
2126  *   Pointer to flow structure.
2127  */
2128 static void
2129 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
2130 {
2131         struct priv *priv = dev->data->dev_private;
2132         const int mark = !!(flow->modifier &
2133                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2134         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2135         unsigned int i;
2136
2137         for (i = 0; i != flow->rss.queue_num; ++i) {
2138                 int idx = (*flow->queue)[i];
2139                 struct mlx5_rxq_ctrl *rxq_ctrl =
2140                         container_of((*priv->rxqs)[idx],
2141                                      struct mlx5_rxq_ctrl, rxq);
2142
2143                 if (mark) {
2144                         rxq_ctrl->rxq.mark = 1;
2145                         rxq_ctrl->flow_mark_n++;
2146                 }
2147                 if (tunnel) {
2148                         unsigned int j;
2149
2150                         /* Increase the counter matching the flow. */
2151                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2152                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2153                                     tunnels_info[j].tunnel) {
2154                                         rxq_ctrl->flow_tunnels_n[j]++;
2155                                         break;
2156                                 }
2157                         }
2158                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2159                 }
2160         }
2161 }
2162
2163 /**
2164  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
2165  * @p flow if no other flow uses it with the same kind of request.
2166  *
2167  * @param dev
2168  *   Pointer to Ethernet device.
2169  * @param[in] flow
2170  *   Pointer to the flow.
2171  */
2172 static void
2173 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
2174 {
2175         struct priv *priv = dev->data->dev_private;
2176         const int mark = !!(flow->modifier &
2177                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
2178         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
2179         unsigned int i;
2180
2181         assert(dev->data->dev_started);
2182         for (i = 0; i != flow->rss.queue_num; ++i) {
2183                 int idx = (*flow->queue)[i];
2184                 struct mlx5_rxq_ctrl *rxq_ctrl =
2185                         container_of((*priv->rxqs)[idx],
2186                                      struct mlx5_rxq_ctrl, rxq);
2187
2188                 if (mark) {
2189                         rxq_ctrl->flow_mark_n--;
2190                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2191                 }
2192                 if (tunnel) {
2193                         unsigned int j;
2194
2195                         /* Decrease the counter matching the flow. */
2196                         for (j = 0; j != MLX5_FLOW_TUNNEL; ++j) {
2197                                 if ((tunnels_info[j].tunnel & flow->layers) ==
2198                                     tunnels_info[j].tunnel) {
2199                                         rxq_ctrl->flow_tunnels_n[j]--;
2200                                         break;
2201                                 }
2202                         }
2203                         mlx5_flow_rxq_tunnel_ptype_update(rxq_ctrl);
2204                 }
2205         }
2206 }
2207
2208 /**
2209  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2210  *
2211  * @param dev
2212  *   Pointer to Ethernet device.
2213  */
2214 static void
2215 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2216 {
2217         struct priv *priv = dev->data->dev_private;
2218         unsigned int i;
2219         unsigned int idx;
2220
2221         for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) {
2222                 struct mlx5_rxq_ctrl *rxq_ctrl;
2223                 unsigned int j;
2224
2225                 if (!(*priv->rxqs)[idx])
2226                         continue;
2227                 rxq_ctrl = container_of((*priv->rxqs)[idx],
2228                                         struct mlx5_rxq_ctrl, rxq);
2229                 rxq_ctrl->flow_mark_n = 0;
2230                 rxq_ctrl->rxq.mark = 0;
2231                 for (j = 0; j != MLX5_FLOW_TUNNEL; ++j)
2232                         rxq_ctrl->flow_tunnels_n[j] = 0;
2233                 rxq_ctrl->rxq.tunnel = 0;
2234                 ++idx;
2235         }
2236 }
2237
2238 /**
2239  * Validate a flow supported by the NIC.
2240  *
2241  * @see rte_flow_validate()
2242  * @see rte_flow_ops
2243  */
2244 int
2245 mlx5_flow_validate(struct rte_eth_dev *dev,
2246                    const struct rte_flow_attr *attr,
2247                    const struct rte_flow_item items[],
2248                    const struct rte_flow_action actions[],
2249                    struct rte_flow_error *error)
2250 {
2251         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2252
2253         if (ret < 0)
2254                 return ret;
2255         return 0;
2256 }
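
/*
 * Illustrative sketch for documentation purposes only: an application-side
 * call that reaches mlx5_flow_validate() above through the generic rte_flow
 * API.  Port id, queue index and pattern contents are hypothetical.
 */
static int __rte_unused
doc_flow_validate_example(uint16_t port_id)
{
        const struct rte_flow_attr attr = { .ingress = 1 };
        const struct rte_flow_item pattern[] = {
                { .type = RTE_FLOW_ITEM_TYPE_ETH },
                { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
                { .type = RTE_FLOW_ITEM_TYPE_END },
        };
        const struct rte_flow_action_queue queue = { .index = 0 };
        const struct rte_flow_action actions[] = {
                { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
                { .type = RTE_FLOW_ACTION_TYPE_END },
        };
        struct rte_flow_error error;

        /* 0 means the rule could be created on this port. */
        return rte_flow_validate(port_id, &attr, pattern, actions, &error);
}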
2257
2258 /**
2259  * Remove the flow.
2260  *
2261  * @param[in] dev
2262  *   Pointer to Ethernet device.
2263  * @param[in, out] flow
2264  *   Pointer to flow structure.
2265  */
2266 static void
2267 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2268 {
2269         struct mlx5_flow_verbs *verbs;
2270
2271         LIST_FOREACH(verbs, &flow->verbs, next) {
2272                 if (verbs->flow) {
2273                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2274                         verbs->flow = NULL;
2275                 }
2276                 if (verbs->hrxq) {
2277                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2278                                 mlx5_hrxq_drop_release(dev);
2279                         else
2280                                 mlx5_hrxq_release(dev, verbs->hrxq);
2281                         verbs->hrxq = NULL;
2282                 }
2283         }
2284 }
2285
2286 /**
2287  * Apply the flow.
2288  *
2289  * @param[in] dev
2290  *   Pointer to Ethernet device structure.
2291  * @param[in, out] flow
2292  *   Pointer to flow structure.
2293  * @param[out] error
2294  *   Pointer to error structure.
2295  *
2296  * @return
2297  *   0 on success, a negative errno value otherwise and rte_errno is set.
2298  */
2299 static int
2300 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2301                 struct rte_flow_error *error)
2302 {
2303         struct mlx5_flow_verbs *verbs;
2304         int err;
2305
2306         LIST_FOREACH(verbs, &flow->verbs, next) {
2307                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2308                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2309                         if (!verbs->hrxq) {
2310                                 rte_flow_error_set
2311                                         (error, errno,
2312                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2313                                          NULL,
2314                                          "cannot get drop hash queue");
2315                                 goto error;
2316                         }
2317                 } else {
2318                         struct mlx5_hrxq *hrxq;
2319
2320                         hrxq = mlx5_hrxq_get(dev, flow->key,
2321                                              MLX5_RSS_HASH_KEY_LEN,
2322                                              verbs->hash_fields,
2323                                              (*flow->queue),
2324                                              flow->rss.queue_num);
2325                         if (!hrxq)
2326                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2327                                                      MLX5_RSS_HASH_KEY_LEN,
2328                                                      verbs->hash_fields,
2329                                                      (*flow->queue),
2330                                                      flow->rss.queue_num);
2331                         if (!hrxq) {
2332                                 rte_flow_error_set
2333                                         (error, rte_errno,
2334                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2335                                          NULL,
2336                                          "cannot get hash queue");
2337                                 goto error;
2338                         }
2339                         verbs->hrxq = hrxq;
2340                 }
2341                 verbs->flow =
2342                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2343                 if (!verbs->flow) {
2344                         rte_flow_error_set(error, errno,
2345                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2346                                            NULL,
2347                                            "hardware refuses to create flow");
2348                         goto error;
2349                 }
2350         }
2351         return 0;
2352 error:
2353         err = rte_errno; /* Save rte_errno before cleanup. */
2354         LIST_FOREACH(verbs, &flow->verbs, next) {
2355                 if (verbs->hrxq) {
2356                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2357                                 mlx5_hrxq_drop_release(dev);
2358                         else
2359                                 mlx5_hrxq_release(dev, verbs->hrxq);
2360                         verbs->hrxq = NULL;
2361                 }
2362         }
2363         rte_errno = err; /* Restore rte_errno. */
2364         return -rte_errno;
2365 }
2366
2367 /**
2368  * Create a flow and add it to @p list.
2369  *
2370  * @param dev
2371  *   Pointer to Ethernet device.
2372  * @param list
2373  *   Pointer to a TAILQ flow list.
2374  * @param[in] attr
2375  *   Flow rule attributes.
2376  * @param[in] items
2377  *   Pattern specification (list terminated by the END pattern item).
2378  * @param[in] actions
2379  *   Associated actions (list terminated by the END action).
2380  * @param[out] error
2381  *   Perform verbose error reporting if not NULL.
2382  *
2383  * @return
2384  *   A flow on success, NULL otherwise and rte_errno is set.
2385  */
2386 static struct rte_flow *
2387 mlx5_flow_list_create(struct rte_eth_dev *dev,
2388                       struct mlx5_flows *list,
2389                       const struct rte_flow_attr *attr,
2390                       const struct rte_flow_item items[],
2391                       const struct rte_flow_action actions[],
2392                       struct rte_flow_error *error)
2393 {
2394         struct rte_flow *flow = NULL;
2395         size_t size = 0;
2396         int ret;
2397
2398         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2399         if (ret < 0)
2400                 return NULL;
2401         size = ret;
2402         flow = rte_calloc(__func__, 1, size, 0);
2403         if (!flow) {
2404                 rte_flow_error_set(error, ENOMEM,
2405                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2406                                    NULL,
2407                                    "not enough memory to create flow");
2408                 return NULL;
2409         }
2410         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2411         if (ret < 0) {
2412                 rte_free(flow);
2413                 return NULL;
2414         }
2415         assert((size_t)ret == size);
2416         if (dev->data->dev_started) {
2417                 ret = mlx5_flow_apply(dev, flow, error);
2418                 if (ret < 0) {
2419                         ret = rte_errno; /* Save rte_errno before cleanup. */
2420                         if (flow) {
2421                                 mlx5_flow_remove(dev, flow);
2422                                 rte_free(flow);
2423                         }
2424                         rte_errno = ret; /* Restore rte_errno. */
2425                         return NULL;
2426                 }
2427         }
2428         TAILQ_INSERT_TAIL(list, flow, next);
2429         mlx5_flow_rxq_flags_set(dev, flow);
2430         return flow;
2431 }
2432
2433 /**
2434  * Create a flow.
2435  *
2436  * @see rte_flow_create()
2437  * @see rte_flow_ops
2438  */
2439 struct rte_flow *
2440 mlx5_flow_create(struct rte_eth_dev *dev,
2441                  const struct rte_flow_attr *attr,
2442                  const struct rte_flow_item items[],
2443                  const struct rte_flow_action actions[],
2444                  struct rte_flow_error *error)
2445 {
2446         return mlx5_flow_list_create
2447                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2448                  attr, items, actions, error);
2449 }
2450
2451 /**
2452  * Destroy a flow in a list.
2453  *
2454  * @param dev
2455  *   Pointer to Ethernet device.
2456  * @param list
2457  *   Pointer to a TAILQ flow list.
2458  * @param[in] flow
2459  *   Flow to destroy.
2460  */
2461 static void
2462 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2463                        struct rte_flow *flow)
2464 {
2465         mlx5_flow_remove(dev, flow);
2466         TAILQ_REMOVE(list, flow, next);
2467         /*
2468          * Update RX queue flags only if port is started, otherwise it is
2469          * already clean.
2470          */
2471         if (dev->data->dev_started)
2472                 mlx5_flow_rxq_flags_trim(dev, flow);
2473         rte_free(flow);
2474 }
2475
2476 /**
2477  * Destroy all flows.
2478  *
2479  * @param dev
2480  *   Pointer to Ethernet device.
2481  * @param list
2482  *   Pointer to a TAILQ flow list.
2483  */
2484 void
2485 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2486 {
2487         while (!TAILQ_EMPTY(list)) {
2488                 struct rte_flow *flow;
2489
2490                 flow = TAILQ_FIRST(list);
2491                 mlx5_flow_list_destroy(dev, list, flow);
2492         }
2493 }
2494
2495 /**
2496  * Remove all flows.
2497  *
2498  * @param dev
2499  *   Pointer to Ethernet device.
2500  * @param list
2501  *   Pointer to a TAILQ flow list.
2502  */
2503 void
2504 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2505 {
2506         struct rte_flow *flow;
2507
2508         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2509                 mlx5_flow_remove(dev, flow);
2510         mlx5_flow_rxq_flags_clear(dev);
2511 }
2512
2513 /**
2514  * Add all flows.
2515  *
2516  * @param dev
2517  *   Pointer to Ethernet device.
2518  * @param list
2519  *   Pointer to a TAILQ flow list.
2520  *
2521  * @return
2522  *   0 on success, a negative errno value otherwise and rte_errno is set.
2523  */
2524 int
2525 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2526 {
2527         struct rte_flow *flow;
2528         struct rte_flow_error error;
2529         int ret = 0;
2530
2531         TAILQ_FOREACH(flow, list, next) {
2532                 ret = mlx5_flow_apply(dev, flow, &error);
2533                 if (ret < 0)
2534                         goto error;
2535                 mlx5_flow_rxq_flags_set(dev, flow);
2536         }
2537         return 0;
2538 error:
2539         ret = rte_errno; /* Save rte_errno before cleanup. */
2540         mlx5_flow_stop(dev, list);
2541         rte_errno = ret; /* Restore rte_errno. */
2542         return -rte_errno;
2543 }
2544
2545 /**
2546  * Verify the flow list is empty.
2547  *
2548  * @param dev
2549  *   Pointer to Ethernet device.
2550  *
2551  * @return the number of flows not released.
2552  */
2553 int
2554 mlx5_flow_verify(struct rte_eth_dev *dev)
2555 {
2556         struct priv *priv = dev->data->dev_private;
2557         struct rte_flow *flow;
2558         int ret = 0;
2559
2560         TAILQ_FOREACH(flow, &priv->flows, next) {
2561                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2562                         dev->data->port_id, (void *)flow);
2563                 ++ret;
2564         }
2565         return ret;
2566 }
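
/*
 * Illustrative usage (editor's sketch): the device close path can rely on
 * the return value to warn about rules leaked by the application, along
 * these lines.
 *
 *	if (mlx5_flow_verify(dev))
 *		DRV_LOG(WARNING, "port %u some flows still remain",
 *			dev->data->port_id);
 */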
2567
2568 /**
2569  * Enable a control flow configured from the control plane.
2570  *
2571  * @param dev
2572  *   Pointer to Ethernet device.
2573  * @param eth_spec
2574  *   An Ethernet flow spec to apply.
2575  * @param eth_mask
2576  *   An Ethernet flow mask to apply.
2577  * @param vlan_spec
2578  *   A VLAN flow spec to apply.
2579  * @param vlan_mask
2580  *   A VLAN flow mask to apply.
2581  *
2582  * @return
2583  *   0 on success, a negative errno value otherwise and rte_errno is set.
2584  */
2585 int
2586 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2587                     struct rte_flow_item_eth *eth_spec,
2588                     struct rte_flow_item_eth *eth_mask,
2589                     struct rte_flow_item_vlan *vlan_spec,
2590                     struct rte_flow_item_vlan *vlan_mask)
2591 {
2592         struct priv *priv = dev->data->dev_private;
2593         const struct rte_flow_attr attr = {
2594                 .ingress = 1,
2595                 .priority = MLX5_FLOW_PRIO_RSVD,
2596         };
2597         struct rte_flow_item items[] = {
2598                 {
2599                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2600                         .spec = eth_spec,
2601                         .last = NULL,
2602                         .mask = eth_mask,
2603                 },
2604                 {
2605                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2606                                 RTE_FLOW_ITEM_TYPE_END,
2607                         .spec = vlan_spec,
2608                         .last = NULL,
2609                         .mask = vlan_mask,
2610                 },
2611                 {
2612                         .type = RTE_FLOW_ITEM_TYPE_END,
2613                 },
2614         };
2615         uint16_t queue[priv->reta_idx_n];
2616         struct rte_flow_action_rss action_rss = {
2617                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2618                 .level = 0,
2619                 .types = priv->rss_conf.rss_hf,
2620                 .key_len = priv->rss_conf.rss_key_len,
2621                 .queue_num = priv->reta_idx_n,
2622                 .key = priv->rss_conf.rss_key,
2623                 .queue = queue,
2624         };
2625         struct rte_flow_action actions[] = {
2626                 {
2627                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2628                         .conf = &action_rss,
2629                 },
2630                 {
2631                         .type = RTE_FLOW_ACTION_TYPE_END,
2632                 },
2633         };
2634         struct rte_flow *flow;
2635         struct rte_flow_error error;
2636         unsigned int i;
2637
2638         if (!priv->reta_idx_n) {
2639                 rte_errno = EINVAL;
2640                 return -rte_errno;
2641         }
2642         for (i = 0; i != priv->reta_idx_n; ++i)
2643                 queue[i] = (*priv->reta_idx)[i];
2644         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2645                                      actions, &error);
2646         if (!flow)
2647                 return -rte_errno;
2648         return 0;
2649 }
2650
2651 /**
2652  * Enable a control flow configured from the control plane.
2653  *
2654  * @param dev
2655  *   Pointer to Ethernet device.
2656  * @param eth_spec
2657  *   An Ethernet flow spec to apply.
2658  * @param eth_mask
2659  *   An Ethernet flow mask to apply.
2660  *
2661  * @return
2662  *   0 on success, a negative errno value otherwise and rte_errno is set.
2663  */
2664 int
2665 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2666                struct rte_flow_item_eth *eth_spec,
2667                struct rte_flow_item_eth *eth_mask)
2668 {
2669         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2670 }
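
/*
 * Illustrative usage (editor's sketch): the traffic enable path typically
 * installs such control flows for unicast/broadcast MAC addresses. The
 * broadcast spec and mask below are examples, not taken from this file.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *	struct rte_flow_item_eth bcast_mask = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast_mask))
 *		return -rte_errno;
 */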
2671
2672 /**
2673  * Destroy a flow.
2674  *
2675  * @see rte_flow_destroy()
2676  * @see rte_flow_ops
2677  */
2678 int
2679 mlx5_flow_destroy(struct rte_eth_dev *dev,
2680                   struct rte_flow *flow,
2681                   struct rte_flow_error *error __rte_unused)
2682 {
2683         struct priv *priv = dev->data->dev_private;
2684
2685         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2686         return 0;
2687 }
2688
2689 /**
2690  * Destroy all flows.
2691  *
2692  * @see rte_flow_flush()
2693  * @see rte_flow_ops
2694  */
2695 int
2696 mlx5_flow_flush(struct rte_eth_dev *dev,
2697                 struct rte_flow_error *error __rte_unused)
2698 {
2699         struct priv *priv = dev->data->dev_private;
2700
2701         mlx5_flow_list_flush(dev, &priv->flows);
2702         return 0;
2703 }
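
/*
 * Illustrative usage (editor's sketch): the two callbacks above are reached
 * through the generic API; "flow" is assumed to be a handle previously
 * returned by rte_flow_create() on port 0.
 *
 *	struct rte_flow_error err;
 *
 *	rte_flow_destroy(0, flow, &err);
 *	rte_flow_flush(0, &err);
 */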
2704
2705 /**
2706  * Enable or disable isolated mode.
2707  *
2708  * @see rte_flow_isolate()
2709  * @see rte_flow_ops
2710  */
2711 int
2712 mlx5_flow_isolate(struct rte_eth_dev *dev,
2713                   int enable,
2714                   struct rte_flow_error *error)
2715 {
2716         struct priv *priv = dev->data->dev_private;
2717
2718         if (dev->data->dev_started) {
2719                 rte_flow_error_set(error, EBUSY,
2720                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2721                                    NULL,
2722                                    "port must be stopped first");
2723                 return -rte_errno;
2724         }
2725         priv->isolated = !!enable;
2726         if (enable)
2727                 dev->dev_ops = &mlx5_dev_ops_isolate;
2728         else
2729                 dev->dev_ops = &mlx5_dev_ops;
2730         return 0;
2731 }
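
/*
 * Illustrative usage (editor's sketch): isolated mode must be requested
 * through rte_flow_isolate() while the port is stopped, typically right
 * after rte_eth_dev_configure() and before rte_eth_dev_start(). Port id 0
 * is an assumption for the example.
 *
 *	struct rte_flow_error err;
 *
 *	if (rte_flow_isolate(0, 1, &err))
 *		printf("cannot enter isolated mode: %s\n",
 *		       err.message ? err.message : "(no message)");
 */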
2732
2733 /**
2734  * Convert a flow director filter to a generic flow.
2735  *
2736  * @param dev
2737  *   Pointer to Ethernet device.
2738  * @param fdir_filter
2739  *   Flow director filter to add.
2740  * @param attributes
2741  *   Generic flow parameters structure.
2742  *
2743  * @return
2744  *   0 on success, a negative errno value otherwise and rte_errno is set.
2745  */
2746 static int
2747 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2748                          const struct rte_eth_fdir_filter *fdir_filter,
2749                          struct mlx5_fdir *attributes)
2750 {
2751         struct priv *priv = dev->data->dev_private;
2752         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2753         const struct rte_eth_fdir_masks *mask =
2754                 &dev->data->dev_conf.fdir_conf.mask;
2755
2756         /* Validate queue number. */
2757         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2758                 DRV_LOG(ERR, "port %u invalid queue number %d",
2759                         dev->data->port_id, fdir_filter->action.rx_queue);
2760                 rte_errno = EINVAL;
2761                 return -rte_errno;
2762         }
2763         attributes->attr.ingress = 1;
2764         attributes->items[0] = (struct rte_flow_item) {
2765                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2766                 .spec = &attributes->l2,
2767                 .mask = &attributes->l2_mask,
2768         };
2769         switch (fdir_filter->action.behavior) {
2770         case RTE_ETH_FDIR_ACCEPT:
2771                 attributes->actions[0] = (struct rte_flow_action){
2772                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2773                         .conf = &attributes->queue,
2774                 };
2775                 break;
2776         case RTE_ETH_FDIR_REJECT:
2777                 attributes->actions[0] = (struct rte_flow_action){
2778                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2779                 };
2780                 break;
2781         default:
2782                 DRV_LOG(ERR, "port %u invalid behavior %d",
2783                         dev->data->port_id,
2784                         fdir_filter->action.behavior);
2785                 rte_errno = ENOTSUP;
2786                 return -rte_errno;
2787         }
2788         attributes->queue.index = fdir_filter->action.rx_queue;
2789         /* Handle L3. */
2790         switch (fdir_filter->input.flow_type) {
2791         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2792         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2793         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2794                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2795                         .src_addr = input->flow.ip4_flow.src_ip,
2796                         .dst_addr = input->flow.ip4_flow.dst_ip,
2797                         .time_to_live = input->flow.ip4_flow.ttl,
2798                         .type_of_service = input->flow.ip4_flow.tos,
2799                         .next_proto_id = input->flow.ip4_flow.proto,
2800                 };
2801                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2802                         .src_addr = mask->ipv4_mask.src_ip,
2803                         .dst_addr = mask->ipv4_mask.dst_ip,
2804                         .time_to_live = mask->ipv4_mask.ttl,
2805                         .type_of_service = mask->ipv4_mask.tos,
2806                         .next_proto_id = mask->ipv4_mask.proto,
2807                 };
2808                 attributes->items[1] = (struct rte_flow_item){
2809                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2810                         .spec = &attributes->l3,
2811                         .mask = &attributes->l3_mask,
2812                 };
2813                 break;
2814         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2815         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2816         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2817                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2818                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2819                         .proto = input->flow.ipv6_flow.proto,
2820                 };
2821
2822                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2823                        input->flow.ipv6_flow.src_ip,
2824                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2825                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2826                        input->flow.ipv6_flow.dst_ip,
2827                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2828                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2829                        mask->ipv6_mask.src_ip,
2830                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2831                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2832                        mask->ipv6_mask.dst_ip,
2833                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2834                 attributes->items[1] = (struct rte_flow_item){
2835                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2836                         .spec = &attributes->l3,
2837                         .mask = &attributes->l3_mask,
2838                 };
2839                 break;
2840         default:
2841                 DRV_LOG(ERR, "port %u invalid flow type %d",
2842                         dev->data->port_id, fdir_filter->input.flow_type);
2843                 rte_errno = ENOTSUP;
2844                 return -rte_errno;
2845         }
2846         /* Handle L4. */
2847         switch (fdir_filter->input.flow_type) {
2848         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2849                 attributes->l4.udp.hdr = (struct udp_hdr){
2850                         .src_port = input->flow.udp4_flow.src_port,
2851                         .dst_port = input->flow.udp4_flow.dst_port,
2852                 };
2853                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2854                         .src_port = mask->src_port_mask,
2855                         .dst_port = mask->dst_port_mask,
2856                 };
2857                 attributes->items[2] = (struct rte_flow_item){
2858                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2859                         .spec = &attributes->l4,
2860                         .mask = &attributes->l4_mask,
2861                 };
2862                 break;
2863         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2864                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2865                         .src_port = input->flow.tcp4_flow.src_port,
2866                         .dst_port = input->flow.tcp4_flow.dst_port,
2867                 };
2868                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2869                         .src_port = mask->src_port_mask,
2870                         .dst_port = mask->dst_port_mask,
2871                 };
2872                 attributes->items[2] = (struct rte_flow_item){
2873                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2874                         .spec = &attributes->l4,
2875                         .mask = &attributes->l4_mask,
2876                 };
2877                 break;
2878         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2879                 attributes->l4.udp.hdr = (struct udp_hdr){
2880                         .src_port = input->flow.udp6_flow.src_port,
2881                         .dst_port = input->flow.udp6_flow.dst_port,
2882                 };
2883                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2884                         .src_port = mask->src_port_mask,
2885                         .dst_port = mask->dst_port_mask,
2886                 };
2887                 attributes->items[2] = (struct rte_flow_item){
2888                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2889                         .spec = &attributes->l4,
2890                         .mask = &attributes->l4_mask,
2891                 };
2892                 break;
2893         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2894                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2895                         .src_port = input->flow.tcp6_flow.src_port,
2896                         .dst_port = input->flow.tcp6_flow.dst_port,
2897                 };
2898                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2899                         .src_port = mask->src_port_mask,
2900                         .dst_port = mask->dst_port_mask,
2901                 };
2902                 attributes->items[2] = (struct rte_flow_item){
2903                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2904                         .spec = &attributes->l4,
2905                         .mask = &attributes->l4_mask,
2906                 };
2907                 break;
2908         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2909         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2910                 break;
2911         default:
2912                 DRV_LOG(ERR, "port %u invalid flow type %d",
2913                         dev->data->port_id, fdir_filter->input.flow_type);
2914                 rte_errno = ENOTSUP;
2915                 return -rte_errno;
2916         }
2917         return 0;
2918 }
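
/*
 * Illustrative input (editor's sketch): a flow director filter such as the
 * one below is converted by the function above into an ETH / IPV4 / UDP
 * pattern with a QUEUE action. The address (0xc0a80001 is 192.168.0.1),
 * port and queue index are assumptions for the example.
 *
 *	struct rte_eth_fdir_filter fdir = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.ip.src_ip = rte_cpu_to_be_32(0xc0a80001),
 *				.src_port = rte_cpu_to_be_16(1234),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 */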
2919
2920 /**
2921  * Add a new flow director filter and store it in the list.
2922  *
2923  * @param dev
2924  *   Pointer to Ethernet device.
2925  * @param fdir_filter
2926  *   Flow director filter to add.
2927  *
2928  * @return
2929  *   0 on success, a negative errno value otherwise and rte_errno is set.
2930  */
2931 static int
2932 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2933                      const struct rte_eth_fdir_filter *fdir_filter)
2934 {
2935         struct priv *priv = dev->data->dev_private;
2936         struct mlx5_fdir attributes = {
2937                 .attr.group = 0,
2938                 .l2_mask = {
2939                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2940                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2941                         .type = 0,
2942                 },
2943         };
2944         struct rte_flow_error error;
2945         struct rte_flow *flow;
2946         int ret;
2947
2948         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2949         if (ret)
2950                 return ret;
2951         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2952                                      attributes.items, attributes.actions,
2953                                      &error);
2954         if (flow) {
2955                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2956                         (void *)flow);
2957                 return 0;
2958         }
2959         return -rte_errno;
2960 }
2961
2962 /**
2963  * Delete a specific flow director filter.
2964  *
2965  * @param dev
2966  *   Pointer to Ethernet device.
2967  * @param fdir_filter
2968  *   Filter to be deleted.
2969  *
2970  * @return
2971  *   0 on success, a negative errno value otherwise and rte_errno is set.
2972  */
2973 static int
2974 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
2975                         const struct rte_eth_fdir_filter *fdir_filter
2976                         __rte_unused)
2977 {
2978         rte_errno = ENOTSUP;
2979         return -rte_errno;
2980 }
2981
2982 /**
2983  * Update queue for specific filter.
2984  *
2985  * @param dev
2986  *   Pointer to Ethernet device.
2987  * @param fdir_filter
2988  *   Filter to be updated.
2989  *
2990  * @return
2991  *   0 on success, a negative errno value otherwise and rte_errno is set.
2992  */
2993 static int
2994 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2995                         const struct rte_eth_fdir_filter *fdir_filter)
2996 {
2997         int ret;
2998
2999         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
3000         if (ret)
3001                 return ret;
3002         return mlx5_fdir_filter_add(dev, fdir_filter);
3003 }
3004
3005 /**
3006  * Flush all filters.
3007  *
3008  * @param dev
3009  *   Pointer to Ethernet device.
3010  */
3011 static void
3012 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
3013 {
3014         struct priv *priv = dev->data->dev_private;
3015
3016         mlx5_flow_list_flush(dev, &priv->flows);
3017 }
3018
3019 /**
3020  * Get flow director information.
3021  *
3022  * @param dev
3023  *   Pointer to Ethernet device.
3024  * @param[out] fdir_info
3025  *   Resulting flow director information.
3026  */
3027 static void
3028 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
3029 {
3030         struct rte_eth_fdir_masks *mask =
3031                 &dev->data->dev_conf.fdir_conf.mask;
3032
3033         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
3034         fdir_info->guarant_spc = 0;
3035         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
3036         fdir_info->max_flexpayload = 0;
3037         fdir_info->flow_types_mask[0] = 0;
3038         fdir_info->flex_payload_unit = 0;
3039         fdir_info->max_flex_payload_segment_num = 0;
3040         fdir_info->flex_payload_limit = 0;
3041         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
3042 }
3043
3044 /**
3045  * Deal with flow director operations.
3046  *
3047  * @param dev
3048  *   Pointer to Ethernet device.
3049  * @param filter_op
3050  *   Operation to perform.
3051  * @param arg
3052  *   Pointer to operation-specific structure.
3053  *
3054  * @return
3055  *   0 on success, a negative errno value otherwise and rte_errno is set.
3056  */
3057 static int
3058 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
3059                     void *arg)
3060 {
3061         enum rte_fdir_mode fdir_mode =
3062                 dev->data->dev_conf.fdir_conf.mode;
3063
3064         if (filter_op == RTE_ETH_FILTER_NOP)
3065                 return 0;
3066         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
3067             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
3068                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
3069                         dev->data->port_id, fdir_mode);
3070                 rte_errno = EINVAL;
3071                 return -rte_errno;
3072         }
3073         switch (filter_op) {
3074         case RTE_ETH_FILTER_ADD:
3075                 return mlx5_fdir_filter_add(dev, arg);
3076         case RTE_ETH_FILTER_UPDATE:
3077                 return mlx5_fdir_filter_update(dev, arg);
3078         case RTE_ETH_FILTER_DELETE:
3079                 return mlx5_fdir_filter_delete(dev, arg);
3080         case RTE_ETH_FILTER_FLUSH:
3081                 mlx5_fdir_filter_flush(dev);
3082                 break;
3083         case RTE_ETH_FILTER_INFO:
3084                 mlx5_fdir_info_get(dev, arg);
3085                 break;
3086         default:
3087                 DRV_LOG(DEBUG, "port %u unknown operation %u",
3088                         dev->data->port_id, filter_op);
3089                 rte_errno = EINVAL;
3090                 return -rte_errno;
3091         }
3092         return 0;
3093 }
3094
3095 /**
3096  * Manage filter operations.
3097  *
3098  * @param dev
3099  *   Pointer to Ethernet device structure.
3100  * @param filter_type
3101  *   Filter type.
3102  * @param filter_op
3103  *   Operation to perform.
3104  * @param arg
3105  *   Pointer to operation-specific structure.
3106  *
3107  * @return
3108  *   0 on success, a negative errno value otherwise and rte_errno is set.
3109  */
3110 int
3111 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3112                      enum rte_filter_type filter_type,
3113                      enum rte_filter_op filter_op,
3114                      void *arg)
3115 {
3116         switch (filter_type) {
3117         case RTE_ETH_FILTER_GENERIC:
3118                 if (filter_op != RTE_ETH_FILTER_GET) {
3119                         rte_errno = EINVAL;
3120                         return -rte_errno;
3121                 }
3122                 *(const void **)arg = &mlx5_flow_ops;
3123                 return 0;
3124         case RTE_ETH_FILTER_FDIR:
3125                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
3126         default:
3127                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
3128                         dev->data->port_id, filter_type);
3129                 rte_errno = ENOTSUP;
3130                 return -rte_errno;
3131         }
3132         return 0;
3133 }
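
/*
 * Illustrative usage (editor's sketch): applications reach this entry point
 * through the legacy filter control API, while rte_flow itself uses the
 * RTE_ETH_FILTER_GENERIC branch to retrieve mlx5_flow_ops. "fdir" is assumed
 * to be filled as in the earlier flow director sketch, on port 0.
 *
 *	struct rte_eth_fdir_filter fdir;
 *	int ret;
 *
 *	if (rte_eth_dev_filter_supported(0, RTE_ETH_FILTER_FDIR) == 0)
 *		ret = rte_eth_dev_filter_ctrl(0, RTE_ETH_FILTER_FDIR,
 *					      RTE_ETH_FILTER_ADD, &fdir);
 */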