dpdk.git: drivers/net/mlx5/mlx5_flow.c (commit eecc513bf357dfb3f63973159d680d28c1e89555)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern outer Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45
46 /* Pattern inner Layer bits. */
47 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
48 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
50 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
51 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
52 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
53
54 /* Outer Masks. */
55 #define MLX5_FLOW_LAYER_OUTER_L3 \
56         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
57 #define MLX5_FLOW_LAYER_OUTER_L4 \
58         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
59 #define MLX5_FLOW_LAYER_OUTER \
60         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
61          MLX5_FLOW_LAYER_OUTER_L4)
62
63 /* Tunnel Masks. */
64 #define MLX5_FLOW_LAYER_TUNNEL 0
65
66 /* Inner Masks. */
67 #define MLX5_FLOW_LAYER_INNER_L3 \
68         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
69 #define MLX5_FLOW_LAYER_INNER_L4 \
70         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
71 #define MLX5_FLOW_LAYER_INNER \
72         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
73          MLX5_FLOW_LAYER_INNER_L4)
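/*
 * Illustrative sketch (not part of the driver logic): the layer bits above
 * are OR-ed into a flow's "layers" bit-field as pattern items are parsed,
 * and later tested to reject inconsistent patterns.  For instance an outer
 * Ethernet/IPv4/UDP pattern ends up with:
 *
 *   uint32_t layers = MLX5_FLOW_LAYER_OUTER_L2 |
 *                     MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *                     MLX5_FLOW_LAYER_OUTER_L4_UDP;
 *
 *   if (layers & MLX5_FLOW_LAYER_OUTER_L4)
 *           return -ENOTSUP; // a second outer L4 item would be rejected
 */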
74
75 /* Actions that modify the fate of matching traffic. */
76 #define MLX5_FLOW_FATE_DROP (1u << 0)
77 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
78 #define MLX5_FLOW_FATE_RSS (1u << 2)
79
80 /* Modify a packet. */
81 #define MLX5_FLOW_MOD_FLAG (1u << 0)
82 #define MLX5_FLOW_MOD_MARK (1u << 1)
83
84 /* Possible L3 layer protocols for filtering. */
85 #define MLX5_IP_PROTOCOL_TCP 6
86 #define MLX5_IP_PROTOCOL_UDP 17
87
88 /* Priority reserved for default flows. */
89 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
90
91 enum mlx5_expansion {
92         MLX5_EXPANSION_ROOT,
93         MLX5_EXPANSION_ROOT_OUTER,
94         MLX5_EXPANSION_OUTER_ETH,
95         MLX5_EXPANSION_OUTER_IPV4,
96         MLX5_EXPANSION_OUTER_IPV4_UDP,
97         MLX5_EXPANSION_OUTER_IPV4_TCP,
98         MLX5_EXPANSION_OUTER_IPV6,
99         MLX5_EXPANSION_OUTER_IPV6_UDP,
100         MLX5_EXPANSION_OUTER_IPV6_TCP,
101         MLX5_EXPANSION_ETH,
102         MLX5_EXPANSION_IPV4,
103         MLX5_EXPANSION_IPV4_UDP,
104         MLX5_EXPANSION_IPV4_TCP,
105         MLX5_EXPANSION_IPV6,
106         MLX5_EXPANSION_IPV6_UDP,
107         MLX5_EXPANSION_IPV6_TCP,
108 };
109
110 /** Supported expansion of items. */
111 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
112         [MLX5_EXPANSION_ROOT] = {
113                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
114                                                  MLX5_EXPANSION_IPV4,
115                                                  MLX5_EXPANSION_IPV6),
116                 .type = RTE_FLOW_ITEM_TYPE_END,
117         },
118         [MLX5_EXPANSION_ROOT_OUTER] = {
119                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
120                                                  MLX5_EXPANSION_OUTER_IPV4,
121                                                  MLX5_EXPANSION_OUTER_IPV6),
122                 .type = RTE_FLOW_ITEM_TYPE_END,
123         },
124         [MLX5_EXPANSION_OUTER_ETH] = {
125                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
126                                                  MLX5_EXPANSION_OUTER_IPV6),
127                 .type = RTE_FLOW_ITEM_TYPE_ETH,
128                 .rss_types = 0,
129         },
130         [MLX5_EXPANSION_OUTER_IPV4] = {
131                 .next = RTE_FLOW_EXPAND_RSS_NEXT
132                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
133                          MLX5_EXPANSION_OUTER_IPV4_TCP),
134                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
135                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
136                         ETH_RSS_NONFRAG_IPV4_OTHER,
137         },
138         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
139                 .type = RTE_FLOW_ITEM_TYPE_UDP,
140                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
141         },
142         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
143                 .type = RTE_FLOW_ITEM_TYPE_TCP,
144                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
145         },
146         [MLX5_EXPANSION_OUTER_IPV6] = {
147                 .next = RTE_FLOW_EXPAND_RSS_NEXT
148                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
149                          MLX5_EXPANSION_OUTER_IPV6_TCP),
150                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
151                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
152                         ETH_RSS_NONFRAG_IPV6_OTHER,
153         },
154         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
155                 .type = RTE_FLOW_ITEM_TYPE_UDP,
156                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
157         },
158         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
159                 .type = RTE_FLOW_ITEM_TYPE_TCP,
160                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
161         },
162         [MLX5_EXPANSION_ETH] = {
163                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
164                                                  MLX5_EXPANSION_IPV6),
165                 .type = RTE_FLOW_ITEM_TYPE_ETH,
166         },
167         [MLX5_EXPANSION_IPV4] = {
168                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
169                                                  MLX5_EXPANSION_IPV4_TCP),
170                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
171                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
172                         ETH_RSS_NONFRAG_IPV4_OTHER,
173         },
174         [MLX5_EXPANSION_IPV4_UDP] = {
175                 .type = RTE_FLOW_ITEM_TYPE_UDP,
176                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
177         },
178         [MLX5_EXPANSION_IPV4_TCP] = {
179                 .type = RTE_FLOW_ITEM_TYPE_TCP,
180                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
181         },
182         [MLX5_EXPANSION_IPV6] = {
183                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
184                                                  MLX5_EXPANSION_IPV6_TCP),
185                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
186                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
187                         ETH_RSS_NONFRAG_IPV6_OTHER,
188         },
189         [MLX5_EXPANSION_IPV6_UDP] = {
190                 .type = RTE_FLOW_ITEM_TYPE_UDP,
191                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
192         },
193         [MLX5_EXPANSION_IPV6_TCP] = {
194                 .type = RTE_FLOW_ITEM_TYPE_TCP,
195                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
196         },
197 };
198
199 /** Verbs specifications and handles describing a flow. */
200 struct mlx5_flow_verbs {
201         LIST_ENTRY(mlx5_flow_verbs) next;
202         unsigned int size; /**< Size of the attribute. */
203         struct {
204                 struct ibv_flow_attr *attr;
205                 /**< Pointer to the Verbs flow attributes. */
206                 uint8_t *specs; /**< Pointer to the specifications. */
207         };
208         struct ibv_flow *flow; /**< Verbs flow pointer. */
209         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
210         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
211 };
212
213 /* Flow structure. */
214 struct rte_flow {
215         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
216         struct rte_flow_attr attributes; /**< User flow attribute. */
217         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
218         uint32_t layers;
219         /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
220         uint32_t modifier;
221         /**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
222         uint32_t fate;
223         /**< Bit-fields of present fate actions, see MLX5_FLOW_FATE_*. */
224         uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
225         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
226         struct mlx5_flow_verbs *cur_verbs;
227         /**< Current Verbs flow structure being filled. */
228         struct rte_flow_action_rss rss; /**< RSS context. */
229         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
230         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
231 };
232
233 static const struct rte_flow_ops mlx5_flow_ops = {
234         .validate = mlx5_flow_validate,
235         .create = mlx5_flow_create,
236         .destroy = mlx5_flow_destroy,
237         .flush = mlx5_flow_flush,
238         .isolate = mlx5_flow_isolate,
239 };
240
241 /* Convert FDIR request to Generic flow. */
242 struct mlx5_fdir {
243         struct rte_flow_attr attr;
244         struct rte_flow_action actions[2];
245         struct rte_flow_item items[4];
246         struct rte_flow_item_eth l2;
247         struct rte_flow_item_eth l2_mask;
248         union {
249                 struct rte_flow_item_ipv4 ipv4;
250                 struct rte_flow_item_ipv6 ipv6;
251         } l3;
252         union {
253                 struct rte_flow_item_ipv4 ipv4;
254                 struct rte_flow_item_ipv6 ipv6;
255         } l3_mask;
256         union {
257                 struct rte_flow_item_udp udp;
258                 struct rte_flow_item_tcp tcp;
259         } l4;
260         union {
261                 struct rte_flow_item_udp udp;
262                 struct rte_flow_item_tcp tcp;
263         } l4_mask;
264         struct rte_flow_action_queue queue;
265 };
266
267 /* Verbs specification header. */
268 struct ibv_spec_header {
269         enum ibv_flow_spec_type type;
270         uint16_t size;
271 };
272
273 /*
274  * Number of sub-priorities.
275  * For each kind of pattern matching, i.e. L2, L3, L4, to have a correct
276  * matching on the NIC (firmware dependent), L4 must have the highest
277  * priority, followed by L3 and ending with L2.
278  */
279 #define MLX5_PRIORITY_MAP_L2 2
280 #define MLX5_PRIORITY_MAP_L3 1
281 #define MLX5_PRIORITY_MAP_L4 0
282 #define MLX5_PRIORITY_MAP_MAX 3
283
284 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
285 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
286         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
287 };
288
289 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
290 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
291         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
292         { 9, 10, 11 }, { 12, 13, 14 },
293 };
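/*
 * Worked example (illustrative only): with 16 Verbs priorities the driver
 * exposes RTE_DIM(priority_map_5) == 5 flow priorities.  A flow created at
 * user priority 1 whose deepest matched layer is L3 receives sub-priority
 * MLX5_PRIORITY_MAP_L3 and is installed at Verbs priority
 * priority_map_5[1][MLX5_PRIORITY_MAP_L3] == 4, i.e. with lower precedence
 * than an L4 match of the same user priority
 * (priority_map_5[1][MLX5_PRIORITY_MAP_L4] == 3).
 */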
294
295 /**
296  * Discover the maximum number of priorities available.
297  *
298  * @param[in] dev
299  *   Pointer to Ethernet device.
300  *
301  * @return
302  *   Number of supported flow priorities on success, a negative errno
303  *   value otherwise and rte_errno is set.
304  */
305 int
306 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
307 {
308         struct {
309                 struct ibv_flow_attr attr;
310                 struct ibv_flow_spec_eth eth;
311                 struct ibv_flow_spec_action_drop drop;
312         } flow_attr = {
313                 .attr = {
314                         .num_of_specs = 2,
315                 },
316                 .eth = {
317                         .type = IBV_FLOW_SPEC_ETH,
318                         .size = sizeof(struct ibv_flow_spec_eth),
319                 },
320                 .drop = {
321                         .size = sizeof(struct ibv_flow_spec_action_drop),
322                         .type = IBV_FLOW_SPEC_ACTION_DROP,
323                 },
324         };
325         struct ibv_flow *flow;
326         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
327         uint16_t vprio[] = { 8, 16 };
328         int i;
329         int priority = 0;
330
331         if (!drop) {
332                 rte_errno = ENOTSUP;
333                 return -rte_errno;
334         }
335         for (i = 0; i != RTE_DIM(vprio); i++) {
336                 flow_attr.attr.priority = vprio[i] - 1;
337                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
338                 if (!flow)
339                         break;
340                 claim_zero(mlx5_glue->destroy_flow(flow));
341                 priority = vprio[i];
342         }
343         switch (priority) {
344         case 8:
345                 priority = RTE_DIM(priority_map_3);
346                 break;
347         case 16:
348                 priority = RTE_DIM(priority_map_5);
349                 break;
350         default:
351                 rte_errno = ENOTSUP;
352                 DRV_LOG(ERR,
353                         "port %u verbs maximum priority: %d expected 8/16",
354                         dev->data->port_id, vprio[i]);
                mlx5_hrxq_drop_release(dev);
355                 return -rte_errno;
356         }
357         mlx5_hrxq_drop_release(dev);
358         DRV_LOG(INFO, "port %u flow maximum priority: %d",
359                 dev->data->port_id, priority);
360         return priority;
361 }
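/*
 * Usage sketch (hypothetical caller, for illustration only): the probing
 * above is done once when the port starts and its result is cached in the
 * private configuration for mlx5_flow_adjust_priority():
 *
 *   int ret = mlx5_flow_discover_priorities(dev);
 *
 *   if (ret < 0)
 *           return ret;
 *   priv->config.flow_prio = ret; // 3 or 5 depending on the device/firmware
 */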
362
363 /**
364  * Adjust flow priority.
365  *
366  * @param dev
367  *   Pointer to Ethernet device.
368  * @param flow
369  *   Pointer to an rte_flow structure.
370  */
371 static void
372 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
373 {
374         struct priv *priv = dev->data->dev_private;
375         uint32_t priority = flow->attributes.priority;
376         uint32_t subpriority = flow->cur_verbs->attr->priority;
377
378         switch (priv->config.flow_prio) {
379         case RTE_DIM(priority_map_3):
380                 priority = priority_map_3[priority][subpriority];
381                 break;
382         case RTE_DIM(priority_map_5):
383                 priority = priority_map_5[priority][subpriority];
384                 break;
385         }
386         flow->cur_verbs->attr->priority = priority;
387 }
388
389 /**
390  * Verify the @p attributes will be correctly understood by the NIC and store
391  * them in the @p flow if everything is correct.
392  *
393  * @param[in] dev
394  *   Pointer to Ethernet device.
395  * @param[in] attributes
396  *   Pointer to flow attributes.
397  * @param[in, out] flow
398  *   Pointer to the rte_flow structure.
399  * @param[out] error
400  *   Pointer to error structure.
401  *
402  * @return
403  *   0 on success, a negative errno value otherwise and rte_errno is set.
404  */
405 static int
406 mlx5_flow_attributes(struct rte_eth_dev *dev,
407                      const struct rte_flow_attr *attributes,
408                      struct rte_flow *flow,
409                      struct rte_flow_error *error)
410 {
411         uint32_t priority_max =
412                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
413
414         if (attributes->group)
415                 return rte_flow_error_set(error, ENOTSUP,
416                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
417                                           NULL,
418                                           "groups are not supported");
419         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
420             attributes->priority >= priority_max)
421                 return rte_flow_error_set(error, ENOTSUP,
422                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
423                                           NULL,
424                                           "priority out of range");
425         if (attributes->egress)
426                 return rte_flow_error_set(error, ENOTSUP,
427                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
428                                           NULL,
429                                           "egress is not supported");
430         if (attributes->transfer)
431                 return rte_flow_error_set(error, ENOTSUP,
432                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
433                                           NULL,
434                                           "transfer is not supported");
435         if (!attributes->ingress)
436                 return rte_flow_error_set(error, ENOTSUP,
437                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
438                                           NULL,
439                                           "ingress attribute is mandatory");
440         flow->attributes = *attributes;
441         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
442                 flow->attributes.priority = priority_max;
443         return 0;
444 }
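/*
 * Example (illustrative only): attributes accepted by the checks above are
 * ingress-only, group 0, with a priority below the discovered maximum or
 * left to the reserved default value:
 *
 *   const struct rte_flow_attr attr = {
 *           .group = 0,
 *           .priority = MLX5_FLOW_PRIO_RSVD, // remapped to priority_max
 *           .ingress = 1,
 *   };
 */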
445
446 /**
447  * Verify the @p item specifications (spec, last, mask) are compatible with the
448  * NIC capabilities.
449  *
450  * @param[in] item
451  *   Item specification.
452  * @param[in] mask
453  *   @p item->mask or flow default bit-masks.
454  * @param[in] nic_mask
455  *   Bit-masks covering supported fields by the NIC to compare with user mask.
456  * @param[in] size
457  *   Size of the bit-masks in bytes.
458  * @param[out] error
459  *   Pointer to error structure.
460  *
461  * @return
462  *   0 on success, a negative errno value otherwise and rte_errno is set.
463  */
464 static int
465 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
466                           const uint8_t *mask,
467                           const uint8_t *nic_mask,
468                           unsigned int size,
469                           struct rte_flow_error *error)
470 {
471         unsigned int i;
472
473         assert(nic_mask);
474         for (i = 0; i < size; ++i)
475                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
476                         return rte_flow_error_set(error, ENOTSUP,
477                                                   RTE_FLOW_ERROR_TYPE_ITEM,
478                                                   item,
479                                                   "mask enables non-supported"
480                                                   " bits");
481         if (!item->spec && (item->mask || item->last))
482                 return rte_flow_error_set(error, EINVAL,
483                                           RTE_FLOW_ERROR_TYPE_ITEM,
484                                           item,
485                                           "mask/last without a spec is not"
486                                           " supported");
487         if (item->spec && item->last) {
488                 uint8_t spec[size];
489                 uint8_t last[size];
490                 unsigned int i;
491                 int ret;
492
493                 for (i = 0; i < size; ++i) {
494                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
495                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
496                 }
497                 ret = memcmp(spec, last, size);
498                 if (ret != 0)
499                         return rte_flow_error_set(error, ENOTSUP,
500                                                   RTE_FLOW_ERROR_TYPE_ITEM,
501                                                   item,
502                                                   "range is not supported");
503         }
504         return 0;
505 }
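/*
 * Example (illustrative only): the IPv4 NIC mask used further below does not
 * cover hdr.time_to_live, so a user mask such as
 *
 *   const struct rte_flow_item_ipv4 ttl_mask = {
 *           .hdr = { .time_to_live = 0xff },
 *   };
 *
 * enables bits outside the NIC mask ((nic_mask[i] | mask[i]) != nic_mask[i])
 * and makes mlx5_flow_item_acceptable() fail with ENOTSUP.
 */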
506
507 /**
508  * Add a verbs item specification into @p flow.
509  *
510  * @param[in, out] flow
511  *   Pointer to flow structure.
512  * @param[in] src
513  *   Pointer to the Verbs specification to copy.
514  * @param[in] size
515  *   Size in bytes of the specification to copy.
516  */
517 static void
518 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
519 {
520         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
521
522         if (verbs->specs) {
523                 void *dst;
524
525                 dst = (void *)(verbs->specs + verbs->size);
526                 memcpy(dst, src, size);
527                 ++verbs->attr->num_of_specs;
528         }
529         verbs->size += size;
530 }
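/*
 * Sketch of the two-pass conversion scheme (illustrative, simplified from
 * the real caller; pattern, dry_flow, flow and error are hypothetical locals
 * of that caller): converters are first run with no room in the flow to
 * learn how much Verbs specification memory is needed, then run again on a
 * freshly allocated flow providing that much space:
 *
 *   int size = mlx5_flow_items(pattern, &dry_flow, 0, error); // sizing pass
 *
 *   if (size < 0)
 *           return size;
 *   // allocate "flow" with at least "size" bytes of specification space
 *   size = mlx5_flow_items(pattern, flow, size, error);       // copy pass
 */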
531
532 /**
533  * Adjust verbs hash fields according to the @p flow information.
534  *
535  * @param[in, out] flow
536  *   Pointer to flow structure.
537  * @param[in] tunnel
538  *   1 when the hash field is for a tunnel item.
539  * @param[in] layer_types
540  *   ETH_RSS_* types.
541  * @param[in] hash_fields
542  *   Item hash fields.
543  */
544 static void
545 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
546                                   int tunnel __rte_unused,
547                                   uint32_t layer_types, uint64_t hash_fields)
548 {
549 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
550         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
551         if (flow->rss.level == 2 && !tunnel)
552                 hash_fields = 0;
553         else if (flow->rss.level < 2 && tunnel)
554                 hash_fields = 0;
555 #endif
556         if (!(flow->rss.types & layer_types))
557                 hash_fields = 0;
558         flow->cur_verbs->hash_fields |= hash_fields;
559 }
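/*
 * Example (illustrative only): for a flow requesting rss.types == ETH_RSS_UDP,
 * the UDP item converter calls this helper with layer_types == ETH_RSS_UDP and
 * hash_fields == (IBV_RX_HASH_SRC_PORT_UDP | IBV_RX_HASH_DST_PORT_UDP), so
 * both bits are added to cur_verbs->hash_fields.  Had the user requested
 * ETH_RSS_TCP only, the UDP hash fields would have been cleared instead.
 */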
560
561 /**
562  * Convert the @p item into a Verbs specification after ensuring the NIC
563  * will understand and process it correctly.
564  * If the necessary size for the conversion is greater than the @p flow_size,
565  * nothing is written in @p flow, the validation is still performed.
566  *
567  * @param[in] item
568  *   Item specification.
569  * @param[in, out] flow
570  *   Pointer to flow structure.
571  * @param[in] flow_size
572  *   Size in bytes of the available space in @p flow, if too small, nothing is
573  *   written.
574  * @param[out] error
575  *   Pointer to error structure.
576  *
577  * @return
578  *   On success the number of bytes consumed/necessary, if the returned value
579  *   On success, the number of bytes consumed/necessary; if the returned value
580  *   is less than or equal to @p flow_size, the @p item has been fully converted,
581  *   otherwise another call with this returned memory size should be done.
582  */
583 static int
584 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
585                    const size_t flow_size, struct rte_flow_error *error)
586 {
587         const struct rte_flow_item_eth *spec = item->spec;
588         const struct rte_flow_item_eth *mask = item->mask;
589         const struct rte_flow_item_eth nic_mask = {
590                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
591                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
592                 .type = RTE_BE16(0xffff),
593         };
594         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
595         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
596         struct ibv_flow_spec_eth eth = {
597                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
598                 .size = size,
599         };
600         int ret;
601
602         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
603                             MLX5_FLOW_LAYER_OUTER_L2))
604                 return rte_flow_error_set(error, ENOTSUP,
605                                           RTE_FLOW_ERROR_TYPE_ITEM,
606                                           item,
607                                           "L2 layers already configured");
608         if (!mask)
609                 mask = &rte_flow_item_eth_mask;
610         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
611                                         (const uint8_t *)&nic_mask,
612                                         sizeof(struct rte_flow_item_eth),
613                                         error);
614         if (ret)
615                 return ret;
616         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
617                 MLX5_FLOW_LAYER_OUTER_L2;
618         if (size > flow_size)
619                 return size;
620         if (spec) {
621                 unsigned int i;
622
623                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
624                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
625                 eth.val.ether_type = spec->type;
626                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
627                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
628                 eth.mask.ether_type = mask->type;
629                 /* Remove unwanted bits from values. */
630                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
631                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
632                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
633                 }
634                 eth.val.ether_type &= eth.mask.ether_type;
635         }
636         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
637         mlx5_flow_spec_verbs_add(flow, &eth, size);
638         return size;
639 }
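/*
 * Example pattern item (illustrative only) accepted by the converter above,
 * matching a single destination MAC address:
 *
 *   static const struct rte_flow_item_eth eth_spec = {
 *           .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *   };
 *   static const struct rte_flow_item_eth eth_mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *   const struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &eth_mask,
 *   };
 *
 * The resulting ibv_flow_spec_eth matches dst_mac 00:11:22:33:44:55 with a
 * full mask and leaves the source MAC and ether_type wildcarded.
 */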
640
641 /**
642  * Update the VLAN tag in the Verbs Ethernet specification.
643  *
644  * @param[in, out] attr
645  *   Pointer to Verbs attributes structure.
646  * @param[in] eth
647  *   Verbs structure containing the VLAN information to copy.
648  */
649 static void
650 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
651                            struct ibv_flow_spec_eth *eth)
652 {
653         unsigned int i;
654         const enum ibv_flow_spec_type search = eth->type;
655         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
656                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
657
658         for (i = 0; i != attr->num_of_specs; ++i) {
659                 if (hdr->type == search) {
660                         struct ibv_flow_spec_eth *e =
661                                 (struct ibv_flow_spec_eth *)hdr;
662
663                         e->val.vlan_tag = eth->val.vlan_tag;
664                         e->mask.vlan_tag = eth->mask.vlan_tag;
665                         e->val.ether_type = eth->val.ether_type;
666                         e->mask.ether_type = eth->mask.ether_type;
667                         break;
668                 }
669                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
670         }
671 }
672
673 /**
674  * Convert the @p item into @p flow (or by updating the already present
675  * Ethernet Verbs) specification after ensuring the NIC will understand and
676  * process it correctly.
677  * If the necessary size for the conversion is greater than the @p flow_size,
678  * nothing is written in @p flow, the validation is still performed.
679  *
680  * @param[in] item
681  *   Item specification.
682  * @param[in, out] flow
683  *   Pointer to flow structure.
684  * @param[in] flow_size
685  *   Size in bytes of the available space in @p flow, if too small, nothing is
686  *   written.
687  * @param[out] error
688  *   Pointer to error structure.
689  *
690  * @return
691  *   On success, the number of bytes consumed/necessary; if the returned value
692  *   is less than or equal to @p flow_size, the @p item has been fully converted,
693  *   otherwise another call with this returned memory size should be done.
694  *   On error, a negative errno value is returned and rte_errno is set.
695  */
696 static int
697 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
698                     const size_t flow_size, struct rte_flow_error *error)
699 {
700         const struct rte_flow_item_vlan *spec = item->spec;
701         const struct rte_flow_item_vlan *mask = item->mask;
702         const struct rte_flow_item_vlan nic_mask = {
703                 .tci = RTE_BE16(0x0fff),
704                 .inner_type = RTE_BE16(0xffff),
705         };
706         unsigned int size = sizeof(struct ibv_flow_spec_eth);
707         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
708         struct ibv_flow_spec_eth eth = {
709                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
710                 .size = size,
711         };
712         int ret;
713         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
714                                         MLX5_FLOW_LAYER_INNER_L4) :
715                 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
716         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
717                 MLX5_FLOW_LAYER_OUTER_VLAN;
718         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
719                 MLX5_FLOW_LAYER_OUTER_L2;
720
721         if (flow->layers & vlanm)
722                 return rte_flow_error_set(error, ENOTSUP,
723                                           RTE_FLOW_ERROR_TYPE_ITEM,
724                                           item,
725                                           "VLAN layer already configured");
726         else if ((flow->layers & l34m) != 0)
727                 return rte_flow_error_set(error, ENOTSUP,
728                                           RTE_FLOW_ERROR_TYPE_ITEM,
729                                           item,
730                                           "L2 layer cannot follow L3/L4 layer");
731         if (!mask)
732                 mask = &rte_flow_item_vlan_mask;
733         ret = mlx5_flow_item_acceptable
734                 (item, (const uint8_t *)mask,
735                  (const uint8_t *)&nic_mask,
736                  sizeof(struct rte_flow_item_vlan), error);
737         if (ret)
738                 return ret;
739         if (spec) {
740                 eth.val.vlan_tag = spec->tci;
741                 eth.mask.vlan_tag = mask->tci;
742                 eth.val.vlan_tag &= eth.mask.vlan_tag;
743                 eth.val.ether_type = spec->inner_type;
744                 eth.mask.ether_type = mask->inner_type;
745                 eth.val.ether_type &= eth.mask.ether_type;
746         }
747         /*
748          * From a Verbs perspective, an empty VLAN match is equivalent
749          * to a packet without a VLAN layer.
750          */
751         if (!eth.mask.vlan_tag)
752                 return rte_flow_error_set(error, EINVAL,
753                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
754                                           item->spec,
755                                           "VLAN cannot be empty");
756         if (!(flow->layers & l2m)) {
757                 if (size <= flow_size) {
758                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
759                         mlx5_flow_spec_verbs_add(flow, &eth, size);
760                 }
761         } else {
762                 if (flow->cur_verbs)
763                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
764                                                    &eth);
765                 size = 0; /* Only an update is done in eth specification. */
766         }
767         flow->layers |= tunnel ?
768                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
769                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
770         return size;
771 }
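/*
 * Example (illustrative only): in a pattern such as
 *
 *   ETH / VLAN (tci = 100, tci mask = 0x0fff) / IPV4 / END
 *
 * the VLAN item does not emit a second Verbs specification.  Since the L2
 * layer is already present, mlx5_flow_item_vlan_update() patches vlan_tag
 * and ether_type into the ibv_flow_spec_eth added for the ETH item and the
 * converter reports 0 additional bytes.
 */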
772
773 /**
774  * Convert the @p item into a Verbs specification after ensuring the NIC
775  * will understand and process it correctly.
776  * If the necessary size for the conversion is greater than the @p flow_size,
777  * nothing is written in @p flow, the validation is still performed.
778  *
779  * @param[in] item
780  *   Item specification.
781  * @param[in, out] flow
782  *   Pointer to flow structure.
783  * @param[in] flow_size
784  *   Size in bytes of the available space in @p flow, if too small, nothing is
785  *   written.
786  * @param[out] error
787  *   Pointer to error structure.
788  *
789  * @return
790  *   On success the number of bytes consumed/necessary, if the returned value
791  *   On success, the number of bytes consumed/necessary; if the returned value
792  *   is less than or equal to @p flow_size, the @p item has been fully converted,
793  *   otherwise another call with this returned memory size should be done.
794  */
795 static int
796 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
797                     const size_t flow_size, struct rte_flow_error *error)
798 {
799         const struct rte_flow_item_ipv4 *spec = item->spec;
800         const struct rte_flow_item_ipv4 *mask = item->mask;
801         const struct rte_flow_item_ipv4 nic_mask = {
802                 .hdr = {
803                         .src_addr = RTE_BE32(0xffffffff),
804                         .dst_addr = RTE_BE32(0xffffffff),
805                         .type_of_service = 0xff,
806                         .next_proto_id = 0xff,
807                 },
808         };
809         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
810         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
811         struct ibv_flow_spec_ipv4_ext ipv4 = {
812                 .type = IBV_FLOW_SPEC_IPV4_EXT |
813                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
814                 .size = size,
815         };
816         int ret;
817
818         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
819                             MLX5_FLOW_LAYER_OUTER_L3))
820                 return rte_flow_error_set(error, ENOTSUP,
821                                           RTE_FLOW_ERROR_TYPE_ITEM,
822                                           item,
823                                           "multiple L3 layers not supported");
824         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
825                                  MLX5_FLOW_LAYER_OUTER_L4))
826                 return rte_flow_error_set(error, ENOTSUP,
827                                           RTE_FLOW_ERROR_TYPE_ITEM,
828                                           item,
829                                           "L3 cannot follow an L4 layer.");
830         if (!mask)
831                 mask = &rte_flow_item_ipv4_mask;
832         ret = mlx5_flow_item_acceptable
833                 (item, (const uint8_t *)mask,
834                  (const uint8_t *)&nic_mask,
835                  sizeof(struct rte_flow_item_ipv4), error);
836         if (ret < 0)
837                 return ret;
838         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
839                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
840         if (spec) {
841                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
842                         .src_ip = spec->hdr.src_addr,
843                         .dst_ip = spec->hdr.dst_addr,
844                         .proto = spec->hdr.next_proto_id,
845                         .tos = spec->hdr.type_of_service,
846                 };
847                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
848                         .src_ip = mask->hdr.src_addr,
849                         .dst_ip = mask->hdr.dst_addr,
850                         .proto = mask->hdr.next_proto_id,
851                         .tos = mask->hdr.type_of_service,
852                 };
853                 /* Remove unwanted bits from values. */
854                 ipv4.val.src_ip &= ipv4.mask.src_ip;
855                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
856                 ipv4.val.proto &= ipv4.mask.proto;
857                 ipv4.val.tos &= ipv4.mask.tos;
858         }
859         flow->l3_protocol_en = !!ipv4.mask.proto;
860         flow->l3_protocol = ipv4.val.proto;
861         if (size <= flow_size) {
862                 mlx5_flow_verbs_hashfields_adjust
863                         (flow, tunnel,
864                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
865                           ETH_RSS_NONFRAG_IPV4_OTHER),
866                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
867                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
868                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
869         }
870         return size;
871 }
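/*
 * Example (illustrative only): matching UDP traffic to 192.0.2.0/24 also
 * records the L3 protocol so that a later L4 item can be checked against it:
 *
 *   const struct rte_flow_item_ipv4 v4_spec = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(0xc0000200), // 192.0.2.0
 *                   .next_proto_id = MLX5_IP_PROTOCOL_UDP,
 *           },
 *   };
 *   const struct rte_flow_item_ipv4 v4_mask = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(0xffffff00), // /24
 *                   .next_proto_id = 0xff,
 *           },
 *   };
 *
 * After conversion flow->l3_protocol_en is set and flow->l3_protocol is 17
 * (UDP), so a subsequent TCP item would be rejected as incompatible.
 */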
872
873 /**
874  * Convert the @p item into a Verbs specification after ensuring the NIC
875  * will understand and process it correctly.
876  * If the necessary size for the conversion is greater than the @p flow_size,
877  * nothing is written in @p flow, the validation is still performed.
878  *
879  * @param[in] item
880  *   Item specification.
881  * @param[in, out] flow
882  *   Pointer to flow structure.
883  * @param[in] flow_size
884  *   Size in bytes of the available space in @p flow, if too small, nothing is
885  *   written.
886  * @param[out] error
887  *   Pointer to error structure.
888  *
889  * @return
890  *   On success, the number of bytes consumed/necessary; if the returned value
891  *   is less than or equal to @p flow_size, the @p item has been fully converted,
892  *   otherwise another call with this returned memory size should be done.
893  *   On error, a negative errno value is returned and rte_errno is set.
894  */
895 static int
896 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
897                     const size_t flow_size, struct rte_flow_error *error)
898 {
899         const struct rte_flow_item_ipv6 *spec = item->spec;
900         const struct rte_flow_item_ipv6 *mask = item->mask;
901         const struct rte_flow_item_ipv6 nic_mask = {
902                 .hdr = {
903                         .src_addr =
904                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
905                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
906                         .dst_addr =
907                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
908                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
909                         .vtc_flow = RTE_BE32(0xffffffff),
910                         .proto = 0xff,
911                         .hop_limits = 0xff,
912                 },
913         };
914         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
915         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
916         struct ibv_flow_spec_ipv6 ipv6 = {
917                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
918                 .size = size,
919         };
920         int ret;
921
922         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
923                             MLX5_FLOW_LAYER_OUTER_L3))
924                 return rte_flow_error_set(error, ENOTSUP,
925                                           RTE_FLOW_ERROR_TYPE_ITEM,
926                                           item,
927                                           "multiple L3 layers not supported");
928         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
929                                  MLX5_FLOW_LAYER_OUTER_L4))
930                 return rte_flow_error_set(error, ENOTSUP,
931                                           RTE_FLOW_ERROR_TYPE_ITEM,
932                                           item,
933                                           "L3 cannot follow an L4 layer.");
934         if (!mask)
935                 mask = &rte_flow_item_ipv6_mask;
936         ret = mlx5_flow_item_acceptable
937                 (item, (const uint8_t *)mask,
938                  (const uint8_t *)&nic_mask,
939                  sizeof(struct rte_flow_item_ipv6), error);
940         if (ret < 0)
941                 return ret;
942         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
943                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
944         if (spec) {
945                 unsigned int i;
946                 uint32_t vtc_flow_val;
947                 uint32_t vtc_flow_mask;
948
949                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
950                        RTE_DIM(ipv6.val.src_ip));
951                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
952                        RTE_DIM(ipv6.val.dst_ip));
953                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
954                        RTE_DIM(ipv6.mask.src_ip));
955                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
956                        RTE_DIM(ipv6.mask.dst_ip));
957                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
958                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
959                 ipv6.val.flow_label =
960                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
961                                          IPV6_HDR_FL_SHIFT);
962                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
963                                          IPV6_HDR_TC_SHIFT;
964                 ipv6.val.next_hdr = spec->hdr.proto;
965                 ipv6.val.hop_limit = spec->hdr.hop_limits;
966                 ipv6.mask.flow_label =
967                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
968                                          IPV6_HDR_FL_SHIFT);
969                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
970                                           IPV6_HDR_TC_SHIFT;
971                 ipv6.mask.next_hdr = mask->hdr.proto;
972                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
973                 /* Remove unwanted bits from values. */
974                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
975                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
976                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
977                 }
978                 ipv6.val.flow_label &= ipv6.mask.flow_label;
979                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
980                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
981                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
982         }
983         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
984         flow->l3_protocol = ipv6.val.next_hdr;
985         if (size <= flow_size) {
986                 mlx5_flow_verbs_hashfields_adjust
987                         (flow, tunnel,
988                          (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
989                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
990                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
991                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
992         }
993         return size;
994 }
995
996 /**
997  * Convert the @p item into a Verbs specification after ensuring the NIC
998  * will understand and process it correctly.
999  * If the necessary size for the conversion is greater than the @p flow_size,
1000  * nothing is written in @p flow, the validation is still performed.
1001  *
1002  * @param[in] item
1003  *   Item specification.
1004  * @param[in, out] flow
1005  *   Pointer to flow structure.
1006  * @param[in] flow_size
1007  *   Size in bytes of the available space in @p flow, if too small, nothing is
1008  *   written.
1009  * @param[out] error
1010  *   Pointer to error structure.
1011  *
1012  * @return
1013  *   On success, the number of bytes consumed/necessary; if the returned value
1014  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1015  *   otherwise another call with this returned memory size should be done.
1016  *   On error, a negative errno value is returned and rte_errno is set.
1017  */
1018 static int
1019 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1020                    const size_t flow_size, struct rte_flow_error *error)
1021 {
1022         const struct rte_flow_item_udp *spec = item->spec;
1023         const struct rte_flow_item_udp *mask = item->mask;
1024         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1025         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1026         struct ibv_flow_spec_tcp_udp udp = {
1027                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1028                 .size = size,
1029         };
1030         int ret;
1031
1032         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
1033                 return rte_flow_error_set(error, ENOTSUP,
1034                                           RTE_FLOW_ERROR_TYPE_ITEM,
1035                                           item,
1036                                           "protocol filtering not compatible"
1037                                           " with UDP layer");
1038         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1039                               MLX5_FLOW_LAYER_OUTER_L3)))
1040                 return rte_flow_error_set(error, ENOTSUP,
1041                                           RTE_FLOW_ERROR_TYPE_ITEM,
1042                                           item,
1043                                           "L3 is mandatory to filter"
1044                                           " on L4");
1045         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1046                             MLX5_FLOW_LAYER_OUTER_L4))
1047                 return rte_flow_error_set(error, ENOTSUP,
1048                                           RTE_FLOW_ERROR_TYPE_ITEM,
1049                                           item,
1050                                           "L4 layer is already"
1051                                           " present");
1052         if (!mask)
1053                 mask = &rte_flow_item_udp_mask;
1054         ret = mlx5_flow_item_acceptable
1055                 (item, (const uint8_t *)mask,
1056                  (const uint8_t *)&rte_flow_item_udp_mask,
1057                  sizeof(struct rte_flow_item_udp), error);
1058         if (ret < 0)
1059                 return ret;
1060         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1061                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1062         if (spec) {
1063                 udp.val.dst_port = spec->hdr.dst_port;
1064                 udp.val.src_port = spec->hdr.src_port;
1065                 udp.mask.dst_port = mask->hdr.dst_port;
1066                 udp.mask.src_port = mask->hdr.src_port;
1067                 /* Remove unwanted bits from values. */
1068                 udp.val.src_port &= udp.mask.src_port;
1069                 udp.val.dst_port &= udp.mask.dst_port;
1070         }
1071         if (size <= flow_size) {
1072                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1073                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1074                                                    IBV_RX_HASH_DST_PORT_UDP));
1075                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1076                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1077         }
1078         return size;
1079 }
1080
1081 /**
1082  * Convert the @p item into a Verbs specification after ensuring the NIC
1083  * will understand and process it correctly.
1084  * If the necessary size for the conversion is greater than the @p flow_size,
1085  * nothing is written in @p flow, the validation is still performed.
1086  *
1087  * @param[in] item
1088  *   Item specification.
1089  * @param[in, out] flow
1090  *   Pointer to flow structure.
1091  * @param[in] flow_size
1092  *   Size in bytes of the available space in @p flow, if too small, nothing is
1093  *   written.
1094  * @param[out] error
1095  *   Pointer to error structure.
1096  *
1097  * @return
1098  *   On success, the number of bytes consumed/necessary; if the returned value
1099  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1100  *   otherwise another call with this returned memory size should be done.
1101  *   On error, a negative errno value is returned and rte_errno is set.
1102  */
1103 static int
1104 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1105                    const size_t flow_size, struct rte_flow_error *error)
1106 {
1107         const struct rte_flow_item_tcp *spec = item->spec;
1108         const struct rte_flow_item_tcp *mask = item->mask;
1109         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1110         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1111         struct ibv_flow_spec_tcp_udp tcp = {
1112                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1113                 .size = size,
1114         };
1115         int ret;
1116
1117         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1118                 return rte_flow_error_set(error, ENOTSUP,
1119                                           RTE_FLOW_ERROR_TYPE_ITEM,
1120                                           item,
1121                                           "protocol filtering not compatible"
1122                                           " with TCP layer");
1123         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1124                               MLX5_FLOW_LAYER_OUTER_L3)))
1125                 return rte_flow_error_set(error, ENOTSUP,
1126                                           RTE_FLOW_ERROR_TYPE_ITEM,
1127                                           item,
1128                                           "L3 is mandatory to filter on L4");
1129         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1130                             MLX5_FLOW_LAYER_OUTER_L4))
1131                 return rte_flow_error_set(error, ENOTSUP,
1132                                           RTE_FLOW_ERROR_TYPE_ITEM,
1133                                           item,
1134                                           "L4 layer is already present");
1135         if (!mask)
1136                 mask = &rte_flow_item_tcp_mask;
1137         ret = mlx5_flow_item_acceptable
1138                 (item, (const uint8_t *)mask,
1139                  (const uint8_t *)&rte_flow_item_tcp_mask,
1140                  sizeof(struct rte_flow_item_tcp), error);
1141         if (ret < 0)
1142                 return ret;
1143         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1144                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1145         if (spec) {
1146                 tcp.val.dst_port = spec->hdr.dst_port;
1147                 tcp.val.src_port = spec->hdr.src_port;
1148                 tcp.mask.dst_port = mask->hdr.dst_port;
1149                 tcp.mask.src_port = mask->hdr.src_port;
1150                 /* Remove unwanted bits from values. */
1151                 tcp.val.src_port &= tcp.mask.src_port;
1152                 tcp.val.dst_port &= tcp.mask.dst_port;
1153         }
1154         if (size <= flow_size) {
1155                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1156                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1157                                                    IBV_RX_HASH_DST_PORT_TCP));
1158                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1159                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1160         }
1161         return size;
1162 }
1163
1164 /**
1165  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1166  * will understand and process it correctly.
1167  * The conversion is performed item by item, each of them is written into
1168  * the @p flow if its size is less than or equal to @p flow_size.
1169  * Validation and memory consumption computation are still performed until the
1170  * end of @p pattern, unless an error is encountered.
1171  *
1172  * @param[in] pattern
1173  *   Flow pattern.
1174  * @param[in, out] flow
1175  *   Pointer to the rte_flow structure.
1176  * @param[in] flow_size
1177  *   Size in bytes of the available space in @p flow, if too small, some
1178  *   garbage may be present.
1179  * @param[out] error
1180  *   Pointer to error structure.
1181  *
1182  * @return
1183  *   On success, the number of bytes consumed/necessary; if the returned value
1184  *   is less than or equal to @p flow_size, the @p pattern has been fully
1185  *   converted, otherwise another call with this returned memory size should
1186  *   be done.
1187  *   On error, a negative errno value is returned and rte_errno is set.
1188  */
1189 static int
1190 mlx5_flow_items(const struct rte_flow_item pattern[],
1191                 struct rte_flow *flow, const size_t flow_size,
1192                 struct rte_flow_error *error)
1193 {
1194         int remain = flow_size;
1195         size_t size = 0;
1196
1197         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1198                 int ret = 0;
1199
1200                 switch (pattern->type) {
1201                 case RTE_FLOW_ITEM_TYPE_VOID:
1202                         break;
1203                 case RTE_FLOW_ITEM_TYPE_ETH:
1204                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1205                         break;
1206                 case RTE_FLOW_ITEM_TYPE_VLAN:
1207                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1208                         break;
1209                 case RTE_FLOW_ITEM_TYPE_IPV4:
1210                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1211                         break;
1212                 case RTE_FLOW_ITEM_TYPE_IPV6:
1213                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1214                         break;
1215                 case RTE_FLOW_ITEM_TYPE_UDP:
1216                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1217                         break;
1218                 case RTE_FLOW_ITEM_TYPE_TCP:
1219                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1220                         break;
1221                 default:
1222                         return rte_flow_error_set(error, ENOTSUP,
1223                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1224                                                   pattern,
1225                                                   "item not supported");
1226                 }
1227                 if (ret < 0)
1228                         return ret;
1229                 if (remain > ret)
1230                         remain -= ret;
1231                 else
1232                         remain = 0;
1233                 size += ret;
1234         }
1235         if (!flow->layers) {
1236                 const struct rte_flow_item item = {
1237                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1238                 };
1239
1240                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1241         }
1242         return size;
1243 }
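/*
 * Example pattern (illustrative only) handled by the loop above; an empty
 * pattern falls back to a default Ethernet item matching all traffic:
 *
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 *   int size = mlx5_flow_items(pattern, flow, flow_size, error);
 *
 * The return value is the total Verbs specification size required
 * (ibv_flow_spec_eth + ibv_flow_spec_ipv4_ext + ibv_flow_spec_tcp_udp), or a
 * negative errno value on validation failure.
 */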
1244
1245 /**
1246  * Convert the @p action into a Verbs specification after ensuring the NIC
1247  * will understand and process it correctly.
1248  * If the necessary size for the conversion is greater than the @p flow_size,
1249  * nothing is written in @p flow, the validation is still performed.
1250  *
1251  * @param[in] action
1252  *   Action configuration.
1253  * @param[in, out] flow
1254  *   Pointer to flow structure.
1255  * @param[in] flow_size
1256  *   Size in bytes of the available space in @p flow, if too small, nothing is
1257  *   written.
1258  * @param[out] error
1259  *   Pointer to error structure.
1260  *
1261  * @return
1262  *   On success, the number of bytes consumed/necessary; if the returned value
1263  *   is less than or equal to @p flow_size, the @p action has been fully
1264  *   converted, otherwise another call with this returned memory size should
1265  *   be done.
1266  *   On error, a negative errno value is returned and rte_errno is set.
1267  */
1268 static int
1269 mlx5_flow_action_drop(const struct rte_flow_action *action,
1270                       struct rte_flow *flow, const size_t flow_size,
1271                       struct rte_flow_error *error)
1272 {
1273         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1274         struct ibv_flow_spec_action_drop drop = {
1275                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1276                         .size = size,
1277         };
1278
1279         if (flow->fate)
1280                 return rte_flow_error_set(error, ENOTSUP,
1281                                           RTE_FLOW_ERROR_TYPE_ACTION,
1282                                           action,
1283                                           "multiple fate actions are not"
1284                                           " supported");
1285         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1286                 return rte_flow_error_set(error, ENOTSUP,
1287                                           RTE_FLOW_ERROR_TYPE_ACTION,
1288                                           action,
1289                                           "drop is not compatible with"
1290                                           " flag/mark action");
1291         if (size <= flow_size)
1292                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1293         flow->fate |= MLX5_FLOW_FATE_DROP;
1294         return size;
1295 }
1296
1297 /**
1298  * Convert the @p action into @p flow after ensuring the NIC will understand
1299  * and process it correctly.
1300  *
1301  * @param[in] dev
1302  *   Pointer to Ethernet device structure.
1303  * @param[in] action
1304  *   Action configuration.
1305  * @param[in, out] flow
1306  *   Pointer to flow structure.
1307  * @param[out] error
1308  *   Pointer to error structure.
1309  *
1310  * @return
1311  *   0 on success, a negative errno value otherwise and rte_errno is set.
1312  */
1313 static int
1314 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1315                        const struct rte_flow_action *action,
1316                        struct rte_flow *flow,
1317                        struct rte_flow_error *error)
1318 {
1319         struct priv *priv = dev->data->dev_private;
1320         const struct rte_flow_action_queue *queue = action->conf;
1321
1322         if (flow->fate)
1323                 return rte_flow_error_set(error, ENOTSUP,
1324                                           RTE_FLOW_ERROR_TYPE_ACTION,
1325                                           action,
1326                                           "multiple fate actions are not"
1327                                           " supported");
1328         if (queue->index >= priv->rxqs_n)
1329                 return rte_flow_error_set(error, EINVAL,
1330                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1331                                           &queue->index,
1332                                           "queue index out of range");
1333         if (!(*priv->rxqs)[queue->index])
1334                 return rte_flow_error_set(error, EINVAL,
1335                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1336                                           &queue->index,
1337                                           "queue is not configured");
1338         if (flow->queue)
1339                 (*flow->queue)[0] = queue->index;
1340         flow->rss.queue_num = 1;
1341         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1342         return 0;
1343 }
1344
1345 /**
1346  * Ensure the @p action will be understood and used correctly by the NIC.
1347  *
1348  * @param dev
1349  *   Pointer to Ethernet device structure.
1350  * @param[in] action
1351  *   Action configuration.
1352  * @param[in, out] flow
1353  *   Pointer to the rte_flow structure.
1354  * @param[out] error
1355  *   Pointer to error structure.
1356  *
1357  * @return
1358  *   On success, the @p flow->queue array and @p flow->rss are filled and valid.
1359  *   On error, a negative errno value is returned and rte_errno is set.
1360  */
1361 static int
1362 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1363                      const struct rte_flow_action *action,
1364                      struct rte_flow *flow,
1365                      struct rte_flow_error *error)
1366 {
1367         struct priv *priv = dev->data->dev_private;
1368         const struct rte_flow_action_rss *rss = action->conf;
1369         unsigned int i;
1370
1371         if (flow->fate)
1372                 return rte_flow_error_set(error, ENOTSUP,
1373                                           RTE_FLOW_ERROR_TYPE_ACTION,
1374                                           action,
1375                                           "multiple fate actions are not"
1376                                           " supported");
1377         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1378             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1379                 return rte_flow_error_set(error, ENOTSUP,
1380                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1381                                           &rss->func,
1382                                           "RSS hash function not supported");
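        /*
         * rte_flow RSS level semantics: 0/1 hash on the outermost headers,
         * higher levels request inner (tunnel) RSS, which needs tunnel
         * offload support from the Verbs layer.
         */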
1383 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1384         if (rss->level > 2)
1385 #else
1386         if (rss->level > 1)
1387 #endif
1388                 return rte_flow_error_set(error, ENOTSUP,
1389                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1390                                           &rss->level,
1391                                           "tunnel RSS is not supported");
1392         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1393                 return rte_flow_error_set(error, ENOTSUP,
1394                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1395                                           &rss->key_len,
1396                                           "RSS hash key too small");
1397         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1398                 return rte_flow_error_set(error, ENOTSUP,
1399                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1400                                           &rss->key_len,
1401                                           "RSS hash key too large");
1402         if (rss->queue_num > priv->config.ind_table_max_size)
1403                 return rte_flow_error_set(error, ENOTSUP,
1404                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1405                                           &rss->queue_num,
1406                                           "number of queues too large");
1407         if (rss->types & MLX5_RSS_HF_MASK)
1408                 return rte_flow_error_set(error, ENOTSUP,
1409                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1410                                           &rss->types,
1411                                           "some RSS protocols are not"
1412                                           " supported");
1413         for (i = 0; i != rss->queue_num; ++i) {
1414                 if (!(*priv->rxqs)[rss->queue[i]])
1415                         return rte_flow_error_set
1416                                 (error, EINVAL,
1417                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1418                                  &rss->queue[i],
1419                                  "queue is not configured");
1420         }
1421         if (flow->queue)
1422                 memcpy((*flow->queue), rss->queue,
1423                        rss->queue_num * sizeof(uint16_t));
1424         flow->rss.queue_num = rss->queue_num;
1425         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1426         flow->rss.types = rss->types;
1427         flow->rss.level = rss->level;
1428         flow->fate |= MLX5_FLOW_FATE_RSS;
1429         return 0;
1430 }
1431
1432 /**
1433  * Convert the @p action into a Verbs specification after ensuring the NIC
1434  * will understand and process it correctly.
1435  * If the necessary size for the conversion is greater than @p flow_size,
1436  * nothing is written in @p flow; validation is still performed.
1437  *
1438  * @param[in] action
1439  *   Action configuration.
1440  * @param[in, out] flow
1441  *   Pointer to flow structure.
1442  * @param[in] flow_size
1443  *   Size in bytes of the available space in @p flow; if too small, nothing
1444  *   is written.
1445  * @param[out] error
1446  *   Pointer to error structure.
1447  *
1448  * @return
1449  *   On success, the number of bytes consumed/necessary; if the returned
1450  *   value is less than or equal to @p flow_size, the @p action has been
1451  *   fully converted, otherwise the call should be repeated with this
1452  *   returned amount of memory.
1453  *   On error, a negative errno value is returned and rte_errno is set.
1454  */
1455 static int
1456 mlx5_flow_action_flag(const struct rte_flow_action *action,
1457                       struct rte_flow *flow, const size_t flow_size,
1458                       struct rte_flow_error *error)
1459 {
1460         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1461         struct ibv_flow_spec_action_tag tag = {
1462                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1463                 .size = size,
1464                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1465         };
1466         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1467
1468         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1469                 return rte_flow_error_set(error, ENOTSUP,
1470                                           RTE_FLOW_ERROR_TYPE_ACTION,
1471                                           action,
1472                                           "flag action already present");
1473         if (flow->fate & MLX5_FLOW_FATE_DROP)
1474                 return rte_flow_error_set(error, ENOTSUP,
1475                                           RTE_FLOW_ERROR_TYPE_ACTION,
1476                                           action,
1477                                           "flag is not compatible with drop"
1478                                           " action");
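        /*
         * A mark action already carries the tag specification, so the flag
         * consumes no extra space in that case.
         */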
1479         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1480                 size = 0;
1481         else if (size <= flow_size && verbs)
1482                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1483         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1484         return size;
1485 }
1486
1487 /**
1488  * Update verbs specification to modify the flag to mark.
1489  *
1490  * @param[in, out] verbs
1491  *   Pointer to the mlx5_flow_verbs structure.
1492  * @param[in] mark_id
1493  *   Mark identifier to replace the flag.
1494  */
1495 static void
1496 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1497 {
1498         struct ibv_spec_header *hdr;
1499         int i;
1500
1501         if (!verbs)
1502                 return;
1503         /* Update Verbs specification. */
1504         hdr = (struct ibv_spec_header *)verbs->specs;
1505         if (!hdr)
1506                 return;
1507         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1508                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1509                         struct ibv_flow_spec_action_tag *t =
1510                                 (struct ibv_flow_spec_action_tag *)hdr;
1511
1512                         t->tag_id = mlx5_flow_mark_set(mark_id);
1513                 }
1514                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1515         }
1516 }
1517
1518 /**
1519  * Convert the @p action into @p flow (or update the already present
1520  * Flag Verbs specification) after ensuring the NIC will understand and
1521  * process it correctly.
1522  * If the necessary size for the conversion is greater than @p flow_size,
1523  * nothing is written in @p flow; validation is still performed.
1524  *
1525  * @param[in] action
1526  *   Action configuration.
1527  * @param[in, out] flow
1528  *   Pointer to flow structure.
1529  * @param[in] flow_size
1530  *   Size in bytes of the available space in @p flow; if too small, nothing
1531  *   is written.
1532  * @param[out] error
1533  *   Pointer to error structure.
1534  *
1535  * @return
1536  *   On success, the number of bytes consumed/necessary; if the returned
1537  *   value is less than or equal to @p flow_size, the @p action has been
1538  *   fully converted, otherwise the call should be repeated with this
1539  *   returned amount of memory.
1540  *   On error, a negative errno value is returned and rte_errno is set.
1541  */
1542 static int
1543 mlx5_flow_action_mark(const struct rte_flow_action *action,
1544                       struct rte_flow *flow, const size_t flow_size,
1545                       struct rte_flow_error *error)
1546 {
1547         const struct rte_flow_action_mark *mark = action->conf;
1548         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1549         struct ibv_flow_spec_action_tag tag = {
1550                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1551                 .size = size,
1552         };
1553         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1554
1555         if (!mark)
1556                 return rte_flow_error_set(error, EINVAL,
1557                                           RTE_FLOW_ERROR_TYPE_ACTION,
1558                                           action,
1559                                           "configuration cannot be null");
1560         if (mark->id >= MLX5_FLOW_MARK_MAX)
1561                 return rte_flow_error_set(error, EINVAL,
1562                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1563                                           &mark->id,
1564                                           "mark id must be in 0 <= id < "
1565                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1566         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1567                 return rte_flow_error_set(error, ENOTSUP,
1568                                           RTE_FLOW_ERROR_TYPE_ACTION,
1569                                           action,
1570                                           "mark action already present");
1571         if (flow->fate & MLX5_FLOW_FATE_DROP)
1572                 return rte_flow_error_set(error, ENOTSUP,
1573                                           RTE_FLOW_ERROR_TYPE_ACTION,
1574                                           action,
1575                                           "mark is not compatible with drop"
1576                                           " action");
1577         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1578                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1579                 size = 0;
1580         } else if (size <= flow_size) {
1581                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1582                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1583         }
1584         flow->modifier |= MLX5_FLOW_MOD_MARK;
1585         return size;
1586 }
1587
1588 /**
1589  * Convert the @p actions into @p flow after ensuring the NIC will understand
1590  * and process them correctly.
1591  * The conversion is performed action by action, each of them is written into
1592  * @p flow if its size is less than or equal to @p flow_size.
1593  * Validation and memory consumption computation are still performed until the
1594  * end of @p actions, unless an error is encountered.
1595  *
1596  * @param[in] dev
1597  *   Pointer to Ethernet device structure.
1598  * @param[in] actions
1599  *   Pointer to flow actions array.
1600  * @param[in, out] flow
1601  *   Pointer to the rte_flow structure.
1602  * @param[in] flow_size
1603  *   Size in bytes of the available space in @p flow; if too small, some
1604  *   garbage may be present.
1605  * @param[out] error
1606  *   Pointer to error structure.
1607  *
1608  * @return
1609  *   On success, the number of bytes consumed/necessary; if the returned
1610  *   value is less than or equal to @p flow_size, the @p actions have been
1611  *   fully converted, otherwise the call should be repeated with this
1612  *   returned amount of memory.
1613  *   On error, a negative errno value is returned and rte_errno is set.
1614  */
1615 static int
1616 mlx5_flow_actions(struct rte_eth_dev *dev,
1617                   const struct rte_flow_action actions[],
1618                   struct rte_flow *flow, const size_t flow_size,
1619                   struct rte_flow_error *error)
1620 {
1621         size_t size = 0;
1622         int remain = flow_size;
1623         int ret = 0;
1624
1625         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1626                 switch (actions->type) {
1627                 case RTE_FLOW_ACTION_TYPE_VOID:
1628                         break;
1629                 case RTE_FLOW_ACTION_TYPE_FLAG:
1630                         ret = mlx5_flow_action_flag(actions, flow, remain,
1631                                                     error);
1632                         break;
1633                 case RTE_FLOW_ACTION_TYPE_MARK:
1634                         ret = mlx5_flow_action_mark(actions, flow, remain,
1635                                                     error);
1636                         break;
1637                 case RTE_FLOW_ACTION_TYPE_DROP:
1638                         ret = mlx5_flow_action_drop(actions, flow, remain,
1639                                                     error);
1640                         break;
1641                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1642                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1643                         break;
1644                 case RTE_FLOW_ACTION_TYPE_RSS:
1645                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
1646                         break;
1647                 default:
1648                         return rte_flow_error_set(error, ENOTSUP,
1649                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1650                                                   actions,
1651                                                   "action not supported");
1652                 }
1653                 if (ret < 0)
1654                         return ret;
1655                 if (remain > ret)
1656                         remain -= ret;
1657                 else
1658                         remain = 0;
1659                 size += ret;
1660         }
1661         if (!flow->fate)
1662                 return rte_flow_error_set(error, ENOTSUP,
1663                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1664                                           NULL,
1665                                           "no fate action found");
1666         return size;
1667 }
1668
1669 /**
1670  * Convert the @p attributes, @p pattern and @p actions into a flow for the
1671  * NIC after ensuring the NIC will understand and process it correctly.
1672  * The conversion is performed item/action by item/action, each of them is
1673  * written into @p flow if its size is less than or equal to
1674  * @p flow_size.
1675  * Validation and memory consumption computation are still performed until the
1676  * end, unless an error is encountered.
1677  *
1678  * @param[in] dev
1679  *   Pointer to Ethernet device.
1680  * @param[in, out] flow
1681  *   Pointer to flow structure.
1682  * @param[in] flow_size
1683  *   Size in bytes of the available space in @p flow; if too small, some
1684  *   garbage may be present.
1685  * @param[in] attributes
1686  *   Flow rule attributes.
1687  * @param[in] pattern
1688  *   Pattern specification (list terminated by the END pattern item).
1689  * @param[in] actions
1690  *   Associated actions (list terminated by the END action).
1691  * @param[out] error
1692  *   Perform verbose error reporting if not NULL.
1693  *
1694  * @return
1695  *   On success, the number of bytes consumed/necessary; if the returned
1696  *   value is less than or equal to @p flow_size, the flow has been fully
1697  *   converted and can be applied, otherwise the call should be repeated
1698  *   with this returned amount of memory.
1699  *   On error, a negative errno value is returned and rte_errno is set.
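 *
 *   A minimal two-pass caller sketch (hypothetical, mirroring what
 *   mlx5_flow_list_create() does below): the first call with a zero-sized
 *   buffer returns the required size, the second one fills the flow in.
 *
 * @code
 *      struct rte_flow *flow;
 *      int size;
 *
 *      size = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, &error);
 *      if (size < 0)
 *              return size;
 *      flow = rte_calloc(__func__, 1, size, 0);
 *      if (flow)
 *              size = mlx5_flow_merge(dev, flow, size, attr, items, actions,
 *                                     &error);
 * @endcode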
1700  */
1701 static int
1702 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1703                 const size_t flow_size,
1704                 const struct rte_flow_attr *attributes,
1705                 const struct rte_flow_item pattern[],
1706                 const struct rte_flow_action actions[],
1707                 struct rte_flow_error *error)
1708 {
1709         struct rte_flow local_flow = { .layers = 0, };
1710         size_t size = sizeof(*flow);
1711         union {
1712                 struct rte_flow_expand_rss buf;
1713                 uint8_t buffer[2048];
1714         } expand_buffer;
1715         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
1716         struct mlx5_flow_verbs *original_verbs = NULL;
1717         size_t original_verbs_size = 0;
1718         uint32_t original_layers = 0;
1719         int expanded_pattern_idx = 0;
1720         int ret;
1721         uint32_t i;
1722
1723         if (size > flow_size)
1724                 flow = &local_flow;
1725         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1726         if (ret < 0)
1727                 return ret;
1728         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
1729         if (ret < 0)
1730                 return ret;
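        /*
         * When the RSS action requests several protocols, expand the user
         * pattern into one entry per matching combination; each entry gets
         * its own Verbs flow attribute in the loop below.
         */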
1731         if (local_flow.rss.types) {
1732                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
1733                                           pattern, local_flow.rss.types,
1734                                           mlx5_support_expansion,
1735                                           local_flow.rss.level < 2 ?
1736                                           MLX5_EXPANSION_ROOT :
1737                                           MLX5_EXPANSION_ROOT_OUTER);
1738                 assert(ret > 0 &&
1739                        (unsigned int)ret < sizeof(expand_buffer.buffer));
1740         } else {
1741                 buf->entries = 1;
1742                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
1743         }
1744         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
1745                                sizeof(void *));
1746         if (size <= flow_size)
1747                 flow->queue = (void *)(flow + 1);
1748         LIST_INIT(&flow->verbs);
1749         flow->layers = 0;
1750         flow->modifier = 0;
1751         flow->fate = 0;
1752         for (i = 0; i != buf->entries; ++i) {
1753                 size_t off = size;
1754                 size_t off2;
1755
1756                 flow->layers = original_layers;
1757                 size += sizeof(struct ibv_flow_attr) +
1758                         sizeof(struct mlx5_flow_verbs);
1759                 off2 = size;
1760                 if (size < flow_size) {
1761                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
1762                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
1763                         flow->cur_verbs->specs =
1764                                 (void *)(flow->cur_verbs->attr + 1);
1765                 }
1766                 /* First iteration convert the pattern into Verbs. */
1767                 if (i == 0) {
1768                         /* Actions don't need to be converted several times. */
1769                         ret = mlx5_flow_actions(dev, actions, flow,
1770                                                 (size < flow_size) ?
1771                                                 flow_size - size : 0,
1772                                                 error);
1773                         if (ret < 0)
1774                                 return ret;
1775                         size += ret;
1776                 } else {
1777                         /*
1778                          * Next iteration means the pattern has already been
1779                          * converted and an expansion is necessary to match
1780                          * the user RSS request.  For that, only the expanded
1781                          * items are converted; the common part with the
1782                          * user pattern is simply copied into the next buffer
1783                          * zone.
1784                          */
1785                         size += original_verbs_size;
1786                         if (size < flow_size) {
1787                                 rte_memcpy(flow->cur_verbs->attr,
1788                                            original_verbs->attr,
1789                                            original_verbs_size +
1790                                            sizeof(struct ibv_flow_attr));
1791                                 flow->cur_verbs->size = original_verbs_size;
1792                         }
1793                 }
1794                 ret = mlx5_flow_items
1795                         ((const struct rte_flow_item *)
1796                          &buf->entry[i].pattern[expanded_pattern_idx],
1797                          flow,
1798                          (size < flow_size) ? flow_size - size : 0, error);
1799                 if (ret < 0)
1800                         return ret;
1801                 size += ret;
1802                 if (size <= flow_size) {
1803                         mlx5_flow_adjust_priority(dev, flow);
1804                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
1805                 }
1806                 /*
1807                  * Keep a pointer to the first Verbs conversion and the layers
1808                  * it has encountered.
1809                  */
1810                 if (i == 0) {
1811                         original_verbs = flow->cur_verbs;
1812                         original_verbs_size = size - off2;
1813                         original_layers = flow->layers;
1814                         /*
1815                          * Move the index of the expanded pattern to the
1816                          * first item not addressed yet.
1817                          */
1818                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
1819                                 expanded_pattern_idx++;
1820                         } else {
1821                                 const struct rte_flow_item *item;
1822
1823                                 for (item = pattern;
1824                                      item->type != RTE_FLOW_ITEM_TYPE_END;
1825                                      ++item)
1826                                         expanded_pattern_idx++;
1827                         }
1828                 }
1829         }
1830         /* Restore the origin layers in the flow. */
1831         flow->layers = original_layers;
1832         return size;
1833 }
1834
1835 /**
1836  * Set the mark flag on the Rx queues if the flow has a mark or flag modifier.
1837  *
1838  * @param[in] dev
1839  *   Pointer to Ethernet device.
1840  * @param[in] flow
1841  *   Pointer to flow structure.
1842  */
1843 static void
1844 mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1845 {
1846         struct priv *priv = dev->data->dev_private;
1847
1848         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1849                 unsigned int i;
1850
1851                 for (i = 0; i != flow->rss.queue_num; ++i) {
1852                         int idx = (*flow->queue)[i];
1853                         struct mlx5_rxq_ctrl *rxq_ctrl =
1854                                 container_of((*priv->rxqs)[idx],
1855                                              struct mlx5_rxq_ctrl, rxq);
1856
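                        /*
                         * Count this flow's mark/flag request against the
                         * Rx queue and turn marking on.
                         */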
1857                         rxq_ctrl->rxq.mark = 1;
1858                         rxq_ctrl->flow_mark_n++;
1859                 }
1860         }
1861 }
1862
1863 /**
1864  * Clear the Rx queue mark associated with the @p flow if no other flow uses
1865  * it with a mark request.
1866  *
1867  * @param dev
1868  *   Pointer to Ethernet device.
1869  * @param[in] flow
1870  *   Pointer to the flow.
1871  */
1872 static void
1873 mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1874 {
1875         struct priv *priv = dev->data->dev_private;
1876
1877         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1878                 unsigned int i;
1879
1880                 for (i = 0; i != flow->rss.queue_num; ++i) {
1881                         int idx = (*flow->queue)[i];
1882                         struct mlx5_rxq_ctrl *rxq_ctrl =
1883                                 container_of((*priv->rxqs)[idx],
1884                                              struct mlx5_rxq_ctrl, rxq);
1885
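                        /*
                         * Drop one mark/flag reference; marking stays enabled
                         * only while other flows still request it.
                         */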
1886                         rxq_ctrl->flow_mark_n--;
1887                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1888                 }
1889         }
1890 }
1891
1892 /**
1893  * Clear the mark bit in all Rx queues.
1894  *
1895  * @param dev
1896  *   Pointer to Ethernet device.
1897  */
1898 static void
1899 mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
1900 {
1901         struct priv *priv = dev->data->dev_private;
1902         unsigned int i;
1903
1904         for (i = 0; i != priv->rxqs_n; ++i) {
1905                 struct mlx5_rxq_ctrl *rxq_ctrl;
1906
1907                 if (!(*priv->rxqs)[i])
1908                         continue;
1909                 rxq_ctrl = container_of((*priv->rxqs)[i],
1910                                         struct mlx5_rxq_ctrl, rxq);
1911                 rxq_ctrl->flow_mark_n = 0;
1912                 rxq_ctrl->rxq.mark = 0;
1915         }
1916 }
1917
1918 /**
1919  * Validate a flow supported by the NIC.
1920  *
1921  * @see rte_flow_validate()
1922  * @see rte_flow_ops
1923  */
1924 int
1925 mlx5_flow_validate(struct rte_eth_dev *dev,
1926                    const struct rte_flow_attr *attr,
1927                    const struct rte_flow_item items[],
1928                    const struct rte_flow_action actions[],
1929                    struct rte_flow_error *error)
1930 {
1931         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1932
1933         if (ret < 0)
1934                 return ret;
1935         return 0;
1936 }
1937
1938 /**
1939  * Remove the flow.
1940  *
1941  * @param[in] dev
1942  *   Pointer to Ethernet device.
1943  * @param[in, out] flow
1944  *   Pointer to flow structure.
1945  */
1946 static void
1947 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1948 {
1949         struct mlx5_flow_verbs *verbs;
1950
1951         LIST_FOREACH(verbs, &flow->verbs, next) {
1952                 if (verbs->flow) {
1953                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1954                         verbs->flow = NULL;
1955                 }
1956                 if (verbs->hrxq) {
1957                         if (flow->fate & MLX5_FLOW_FATE_DROP)
1958                                 mlx5_hrxq_drop_release(dev);
1959                         else
1960                                 mlx5_hrxq_release(dev, verbs->hrxq);
1961                         verbs->hrxq = NULL;
1962                 }
1963         }
1964 }
1965
1966 /**
1967  * Apply the flow.
1968  *
1969  * @param[in] dev
1970  *   Pointer to Ethernet device structure.
1971  * @param[in, out] flow
1972  *   Pointer to flow structure.
1973  * @param[out] error
1974  *   Pointer to error structure.
1975  *
1976  * @return
1977  *   0 on success, a negative errno value otherwise and rte_errno is set.
1978  */
1979 static int
1980 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1981                 struct rte_flow_error *error)
1982 {
1983         struct mlx5_flow_verbs *verbs;
1984         int err;
1985
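        /*
         * Each Verbs flow needs a hash Rx queue: the drop hash queue for
         * drop flows, otherwise an RSS hash Rx queue matching the flow key,
         * hash fields and queue list, reused when one already exists.
         */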
1986         LIST_FOREACH(verbs, &flow->verbs, next) {
1987                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
1988                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1989                         if (!verbs->hrxq) {
1990                                 rte_flow_error_set
1991                                         (error, errno,
1992                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1993                                          NULL,
1994                                          "cannot get drop hash queue");
1995                                 goto error;
1996                         }
1997                 } else {
1998                         struct mlx5_hrxq *hrxq;
1999
2000                         hrxq = mlx5_hrxq_get(dev, flow->key,
2001                                              MLX5_RSS_HASH_KEY_LEN,
2002                                              verbs->hash_fields,
2003                                              (*flow->queue),
2004                                              flow->rss.queue_num);
2005                         if (!hrxq)
2006                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2007                                                      MLX5_RSS_HASH_KEY_LEN,
2008                                                      verbs->hash_fields,
2009                                                      (*flow->queue),
2010                                                      flow->rss.queue_num);
2011                         if (!hrxq) {
2012                                 rte_flow_error_set
2013                                         (error, rte_errno,
2014                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2015                                          NULL,
2016                                          "cannot get hash queue");
2017                                 goto error;
2018                         }
2019                         verbs->hrxq = hrxq;
2020                 }
2021                 verbs->flow =
2022                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2023                 if (!verbs->flow) {
2024                         rte_flow_error_set(error, errno,
2025                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2026                                            NULL,
2027                                            "hardware refuses to create flow");
2028                         goto error;
2029                 }
2030         }
2031         return 0;
2032 error:
2033         err = rte_errno; /* Save rte_errno before cleanup. */
2034         LIST_FOREACH(verbs, &flow->verbs, next) {
2035                 if (verbs->hrxq) {
2036                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2037                                 mlx5_hrxq_drop_release(dev);
2038                         else
2039                                 mlx5_hrxq_release(dev, verbs->hrxq);
2040                         verbs->hrxq = NULL;
2041                 }
2042         }
2043         rte_errno = err; /* Restore rte_errno. */
2044         return -rte_errno;
2045 }
2046
2047 /**
2048  * Create a flow and add it to @p list.
2049  *
2050  * @param dev
2051  *   Pointer to Ethernet device.
2052  * @param list
2053  *   Pointer to a TAILQ flow list.
2054  * @param[in] attr
2055  *   Flow rule attributes.
2056  * @param[in] items
2057  *   Pattern specification (list terminated by the END pattern item).
2058  * @param[in] actions
2059  *   Associated actions (list terminated by the END action).
2060  * @param[out] error
2061  *   Perform verbose error reporting if not NULL.
2062  *
2063  * @return
2064  *   A flow on success, NULL otherwise and rte_errno is set.
2065  */
2066 static struct rte_flow *
2067 mlx5_flow_list_create(struct rte_eth_dev *dev,
2068                       struct mlx5_flows *list,
2069                       const struct rte_flow_attr *attr,
2070                       const struct rte_flow_item items[],
2071                       const struct rte_flow_action actions[],
2072                       struct rte_flow_error *error)
2073 {
2074         struct rte_flow *flow = NULL;
2075         size_t size = 0;
2076         int ret;
2077
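        /*
         * First pass with a zero-sized buffer only computes the amount of
         * memory the flow needs; the flow is then allocated and converted
         * for real in the second pass.
         */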
2078         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2079         if (ret < 0)
2080                 return NULL;
2081         size = ret;
2082         flow = rte_calloc(__func__, 1, size, 0);
2083         if (!flow) {
2084                 rte_flow_error_set(error, ENOMEM,
2085                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2086                                    NULL,
2087                                    "not enough memory to create flow");
2088                 return NULL;
2089         }
2090         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2091         if (ret < 0) {
2092                 rte_free(flow);
2093                 return NULL;
2094         }
2095         assert((size_t)ret == size);
2096         if (dev->data->dev_started) {
2097                 ret = mlx5_flow_apply(dev, flow, error);
2098                 if (ret < 0) {
2099                         ret = rte_errno; /* Save rte_errno before cleanup. */
2100                         if (flow) {
2101                                 mlx5_flow_remove(dev, flow);
2102                                 rte_free(flow);
2103                         }
2104                         rte_errno = ret; /* Restore rte_errno. */
2105                         return NULL;
2106                 }
2107         }
2108         TAILQ_INSERT_TAIL(list, flow, next);
2109         mlx5_flow_rxq_mark_set(dev, flow);
2110         return flow;
2111 }
2112
2113 /**
2114  * Create a flow.
2115  *
2116  * @see rte_flow_create()
2117  * @see rte_flow_ops
2118  */
2119 struct rte_flow *
2120 mlx5_flow_create(struct rte_eth_dev *dev,
2121                  const struct rte_flow_attr *attr,
2122                  const struct rte_flow_item items[],
2123                  const struct rte_flow_action actions[],
2124                  struct rte_flow_error *error)
2125 {
2126         return mlx5_flow_list_create
2127                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2128                  attr, items, actions, error);
2129 }
2130
2131 /**
2132  * Destroy a flow in a list.
2133  *
2134  * @param dev
2135  *   Pointer to Ethernet device.
2136  * @param list
2137  *   Pointer to a TAILQ flow list.
2138  * @param[in] flow
2139  *   Flow to destroy.
2140  */
2141 static void
2142 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2143                        struct rte_flow *flow)
2144 {
2145         mlx5_flow_remove(dev, flow);
2146         TAILQ_REMOVE(list, flow, next);
2147         mlx5_flow_rxq_mark_trim(dev, flow);
2148         rte_free(flow);
2149 }
2150
2151 /**
2152  * Destroy all flows.
2153  *
2154  * @param dev
2155  *   Pointer to Ethernet device.
2156  * @param list
2157  *   Pointer to a TAILQ flow list.
2158  */
2159 void
2160 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2161 {
2162         while (!TAILQ_EMPTY(list)) {
2163                 struct rte_flow *flow;
2164
2165                 flow = TAILQ_FIRST(list);
2166                 mlx5_flow_list_destroy(dev, list, flow);
2167         }
2168 }
2169
2170 /**
2171  * Remove all flows.
2172  *
2173  * @param dev
2174  *   Pointer to Ethernet device.
2175  * @param list
2176  *   Pointer to a TAILQ flow list.
2177  */
2178 void
2179 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2180 {
2181         struct rte_flow *flow;
2182
2183         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2184                 mlx5_flow_remove(dev, flow);
2185         mlx5_flow_rxq_mark_clear(dev);
2186 }
2187
2188 /**
2189  * Add all flows.
2190  *
2191  * @param dev
2192  *   Pointer to Ethernet device.
2193  * @param list
2194  *   Pointer to a TAILQ flow list.
2195  *
2196  * @return
2197  *   0 on success, a negative errno value otherwise and rte_errno is set.
2198  */
2199 int
2200 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2201 {
2202         struct rte_flow *flow;
2203         struct rte_flow_error error;
2204         int ret = 0;
2205
2206         TAILQ_FOREACH(flow, list, next) {
2207                 ret = mlx5_flow_apply(dev, flow, &error);
2208                 if (ret < 0)
2209                         goto error;
2210                 mlx5_flow_rxq_mark_set(dev, flow);
2211         }
2212         return 0;
2213 error:
2214         ret = rte_errno; /* Save rte_errno before cleanup. */
2215         mlx5_flow_stop(dev, list);
2216         rte_errno = ret; /* Restore rte_errno. */
2217         return -rte_errno;
2218 }
2219
2220 /**
2221  * Verify the flow list is empty.
2222  *
2223  * @param dev
2224  *   Pointer to Ethernet device.
2225  *
2226  * @return The number of flows not released.
2227  */
2228 int
2229 mlx5_flow_verify(struct rte_eth_dev *dev)
2230 {
2231         struct priv *priv = dev->data->dev_private;
2232         struct rte_flow *flow;
2233         int ret = 0;
2234
2235         TAILQ_FOREACH(flow, &priv->flows, next) {
2236                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2237                         dev->data->port_id, (void *)flow);
2238                 ++ret;
2239         }
2240         return ret;
2241 }
2242
2243 /**
2244  * Enable a control flow configured from the control plane.
2245  *
2246  * @param dev
2247  *   Pointer to Ethernet device.
2248  * @param eth_spec
2249  *   An Ethernet flow spec to apply.
2250  * @param eth_mask
2251  *   An Ethernet flow mask to apply.
2252  * @param vlan_spec
2253  *   A VLAN flow spec to apply.
2254  * @param vlan_mask
2255  *   A VLAN flow mask to apply.
2256  *
2257  * @return
2258  *   0 on success, a negative errno value otherwise and rte_errno is set.
2259  */
2260 int
2261 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2262                     struct rte_flow_item_eth *eth_spec,
2263                     struct rte_flow_item_eth *eth_mask,
2264                     struct rte_flow_item_vlan *vlan_spec,
2265                     struct rte_flow_item_vlan *vlan_mask)
2266 {
2267         struct priv *priv = dev->data->dev_private;
2268         const struct rte_flow_attr attr = {
2269                 .ingress = 1,
2270                 .priority = MLX5_FLOW_PRIO_RSVD,
2271         };
2272         struct rte_flow_item items[] = {
2273                 {
2274                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2275                         .spec = eth_spec,
2276                         .last = NULL,
2277                         .mask = eth_mask,
2278                 },
2279                 {
2280                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2281                                 RTE_FLOW_ITEM_TYPE_END,
2282                         .spec = vlan_spec,
2283                         .last = NULL,
2284                         .mask = vlan_mask,
2285                 },
2286                 {
2287                         .type = RTE_FLOW_ITEM_TYPE_END,
2288                 },
2289         };
2290         uint16_t queue[priv->reta_idx_n];
2291         struct rte_flow_action_rss action_rss = {
2292                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2293                 .level = 0,
2294                 .types = priv->rss_conf.rss_hf,
2295                 .key_len = priv->rss_conf.rss_key_len,
2296                 .queue_num = priv->reta_idx_n,
2297                 .key = priv->rss_conf.rss_key,
2298                 .queue = queue,
2299         };
2300         struct rte_flow_action actions[] = {
2301                 {
2302                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2303                         .conf = &action_rss,
2304                 },
2305                 {
2306                         .type = RTE_FLOW_ACTION_TYPE_END,
2307                 },
2308         };
2309         struct rte_flow *flow;
2310         struct rte_flow_error error;
2311         unsigned int i;
2312
2313         if (!priv->reta_idx_n) {
2314                 rte_errno = EINVAL;
2315                 return -rte_errno;
2316         }
2317         for (i = 0; i != priv->reta_idx_n; ++i)
2318                 queue[i] = (*priv->reta_idx)[i];
2319         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2320                                      actions, &error);
2321         if (!flow)
2322                 return -rte_errno;
2323         return 0;
2324 }
2325
2326 /**
2327  * Enable a control flow configured from the control plane.
2328  *
2329  * @param dev
2330  *   Pointer to Ethernet device.
2331  * @param eth_spec
2332  *   An Ethernet flow spec to apply.
2333  * @param eth_mask
2334  *   An Ethernet flow mask to apply.
2335  *
2336  * @return
2337  *   0 on success, a negative errno value otherwise and rte_errno is set.
2338  */
2339 int
2340 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2341                struct rte_flow_item_eth *eth_spec,
2342                struct rte_flow_item_eth *eth_mask)
2343 {
2344         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2345 }
2346
2347 /**
2348  * Destroy a flow.
2349  *
2350  * @see rte_flow_destroy()
2351  * @see rte_flow_ops
2352  */
2353 int
2354 mlx5_flow_destroy(struct rte_eth_dev *dev,
2355                   struct rte_flow *flow,
2356                   struct rte_flow_error *error __rte_unused)
2357 {
2358         struct priv *priv = dev->data->dev_private;
2359
2360         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2361         return 0;
2362 }
2363
2364 /**
2365  * Destroy all flows.
2366  *
2367  * @see rte_flow_flush()
2368  * @see rte_flow_ops
2369  */
2370 int
2371 mlx5_flow_flush(struct rte_eth_dev *dev,
2372                 struct rte_flow_error *error __rte_unused)
2373 {
2374         struct priv *priv = dev->data->dev_private;
2375
2376         mlx5_flow_list_flush(dev, &priv->flows);
2377         return 0;
2378 }
2379
2380 /**
2381  * Isolated mode.
2382  *
2383  * @see rte_flow_isolate()
2384  * @see rte_flow_ops
2385  */
2386 int
2387 mlx5_flow_isolate(struct rte_eth_dev *dev,
2388                   int enable,
2389                   struct rte_flow_error *error)
2390 {
2391         struct priv *priv = dev->data->dev_private;
2392
2393         if (dev->data->dev_started) {
2394                 rte_flow_error_set(error, EBUSY,
2395                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2396                                    NULL,
2397                                    "port must be stopped first");
2398                 return -rte_errno;
2399         }
2400         priv->isolated = !!enable;
2401         if (enable)
2402                 dev->dev_ops = &mlx5_dev_ops_isolate;
2403         else
2404                 dev->dev_ops = &mlx5_dev_ops;
2405         return 0;
2406 }
2407
2408 /**
2409  * Convert a flow director filter to a generic flow.
2410  *
2411  * @param dev
2412  *   Pointer to Ethernet device.
2413  * @param fdir_filter
2414  *   Flow director filter to add.
2415  * @param attributes
2416  *   Generic flow parameters structure.
2417  *
2418  * @return
2419  *   0 on success, a negative errno value otherwise and rte_errno is set.
2420  */
2421 static int
2422 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2423                          const struct rte_eth_fdir_filter *fdir_filter,
2424                          struct mlx5_fdir *attributes)
2425 {
2426         struct priv *priv = dev->data->dev_private;
2427         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2428         const struct rte_eth_fdir_masks *mask =
2429                 &dev->data->dev_conf.fdir_conf.mask;
2430
2431         /* Validate queue number. */
2432         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2433                 DRV_LOG(ERR, "port %u invalid queue number %d",
2434                         dev->data->port_id, fdir_filter->action.rx_queue);
2435                 rte_errno = EINVAL;
2436                 return -rte_errno;
2437         }
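        /*
         * Build the equivalent rte_flow rule: items[0] is always Ethernet,
         * items[1] carries the L3 header and items[2] the optional L4 one.
         */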
2438         attributes->attr.ingress = 1;
2439         attributes->items[0] = (struct rte_flow_item) {
2440                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2441                 .spec = &attributes->l2,
2442                 .mask = &attributes->l2_mask,
2443         };
2444         switch (fdir_filter->action.behavior) {
2445         case RTE_ETH_FDIR_ACCEPT:
2446                 attributes->actions[0] = (struct rte_flow_action){
2447                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2448                         .conf = &attributes->queue,
2449                 };
2450                 break;
2451         case RTE_ETH_FDIR_REJECT:
2452                 attributes->actions[0] = (struct rte_flow_action){
2453                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2454                 };
2455                 break;
2456         default:
2457                 DRV_LOG(ERR, "port %u invalid behavior %d",
2458                         dev->data->port_id,
2459                         fdir_filter->action.behavior);
2460                 rte_errno = ENOTSUP;
2461                 return -rte_errno;
2462         }
2463         attributes->queue.index = fdir_filter->action.rx_queue;
2464         /* Handle L3. */
2465         switch (fdir_filter->input.flow_type) {
2466         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2467         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2468         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2469                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2470                         .src_addr = input->flow.ip4_flow.src_ip,
2471                         .dst_addr = input->flow.ip4_flow.dst_ip,
2472                         .time_to_live = input->flow.ip4_flow.ttl,
2473                         .type_of_service = input->flow.ip4_flow.tos,
2474                         .next_proto_id = input->flow.ip4_flow.proto,
2475                 };
2476                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2477                         .src_addr = mask->ipv4_mask.src_ip,
2478                         .dst_addr = mask->ipv4_mask.dst_ip,
2479                         .time_to_live = mask->ipv4_mask.ttl,
2480                         .type_of_service = mask->ipv4_mask.tos,
2481                         .next_proto_id = mask->ipv4_mask.proto,
2482                 };
2483                 attributes->items[1] = (struct rte_flow_item){
2484                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2485                         .spec = &attributes->l3,
2486                         .mask = &attributes->l3_mask,
2487                 };
2488                 break;
2489         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2490         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2491         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2492                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2493                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2494                         .proto = input->flow.ipv6_flow.proto,
2495                 };
2496
2497                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2498                        input->flow.ipv6_flow.src_ip,
2499                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2500                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2501                        input->flow.ipv6_flow.dst_ip,
2502                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2503                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2504                        mask->ipv6_mask.src_ip,
2505                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2506                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2507                        mask->ipv6_mask.dst_ip,
2508                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2509                 attributes->items[1] = (struct rte_flow_item){
2510                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2511                         .spec = &attributes->l3,
2512                         .mask = &attributes->l3_mask,
2513                 };
2514                 break;
2515         default:
2516                 DRV_LOG(ERR, "port %u invalid flow type %d",
2517                         dev->data->port_id, fdir_filter->input.flow_type);
2518                 rte_errno = ENOTSUP;
2519                 return -rte_errno;
2520         }
2521         /* Handle L4. */
2522         switch (fdir_filter->input.flow_type) {
2523         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2524                 attributes->l4.udp.hdr = (struct udp_hdr){
2525                         .src_port = input->flow.udp4_flow.src_port,
2526                         .dst_port = input->flow.udp4_flow.dst_port,
2527                 };
2528                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2529                         .src_port = mask->src_port_mask,
2530                         .dst_port = mask->dst_port_mask,
2531                 };
2532                 attributes->items[2] = (struct rte_flow_item){
2533                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2534                         .spec = &attributes->l4,
2535                         .mask = &attributes->l4_mask,
2536                 };
2537                 break;
2538         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2539                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2540                         .src_port = input->flow.tcp4_flow.src_port,
2541                         .dst_port = input->flow.tcp4_flow.dst_port,
2542                 };
2543                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2544                         .src_port = mask->src_port_mask,
2545                         .dst_port = mask->dst_port_mask,
2546                 };
2547                 attributes->items[2] = (struct rte_flow_item){
2548                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2549                         .spec = &attributes->l4,
2550                         .mask = &attributes->l4_mask,
2551                 };
2552                 break;
2553         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2554                 attributes->l4.udp.hdr = (struct udp_hdr){
2555                         .src_port = input->flow.udp6_flow.src_port,
2556                         .dst_port = input->flow.udp6_flow.dst_port,
2557                 };
2558                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2559                         .src_port = mask->src_port_mask,
2560                         .dst_port = mask->dst_port_mask,
2561                 };
2562                 attributes->items[2] = (struct rte_flow_item){
2563                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2564                         .spec = &attributes->l4,
2565                         .mask = &attributes->l4_mask,
2566                 };
2567                 break;
2568         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2569                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2570                         .src_port = input->flow.tcp6_flow.src_port,
2571                         .dst_port = input->flow.tcp6_flow.dst_port,
2572                 };
2573                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2574                         .src_port = mask->src_port_mask,
2575                         .dst_port = mask->dst_port_mask,
2576                 };
2577                 attributes->items[2] = (struct rte_flow_item){
2578                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2579                         .spec = &attributes->l4,
2580                         .mask = &attributes->l4_mask,
2581                 };
2582                 break;
2583         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2584         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2585                 break;
2586         default:
2587                 DRV_LOG(ERR, "port %u invalid flow type %d",
2588                         dev->data->port_id, fdir_filter->input.flow_type);
2589                 rte_errno = ENOTSUP;
2590                 return -rte_errno;
2591         }
2592         return 0;
2593 }

/**
 * Add new flow director filter and store it in list.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Flow director filter to add.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_add(struct rte_eth_dev *dev,
                     const struct rte_eth_fdir_filter *fdir_filter)
{
        struct priv *priv = dev->data->dev_private;
        struct mlx5_fdir attributes = {
                .attr.group = 0,
                .l2_mask = {
                        .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
                        .type = 0,
                },
        };
        struct rte_flow_error error;
        struct rte_flow *flow;
        int ret;

        ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
        if (ret)
                return ret;
        flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
                                     attributes.items, attributes.actions,
                                     &error);
        if (flow) {
                DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
                        (void *)flow);
                return 0;
        }
        return -rte_errno;
}
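
/*
 * Usage sketch (editor's illustration, not part of the driver): an
 * application normally reaches mlx5_fdir_filter_add() through the
 * generic filter control entry point. The port number, port values and
 * queue index below are placeholders.
 *
 *   struct rte_eth_fdir_filter filter = {
 *           .input = {
 *                   .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_TCP,
 *                   .flow.tcp4_flow = {
 *                           .src_port = rte_cpu_to_be_16(1234),
 *                           .dst_port = rte_cpu_to_be_16(80),
 *                   },
 *           },
 *           .action = {
 *                   .rx_queue = 1,
 *                   .behavior = RTE_ETH_FDIR_ACCEPT,
 *           },
 *   };
 *   int ret;
 *
 *   ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                                 RTE_ETH_FILTER_ADD, &filter);
 */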

/**
 * Delete specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be deleted.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
                        const struct rte_eth_fdir_filter *fdir_filter
                        __rte_unused)
{
        rte_errno = ENOTSUP;
        return -rte_errno;
}

/**
 * Update queue for specific filter.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
                        const struct rte_eth_fdir_filter *fdir_filter)
{
        int ret;

        ret = mlx5_fdir_filter_delete(dev, fdir_filter);
        if (ret)
                return ret;
        return mlx5_fdir_filter_add(dev, fdir_filter);
}

/**
 * Flush all filters.
 *
 * @param dev
 *   Pointer to Ethernet device.
 */
static void
mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
{
        struct priv *priv = dev->data->dev_private;

        mlx5_flow_list_flush(dev, &priv->flows);
}

/**
 * Get flow director information.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param[out] fdir_info
 *   Resulting flow director information.
 */
static void
mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
{
        struct rte_eth_fdir_masks *mask =
                &dev->data->dev_conf.fdir_conf.mask;

        fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
        fdir_info->guarant_spc = 0;
        rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
        fdir_info->max_flexpayload = 0;
        fdir_info->flow_types_mask[0] = 0;
        fdir_info->flex_payload_unit = 0;
        fdir_info->max_flex_payload_segment_num = 0;
        fdir_info->flex_payload_limit = 0;
        memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
}
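
/*
 * Illustration (not part of the driver): the information filled in
 * above can be read back through the same filter control path
 * (port_id is a placeholder):
 *
 *   struct rte_eth_fdir_info info;
 *
 *   memset(&info, 0, sizeof(info));
 *   if (rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                               RTE_ETH_FILTER_INFO, &info) == 0)
 *           printf("flow director mode %d\n", (int)info.mode);
 */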

/**
 * Deal with flow director operations.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
                    void *arg)
{
        enum rte_fdir_mode fdir_mode =
                dev->data->dev_conf.fdir_conf.mode;

        if (filter_op == RTE_ETH_FILTER_NOP)
                return 0;
        if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
            fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
                DRV_LOG(ERR, "port %u flow director mode %d not supported",
                        dev->data->port_id, fdir_mode);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        switch (filter_op) {
        case RTE_ETH_FILTER_ADD:
                return mlx5_fdir_filter_add(dev, arg);
        case RTE_ETH_FILTER_UPDATE:
                return mlx5_fdir_filter_update(dev, arg);
        case RTE_ETH_FILTER_DELETE:
                return mlx5_fdir_filter_delete(dev, arg);
        case RTE_ETH_FILTER_FLUSH:
                mlx5_fdir_filter_flush(dev);
                break;
        case RTE_ETH_FILTER_INFO:
                mlx5_fdir_info_get(dev, arg);
                break;
        default:
                DRV_LOG(DEBUG, "port %u unknown operation %u",
                        dev->data->port_id, filter_op);
                rte_errno = EINVAL;
                return -rte_errno;
        }
        return 0;
}
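
/*
 * Illustration (not part of the driver): the mode check above requires
 * the port to have been configured in a perfect match mode, e.g. at
 * rte_eth_dev_configure() time (all values are placeholders):
 *
 *   struct rte_eth_conf port_conf = {
 *           .fdir_conf = {
 *                   .mode = RTE_FDIR_MODE_PERFECT,
 *                   .mask = {
 *                           .src_port_mask = 0xffff,
 *                           .dst_port_mask = 0xffff,
 *                   },
 *           },
 *   };
 *
 *   rte_eth_dev_configure(port_id, nb_rxq, nb_txq, &port_conf);
 */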

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET) {
                        rte_errno = EINVAL;
                        return -rte_errno;
                }
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        case RTE_ETH_FILTER_FDIR:
                return mlx5_fdir_ctrl_func(dev, filter_op, arg);
        default:
                DRV_LOG(ERR, "port %u filter type (%d) not supported",
                        dev->data->port_id, filter_type);
                rte_errno = ENOTSUP;
                return -rte_errno;
        }
        return 0;
}
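
/*
 * Illustration (not part of the driver): the RTE_ETH_FILTER_GENERIC /
 * RTE_ETH_FILTER_GET branch above is how the rte_flow library obtains
 * mlx5_flow_ops; applications do not call it directly but use the
 * rte_flow API (attr, pattern and actions are placeholders):
 *
 *   struct rte_flow_error err;
 *   struct rte_flow *flow = NULL;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err) == 0)
 *           flow = rte_flow_create(port_id, &attr, pattern, actions,
 *                                  &err);
 */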