net/mlx5: add flow VXLAN item
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern outer Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45
46 /* Pattern inner Layer bits. */
47 #define MLX5_FLOW_LAYER_INNER_L2 (1u << 6)
48 #define MLX5_FLOW_LAYER_INNER_L3_IPV4 (1u << 7)
49 #define MLX5_FLOW_LAYER_INNER_L3_IPV6 (1u << 8)
50 #define MLX5_FLOW_LAYER_INNER_L4_UDP (1u << 9)
51 #define MLX5_FLOW_LAYER_INNER_L4_TCP (1u << 10)
52 #define MLX5_FLOW_LAYER_INNER_VLAN (1u << 11)
53
54 /* Pattern tunnel Layer bits. */
55 #define MLX5_FLOW_LAYER_VXLAN (1u << 12)
56
57 /* Outer Masks. */
58 #define MLX5_FLOW_LAYER_OUTER_L3 \
59         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
60 #define MLX5_FLOW_LAYER_OUTER_L4 \
61         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
62 #define MLX5_FLOW_LAYER_OUTER \
63         (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3 | \
64          MLX5_FLOW_LAYER_OUTER_L4)
65
66 /* Tunnel Masks. */
67 #define MLX5_FLOW_LAYER_TUNNEL MLX5_FLOW_LAYER_VXLAN
68
69 /* Inner Masks. */
70 #define MLX5_FLOW_LAYER_INNER_L3 \
71         (MLX5_FLOW_LAYER_INNER_L3_IPV4 | MLX5_FLOW_LAYER_INNER_L3_IPV6)
72 #define MLX5_FLOW_LAYER_INNER_L4 \
73         (MLX5_FLOW_LAYER_INNER_L4_UDP | MLX5_FLOW_LAYER_INNER_L4_TCP)
74 #define MLX5_FLOW_LAYER_INNER \
75         (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3 | \
76          MLX5_FLOW_LAYER_INNER_L4)
77
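/*
 * Illustrative example (not part of the driver): for an
 * eth / ipv4 / udp / vxlan / eth / ipv4 pattern, the layers bit-field
 * accumulated by the item conversion functions below would be:
 *
 *   MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_L3_IPV4 |
 *   MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_VXLAN |
 *   MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_L3_IPV4
 *
 * i.e. 0x10cb, which is how duplicate layers and ordering violations
 * are detected.
 */
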
78 /* Actions that modify the fate of matching traffic. */
79 #define MLX5_FLOW_FATE_DROP (1u << 0)
80 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
81 #define MLX5_FLOW_FATE_RSS (1u << 2)
82
83 /* Modify a packet. */
84 #define MLX5_FLOW_MOD_FLAG (1u << 0)
85 #define MLX5_FLOW_MOD_MARK (1u << 1)
86
87 /* Possible L3 layer protocols for filtering. */
88 #define MLX5_IP_PROTOCOL_TCP 6
89 #define MLX5_IP_PROTOCOL_UDP 17
90
91 /* Priority reserved for default flows. */
92 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
93
94 enum mlx5_expansion {
95         MLX5_EXPANSION_ROOT,
96         MLX5_EXPANSION_ROOT_OUTER,
97         MLX5_EXPANSION_OUTER_ETH,
98         MLX5_EXPANSION_OUTER_IPV4,
99         MLX5_EXPANSION_OUTER_IPV4_UDP,
100         MLX5_EXPANSION_OUTER_IPV4_TCP,
101         MLX5_EXPANSION_OUTER_IPV6,
102         MLX5_EXPANSION_OUTER_IPV6_UDP,
103         MLX5_EXPANSION_OUTER_IPV6_TCP,
104         MLX5_EXPANSION_VXLAN,
105         MLX5_EXPANSION_ETH,
106         MLX5_EXPANSION_IPV4,
107         MLX5_EXPANSION_IPV4_UDP,
108         MLX5_EXPANSION_IPV4_TCP,
109         MLX5_EXPANSION_IPV6,
110         MLX5_EXPANSION_IPV6_UDP,
111         MLX5_EXPANSION_IPV6_TCP,
112 };
113
114 /** Supported expansion of items. */
115 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
116         [MLX5_EXPANSION_ROOT] = {
117                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
118                                                  MLX5_EXPANSION_IPV4,
119                                                  MLX5_EXPANSION_IPV6),
120                 .type = RTE_FLOW_ITEM_TYPE_END,
121         },
122         [MLX5_EXPANSION_ROOT_OUTER] = {
123                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_ETH,
124                                                  MLX5_EXPANSION_OUTER_IPV4,
125                                                  MLX5_EXPANSION_OUTER_IPV6),
126                 .type = RTE_FLOW_ITEM_TYPE_END,
127         },
128         [MLX5_EXPANSION_OUTER_ETH] = {
129                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_OUTER_IPV4,
130                                                  MLX5_EXPANSION_OUTER_IPV6),
131                 .type = RTE_FLOW_ITEM_TYPE_ETH,
132                 .rss_types = 0,
133         },
134         [MLX5_EXPANSION_OUTER_IPV4] = {
135                 .next = RTE_FLOW_EXPAND_RSS_NEXT
136                         (MLX5_EXPANSION_OUTER_IPV4_UDP,
137                          MLX5_EXPANSION_OUTER_IPV4_TCP),
138                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
139                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
140                         ETH_RSS_NONFRAG_IPV4_OTHER,
141         },
142         [MLX5_EXPANSION_OUTER_IPV4_UDP] = {
143                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN),
144                 .type = RTE_FLOW_ITEM_TYPE_UDP,
145                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
146         },
147         [MLX5_EXPANSION_OUTER_IPV4_TCP] = {
148                 .type = RTE_FLOW_ITEM_TYPE_TCP,
149                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
150         },
151         [MLX5_EXPANSION_OUTER_IPV6] = {
152                 .next = RTE_FLOW_EXPAND_RSS_NEXT
153                         (MLX5_EXPANSION_OUTER_IPV6_UDP,
154                          MLX5_EXPANSION_OUTER_IPV6_TCP),
155                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
156                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
157                         ETH_RSS_NONFRAG_IPV6_OTHER,
158         },
159         [MLX5_EXPANSION_OUTER_IPV6_UDP] = {
160                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_VXLAN),
161                 .type = RTE_FLOW_ITEM_TYPE_UDP,
162                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
163         },
164         [MLX5_EXPANSION_OUTER_IPV6_TCP] = {
165                 .type = RTE_FLOW_ITEM_TYPE_TCP,
166                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
167         },
168         [MLX5_EXPANSION_VXLAN] = {
169                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH),
170                 .type = RTE_FLOW_ITEM_TYPE_VXLAN,
171         },
172         [MLX5_EXPANSION_ETH] = {
173                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
174                                                  MLX5_EXPANSION_IPV6),
175                 .type = RTE_FLOW_ITEM_TYPE_ETH,
176         },
177         [MLX5_EXPANSION_IPV4] = {
178                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
179                                                  MLX5_EXPANSION_IPV4_TCP),
180                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
181                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
182                         ETH_RSS_NONFRAG_IPV4_OTHER,
183         },
184         [MLX5_EXPANSION_IPV4_UDP] = {
185                 .type = RTE_FLOW_ITEM_TYPE_UDP,
186                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
187         },
188         [MLX5_EXPANSION_IPV4_TCP] = {
189                 .type = RTE_FLOW_ITEM_TYPE_TCP,
190                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
191         },
192         [MLX5_EXPANSION_IPV6] = {
193                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
194                                                  MLX5_EXPANSION_IPV6_TCP),
195                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
196                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
197                         ETH_RSS_NONFRAG_IPV6_OTHER,
198         },
199         [MLX5_EXPANSION_IPV6_UDP] = {
200                 .type = RTE_FLOW_ITEM_TYPE_UDP,
201                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
202         },
203         [MLX5_EXPANSION_IPV6_TCP] = {
204                 .type = RTE_FLOW_ITEM_TYPE_TCP,
205                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
206         },
207 };
208
209 /** Verbs flow attributes, specifications and related objects. */
210 struct mlx5_flow_verbs {
211         LIST_ENTRY(mlx5_flow_verbs) next;
212         unsigned int size; /**< Size of the attribute. */
213         struct {
214                 struct ibv_flow_attr *attr;
215                 /**< Pointer to the Specification buffer. */
216                 uint8_t *specs; /**< Pointer to the specifications. */
217         };
218         struct ibv_flow *flow; /**< Verbs flow pointer. */
219         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
220         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
221 };
222
223 /* Flow structure. */
224 struct rte_flow {
225         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
226         struct rte_flow_attr attributes; /**< User flow attribute. */
227         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
228         uint32_t layers;
229         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
230         uint32_t modifier;
231         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
232         uint32_t fate;
233         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
234         uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
235         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
236         struct mlx5_flow_verbs *cur_verbs;
237         /**< Current Verbs flow structure being filled. */
238         struct rte_flow_action_rss rss;/**< RSS context. */
239         uint32_t tunnel_ptype;
240         /**< Tunnel packet type data to store in the Rx queue. */
241         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
242         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
243 };
244
245 static const struct rte_flow_ops mlx5_flow_ops = {
246         .validate = mlx5_flow_validate,
247         .create = mlx5_flow_create,
248         .destroy = mlx5_flow_destroy,
249         .flush = mlx5_flow_flush,
250         .isolate = mlx5_flow_isolate,
251 };
252
253 /* Convert FDIR request to Generic flow. */
254 struct mlx5_fdir {
255         struct rte_flow_attr attr;
256         struct rte_flow_action actions[2];
257         struct rte_flow_item items[4];
258         struct rte_flow_item_eth l2;
259         struct rte_flow_item_eth l2_mask;
260         union {
261                 struct rte_flow_item_ipv4 ipv4;
262                 struct rte_flow_item_ipv6 ipv6;
263         } l3;
264         union {
265                 struct rte_flow_item_ipv4 ipv4;
266                 struct rte_flow_item_ipv6 ipv6;
267         } l3_mask;
268         union {
269                 struct rte_flow_item_udp udp;
270                 struct rte_flow_item_tcp tcp;
271         } l4;
272         union {
273                 struct rte_flow_item_udp udp;
274                 struct rte_flow_item_tcp tcp;
275         } l4_mask;
276         struct rte_flow_action_queue queue;
277 };
278
279 /* Verbs specification header. */
280 struct ibv_spec_header {
281         enum ibv_flow_spec_type type;
282         uint16_t size;
283 };
284
285 /*
286  * Number of sub priorities.
287  * For each kind of pattern matching, i.e. L2, L3, L4, to get correct
288  * matching on the NIC (firmware dependent), L4 must have the highest
289  * priority, followed by L3 and ending with L2.
290  */
291 #define MLX5_PRIORITY_MAP_L2 2
292 #define MLX5_PRIORITY_MAP_L3 1
293 #define MLX5_PRIORITY_MAP_L4 0
294 #define MLX5_PRIORITY_MAP_MAX 3
295
296 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
297 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
298         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
299 };
300
301 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
302 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
303         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
304         { 9, 10, 11 }, { 12, 13, 14 },
305 };
306
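/*
 * Illustrative example (not part of the driver): with 16 Verbs
 * priorities available (priority_map_5), a flow created with rte_flow
 * priority 1 whose deepest matched layer is L3 (sub-priority
 * MLX5_PRIORITY_MAP_L3 == 1) is remapped by mlx5_flow_adjust_priority()
 * to Verbs priority priority_map_5[1][1] == 4.
 */
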
307 /**
308  * Discover the maximum number of flow priorities available.
309  *
310  * @param[in] dev
311  *   Pointer to Ethernet device.
312  *
313  * @return
314  *   Number of supported flow priorities on success, a negative errno
315  *   value otherwise and rte_errno is set.
316  */
317 int
318 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
319 {
320         struct {
321                 struct ibv_flow_attr attr;
322                 struct ibv_flow_spec_eth eth;
323                 struct ibv_flow_spec_action_drop drop;
324         } flow_attr = {
325                 .attr = {
326                         .num_of_specs = 2,
327                 },
328                 .eth = {
329                         .type = IBV_FLOW_SPEC_ETH,
330                         .size = sizeof(struct ibv_flow_spec_eth),
331                 },
332                 .drop = {
333                         .size = sizeof(struct ibv_flow_spec_action_drop),
334                         .type = IBV_FLOW_SPEC_ACTION_DROP,
335                 },
336         };
337         struct ibv_flow *flow;
338         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
339         uint16_t vprio[] = { 8, 16 };
340         int i;
341         int priority = 0;
342
343         if (!drop) {
344                 rte_errno = ENOTSUP;
345                 return -rte_errno;
346         }
347         for (i = 0; i != RTE_DIM(vprio); i++) {
348                 flow_attr.attr.priority = vprio[i] - 1;
349                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
350                 if (!flow)
351                         break;
352                 claim_zero(mlx5_glue->destroy_flow(flow));
353                 priority = vprio[i];
354         }
355         switch (priority) {
356         case 8:
357                 priority = RTE_DIM(priority_map_3);
358                 break;
359         case 16:
360                 priority = RTE_DIM(priority_map_5);
361                 break;
362         default:
363                 rte_errno = ENOTSUP;
364                 DRV_LOG(ERR,
365                         "port %u verbs maximum priority: %d expected 8/16",
366                         dev->data->port_id, vprio[i]);
367                 return -rte_errno;
368         }
369         mlx5_hrxq_drop_release(dev);
370         DRV_LOG(INFO, "port %u flow maximum priority: %d",
371                 dev->data->port_id, priority);
372         return priority;
373 }
374
375 /**
376  * Adjust flow priority.
377  *
378  * @param dev
379  *   Pointer to Ethernet device.
380  * @param flow
381  *   Pointer to an rte flow.
382  */
383 static void
384 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
385 {
386         struct priv *priv = dev->data->dev_private;
387         uint32_t priority = flow->attributes.priority;
388         uint32_t subpriority = flow->cur_verbs->attr->priority;
389
390         switch (priv->config.flow_prio) {
391         case RTE_DIM(priority_map_3):
392                 priority = priority_map_3[priority][subpriority];
393                 break;
394         case RTE_DIM(priority_map_5):
395                 priority = priority_map_5[priority][subpriority];
396                 break;
397         }
398         flow->cur_verbs->attr->priority = priority;
399 }
400
401 /**
402  * Verify the @p attributes will be correctly understood by the NIC and store
403  * them in the @p flow if everything is correct.
404  *
405  * @param[in] dev
406  *   Pointer to Ethernet device.
407  * @param[in] attributes
408  *   Pointer to flow attributes
409  * @param[in, out] flow
410  *   Pointer to the rte_flow structure.
411  * @param[out] error
412  *   Pointer to error structure.
413  *
414  * @return
415  *   0 on success, a negative errno value otherwise and rte_errno is set.
416  */
417 static int
418 mlx5_flow_attributes(struct rte_eth_dev *dev,
419                      const struct rte_flow_attr *attributes,
420                      struct rte_flow *flow,
421                      struct rte_flow_error *error)
422 {
423         uint32_t priority_max =
424                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
425
426         if (attributes->group)
427                 return rte_flow_error_set(error, ENOTSUP,
428                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
429                                           NULL,
430                                           "groups are not supported");
431         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
432             attributes->priority >= priority_max)
433                 return rte_flow_error_set(error, ENOTSUP,
434                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
435                                           NULL,
436                                           "priority out of range");
437         if (attributes->egress)
438                 return rte_flow_error_set(error, ENOTSUP,
439                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
440                                           NULL,
441                                           "egress is not supported");
442         if (attributes->transfer)
443                 return rte_flow_error_set(error, ENOTSUP,
444                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
445                                           NULL,
446                                           "transfer is not supported");
447         if (!attributes->ingress)
448                 return rte_flow_error_set(error, ENOTSUP,
449                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
450                                           NULL,
451                                           "ingress attribute is mandatory");
452         flow->attributes = *attributes;
453         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
454                 flow->attributes.priority = priority_max;
455         return 0;
456 }
457
458 /**
459  * Verify the @p item specifications (spec, last, mask) are compatible with the
460  * NIC capabilities.
461  *
462  * @param[in] item
463  *   Item specification.
464  * @param[in] mask
465  *   @p item->mask or flow default bit-masks.
466  * @param[in] nic_mask
467  *   Bit-masks covering supported fields by the NIC to compare with user mask.
468  * @param[in] size
469  *   Bit-masks size in bytes.
470  * @param[out] error
471  *   Pointer to error structure.
472  *
473  * @return
474  *   0 on success, a negative errno value otherwise and rte_errno is set.
475  */
476 static int
477 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
478                           const uint8_t *mask,
479                           const uint8_t *nic_mask,
480                           unsigned int size,
481                           struct rte_flow_error *error)
482 {
483         unsigned int i;
484
485         assert(nic_mask);
486         for (i = 0; i < size; ++i)
487                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
488                         return rte_flow_error_set(error, ENOTSUP,
489                                                   RTE_FLOW_ERROR_TYPE_ITEM,
490                                                   item,
491                                                   "mask enables unsupported"
492                                                   " bits");
493         if (!item->spec && (item->mask || item->last))
494                 return rte_flow_error_set(error, EINVAL,
495                                           RTE_FLOW_ERROR_TYPE_ITEM,
496                                           item,
497                                           "mask/last without a spec is not"
498                                           " supported");
499         if (item->spec && item->last) {
500                 uint8_t spec[size];
501                 uint8_t last[size];
502                 unsigned int i;
503                 int ret;
504
505                 for (i = 0; i < size; ++i) {
506                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
507                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
508                 }
509                 ret = memcmp(spec, last, size);
510                 if (ret != 0)
511                         return rte_flow_error_set(error, ENOTSUP,
512                                                   RTE_FLOW_ERROR_TYPE_ITEM,
513                                                   item,
514                                                   "range is not supported");
515         }
516         return 0;
517 }
518
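/*
 * Illustrative example (hypothetical values, not part of the driver):
 * the VLAN NIC mask defined below only supports .tci = 0x0fff.  A user
 * mask requesting .tci = 0xffff is rejected by the byte-wise check
 * above, because (nic_mask | mask) != nic_mask for the byte holding the
 * PCP/DEI bits, i.e. the mask enables bits the NIC cannot match on.
 */
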
519 /**
520  * Add a verbs item specification into @p flow.
521  *
522  * @param[in, out] flow
523  *   Pointer to flow structure.
524  * @param[in] src
525  *   Create specification.
526  * @param[in] size
527  *   Size in bytes of the specification to copy.
528  */
529 static void
530 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
531 {
532         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
533
534         if (verbs->specs) {
535                 void *dst;
536
537                 dst = (void *)(verbs->specs + verbs->size);
538                 memcpy(dst, src, size);
539                 ++verbs->attr->num_of_specs;
540         }
541         verbs->size += size;
542 }
543
544 /**
545  * Adjust verbs hash fields according to the @p flow information.
546  *
547  * @param[in, out] flow
548  *   Pointer to flow structure.
549  * @param[in] tunnel
550  *   1 when the hash field is for a tunnel item.
551  * @param[in] layer_types
552  *   ETH_RSS_* types.
553  * @param[in] hash_fields
554  *   Item hash fields.
555  */
556 static void
557 mlx5_flow_verbs_hashfields_adjust(struct rte_flow *flow,
558                                   int tunnel __rte_unused,
559                                   uint32_t layer_types, uint64_t hash_fields)
560 {
561 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
562         hash_fields |= (tunnel ? IBV_RX_HASH_INNER : 0);
563         if (flow->rss.level == 2 && !tunnel)
564                 hash_fields = 0;
565         else if (flow->rss.level < 2 && tunnel)
566                 hash_fields = 0;
567 #endif
568         if (!(flow->rss.types & layer_types))
569                 hash_fields = 0;
570         flow->cur_verbs->hash_fields |= hash_fields;
571 }
572
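/*
 * Illustrative example (not part of the driver): for an outer IPv4 item
 * (tunnel == 0) in a flow requesting ETH_RSS_IPV4, the IPv4 conversion
 * below calls
 *
 *   mlx5_flow_verbs_hashfields_adjust(flow, 0,
 *           ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 | ETH_RSS_NONFRAG_IPV4_OTHER,
 *           IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4);
 *
 * which keeps both IPv4 hash fields (assuming the RSS level does not
 * restrict the hash to the inner headers); had none of those ETH_RSS_*
 * types been requested, hash_fields would be reset to 0 and the layer
 * would not contribute to the Verbs hash configuration.
 */
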
573 /**
574  * Convert the @p item into a Verbs specification after ensuring the NIC
575  * will understand and process it correctly.
576  * If the necessary size for the conversion is greater than the @p flow_size,
577  * nothing is written in @p flow; the validation is still performed.
578  *
579  * @param[in] item
580  *   Item specification.
581  * @param[in, out] flow
582  *   Pointer to flow structure.
583  * @param[in] flow_size
584  *   Size in bytes of the available space in @p flow, if too small, nothing is
585  *   written.
586  * @param[out] error
587  *   Pointer to error structure.
588  *
589  * @return
590  *   On success, the number of bytes consumed/necessary; if the returned value
591  *   is less than or equal to @p flow_size, the @p item has been fully converted,
592  *   otherwise another call with this returned memory size should be done.
593  *   On error, a negative errno value is returned and rte_errno is set.
594  */
595 static int
596 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
597                    const size_t flow_size, struct rte_flow_error *error)
598 {
599         const struct rte_flow_item_eth *spec = item->spec;
600         const struct rte_flow_item_eth *mask = item->mask;
601         const struct rte_flow_item_eth nic_mask = {
602                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
603                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
604                 .type = RTE_BE16(0xffff),
605         };
606         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
607         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
608         struct ibv_flow_spec_eth eth = {
609                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
610                 .size = size,
611         };
612         int ret;
613
614         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
615                             MLX5_FLOW_LAYER_OUTER_L2))
616                 return rte_flow_error_set(error, ENOTSUP,
617                                           RTE_FLOW_ERROR_TYPE_ITEM,
618                                           item,
619                                           "L2 layers already configured");
620         if (!mask)
621                 mask = &rte_flow_item_eth_mask;
622         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
623                                         (const uint8_t *)&nic_mask,
624                                         sizeof(struct rte_flow_item_eth),
625                                         error);
626         if (ret)
627                 return ret;
628         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
629                 MLX5_FLOW_LAYER_OUTER_L2;
630         if (size > flow_size)
631                 return size;
632         if (spec) {
633                 unsigned int i;
634
635                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
636                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
637                 eth.val.ether_type = spec->type;
638                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
639                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
640                 eth.mask.ether_type = mask->type;
641                 /* Remove unwanted bits from values. */
642                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
643                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
644                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
645                 }
646                 eth.val.ether_type &= eth.mask.ether_type;
647         }
648         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
649         mlx5_flow_spec_verbs_add(flow, &eth, size);
650         return size;
651 }
652
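/*
 * Illustrative sketch of the two-pass contract documented above
 * (hypothetical caller, not part of this file): the mlx5_flow_item_*()
 * helpers only validate and return the required size when the buffer is
 * too small, so a caller can first measure and then convert.  Note that
 * each pass must start from a clean flow structure because the layers
 * bit-field accumulates.
 *
 *   int size = mlx5_flow_items(pattern, &probe_flow, 0, error);
 *   if (size < 0)
 *           return size;  // validation failed, rte_errno is set
 *   // Allocate a flow with at least "size" bytes of Verbs spec space,
 *   // then run the conversion again on that fresh flow:
 *   size = mlx5_flow_items(pattern, real_flow, size, error);
 */
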
653 /**
654  * Update the VLAN tag in the Verbs Ethernet specification.
655  *
656  * @param[in, out] attr
657  *   Pointer to Verbs attributes structure.
658  * @param[in] eth
659  *   Verbs structure containing the VLAN information to copy.
660  */
661 static void
662 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
663                            struct ibv_flow_spec_eth *eth)
664 {
665         unsigned int i;
666         const enum ibv_flow_spec_type search = eth->type;
667         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
668                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
669
670         for (i = 0; i != attr->num_of_specs; ++i) {
671                 if (hdr->type == search) {
672                         struct ibv_flow_spec_eth *e =
673                                 (struct ibv_flow_spec_eth *)hdr;
674
675                         e->val.vlan_tag = eth->val.vlan_tag;
676                         e->mask.vlan_tag = eth->mask.vlan_tag;
677                         e->val.ether_type = eth->val.ether_type;
678                         e->mask.ether_type = eth->mask.ether_type;
679                         break;
680                 }
681                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
682         }
683 }
684
685 /**
686  * Convert the @p item into @p flow (or by updating the already present
687  * Ethernet Verbs) specification after ensuring the NIC will understand and
688  * process it correctly.
689  * If the necessary size for the conversion is greater than the @p flow_size,
690  * nothing is written in @p flow; the validation is still performed.
691  *
692  * @param[in] item
693  *   Item specification.
694  * @param[in, out] flow
695  *   Pointer to flow structure.
696  * @param[in] flow_size
697  *   Size in bytes of the available space in @p flow, if too small, nothing is
698  *   written.
699  * @param[out] error
700  *   Pointer to error structure.
701  *
702  * @return
703  *   On success, the number of bytes consumed/necessary; if the returned value
704  *   is less than or equal to @p flow_size, the @p item has been fully converted,
705  *   otherwise another call with this returned memory size should be done.
706  *   On error, a negative errno value is returned and rte_errno is set.
707  */
708 static int
709 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
710                     const size_t flow_size, struct rte_flow_error *error)
711 {
712         const struct rte_flow_item_vlan *spec = item->spec;
713         const struct rte_flow_item_vlan *mask = item->mask;
714         const struct rte_flow_item_vlan nic_mask = {
715                 .tci = RTE_BE16(0x0fff),
716                 .inner_type = RTE_BE16(0xffff),
717         };
718         unsigned int size = sizeof(struct ibv_flow_spec_eth);
719         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
720         struct ibv_flow_spec_eth eth = {
721                 .type = IBV_FLOW_SPEC_ETH | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
722                 .size = size,
723         };
724         int ret;
725         const uint32_t l34m = tunnel ? (MLX5_FLOW_LAYER_INNER_L3 |
726                                         MLX5_FLOW_LAYER_INNER_L4) :
727                 (MLX5_FLOW_LAYER_OUTER_L3 | MLX5_FLOW_LAYER_OUTER_L4);
728         const uint32_t vlanm = tunnel ? MLX5_FLOW_LAYER_INNER_VLAN :
729                 MLX5_FLOW_LAYER_OUTER_VLAN;
730         const uint32_t l2m = tunnel ? MLX5_FLOW_LAYER_INNER_L2 :
731                 MLX5_FLOW_LAYER_OUTER_L2;
732
733         if (flow->layers & vlanm)
734                 return rte_flow_error_set(error, ENOTSUP,
735                                           RTE_FLOW_ERROR_TYPE_ITEM,
736                                           item,
737                                           "VLAN layer already configured");
738         else if ((flow->layers & l34m) != 0)
739                 return rte_flow_error_set(error, ENOTSUP,
740                                           RTE_FLOW_ERROR_TYPE_ITEM,
741                                           item,
742                                           "L2 layer cannot follow L3/L4 layer");
743         if (!mask)
744                 mask = &rte_flow_item_vlan_mask;
745         ret = mlx5_flow_item_acceptable
746                 (item, (const uint8_t *)mask,
747                  (const uint8_t *)&nic_mask,
748                  sizeof(struct rte_flow_item_vlan), error);
749         if (ret)
750                 return ret;
751         if (spec) {
752                 eth.val.vlan_tag = spec->tci;
753                 eth.mask.vlan_tag = mask->tci;
754                 eth.val.vlan_tag &= eth.mask.vlan_tag;
755                 eth.val.ether_type = spec->inner_type;
756                 eth.mask.ether_type = mask->inner_type;
757                 eth.val.ether_type &= eth.mask.ether_type;
758         }
759         /*
760          * From the Verbs perspective, an empty VLAN is equivalent
761          * to a packet without a VLAN layer.
762          */
763         if (!eth.mask.vlan_tag)
764                 return rte_flow_error_set(error, EINVAL,
765                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
766                                           item->spec,
767                                           "VLAN cannot be empty");
768         if (!(flow->layers & l2m)) {
769                 if (size <= flow_size) {
770                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
771                         mlx5_flow_spec_verbs_add(flow, &eth, size);
772                 }
773         } else {
774                 if (flow->cur_verbs)
775                         mlx5_flow_item_vlan_update(flow->cur_verbs->attr,
776                                                    &eth);
777                 size = 0; /* Only an update is done in eth specification. */
778         }
779         flow->layers |= tunnel ?
780                 (MLX5_FLOW_LAYER_INNER_L2 | MLX5_FLOW_LAYER_INNER_VLAN) :
781                 (MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN);
782         return size;
783 }
784
785 /**
786  * Convert the @p item into a Verbs specification after ensuring the NIC
787  * will understand and process it correctly.
788  * If the necessary size for the conversion is greater than the @p flow_size,
789  * nothing is written in @p flow; the validation is still performed.
790  *
791  * @param[in] item
792  *   Item specification.
793  * @param[in, out] flow
794  *   Pointer to flow structure.
795  * @param[in] flow_size
796  *   Size in bytes of the available space in @p flow, if too small, nothing is
797  *   written.
798  * @param[out] error
799  *   Pointer to error structure.
800  *
801  * @return
802  *   On success, the number of bytes consumed/necessary; if the returned value
803  *   is less than or equal to @p flow_size, the @p item has been fully converted,
804  *   otherwise another call with this returned memory size should be done.
805  *   On error, a negative errno value is returned and rte_errno is set.
806  */
807 static int
808 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
809                     const size_t flow_size, struct rte_flow_error *error)
810 {
811         const struct rte_flow_item_ipv4 *spec = item->spec;
812         const struct rte_flow_item_ipv4 *mask = item->mask;
813         const struct rte_flow_item_ipv4 nic_mask = {
814                 .hdr = {
815                         .src_addr = RTE_BE32(0xffffffff),
816                         .dst_addr = RTE_BE32(0xffffffff),
817                         .type_of_service = 0xff,
818                         .next_proto_id = 0xff,
819                 },
820         };
821         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
822         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
823         struct ibv_flow_spec_ipv4_ext ipv4 = {
824                 .type = IBV_FLOW_SPEC_IPV4_EXT |
825                         (tunnel ? IBV_FLOW_SPEC_INNER : 0),
826                 .size = size,
827         };
828         int ret;
829
830         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
831                             MLX5_FLOW_LAYER_OUTER_L3))
832                 return rte_flow_error_set(error, ENOTSUP,
833                                           RTE_FLOW_ERROR_TYPE_ITEM,
834                                           item,
835                                           "multiple L3 layers not supported");
836         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
837                                  MLX5_FLOW_LAYER_OUTER_L4))
838                 return rte_flow_error_set(error, ENOTSUP,
839                                           RTE_FLOW_ERROR_TYPE_ITEM,
840                                           item,
841                                           "L3 cannot follow an L4 layer.");
842         if (!mask)
843                 mask = &rte_flow_item_ipv4_mask;
844         ret = mlx5_flow_item_acceptable
845                 (item, (const uint8_t *)mask,
846                  (const uint8_t *)&nic_mask,
847                  sizeof(struct rte_flow_item_ipv4), error);
848         if (ret < 0)
849                 return ret;
850         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV4 :
851                 MLX5_FLOW_LAYER_OUTER_L3_IPV4;
852         if (spec) {
853                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
854                         .src_ip = spec->hdr.src_addr,
855                         .dst_ip = spec->hdr.dst_addr,
856                         .proto = spec->hdr.next_proto_id,
857                         .tos = spec->hdr.type_of_service,
858                 };
859                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
860                         .src_ip = mask->hdr.src_addr,
861                         .dst_ip = mask->hdr.dst_addr,
862                         .proto = mask->hdr.next_proto_id,
863                         .tos = mask->hdr.type_of_service,
864                 };
865                 /* Remove unwanted bits from values. */
866                 ipv4.val.src_ip &= ipv4.mask.src_ip;
867                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
868                 ipv4.val.proto &= ipv4.mask.proto;
869                 ipv4.val.tos &= ipv4.mask.tos;
870         }
871         flow->l3_protocol_en = !!ipv4.mask.proto;
872         flow->l3_protocol = ipv4.val.proto;
873         if (size <= flow_size) {
874                 mlx5_flow_verbs_hashfields_adjust
875                         (flow, tunnel,
876                          (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
877                           ETH_RSS_NONFRAG_IPV4_OTHER),
878                          (IBV_RX_HASH_SRC_IPV4 | IBV_RX_HASH_DST_IPV4));
879                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
880                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
881         }
882         return size;
883 }
884
885 /**
886  * Convert the @p item into a Verbs specification after ensuring the NIC
887  * will understand and process it correctly.
888  * If the necessary size for the conversion is greater than the @p flow_size,
889  * nothing is written in @p flow; the validation is still performed.
890  *
891  * @param[in] item
892  *   Item specification.
893  * @param[in, out] flow
894  *   Pointer to flow structure.
895  * @param[in] flow_size
896  *   Size in bytes of the available space in @p flow, if too small, nothing is
897  *   written.
898  * @param[out] error
899  *   Pointer to error structure.
900  *
901  * @return
902  *   On success, the number of bytes consumed/necessary; if the returned value
903  *   is less than or equal to @p flow_size, the @p item has been fully converted,
904  *   otherwise another call with this returned memory size should be done.
905  *   On error, a negative errno value is returned and rte_errno is set.
906  */
907 static int
908 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
909                     const size_t flow_size, struct rte_flow_error *error)
910 {
911         const struct rte_flow_item_ipv6 *spec = item->spec;
912         const struct rte_flow_item_ipv6 *mask = item->mask;
913         const struct rte_flow_item_ipv6 nic_mask = {
914                 .hdr = {
915                         .src_addr =
916                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
917                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
918                         .dst_addr =
919                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
920                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
921                         .vtc_flow = RTE_BE32(0xffffffff),
922                         .proto = 0xff,
923                         .hop_limits = 0xff,
924                 },
925         };
926         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
927         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
928         struct ibv_flow_spec_ipv6 ipv6 = {
929                 .type = IBV_FLOW_SPEC_IPV6 | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
930                 .size = size,
931         };
932         int ret;
933
934         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
935                             MLX5_FLOW_LAYER_OUTER_L3))
936                 return rte_flow_error_set(error, ENOTSUP,
937                                           RTE_FLOW_ERROR_TYPE_ITEM,
938                                           item,
939                                           "multiple L3 layers not supported");
940         else if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
941                                  MLX5_FLOW_LAYER_OUTER_L4))
942                 return rte_flow_error_set(error, ENOTSUP,
943                                           RTE_FLOW_ERROR_TYPE_ITEM,
944                                           item,
945                                           "L3 cannot follow an L4 layer.");
946         if (!mask)
947                 mask = &rte_flow_item_ipv6_mask;
948         ret = mlx5_flow_item_acceptable
949                 (item, (const uint8_t *)mask,
950                  (const uint8_t *)&nic_mask,
951                  sizeof(struct rte_flow_item_ipv6), error);
952         if (ret < 0)
953                 return ret;
954         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L3_IPV6 :
955                 MLX5_FLOW_LAYER_OUTER_L3_IPV6;
956         if (spec) {
957                 unsigned int i;
958                 uint32_t vtc_flow_val;
959                 uint32_t vtc_flow_mask;
960
961                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
962                        RTE_DIM(ipv6.val.src_ip));
963                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
964                        RTE_DIM(ipv6.val.dst_ip));
965                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
966                        RTE_DIM(ipv6.mask.src_ip));
967                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
968                        RTE_DIM(ipv6.mask.dst_ip));
969                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
970                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
971                 ipv6.val.flow_label =
972                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
973                                          IPV6_HDR_FL_SHIFT);
974                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
975                                          IPV6_HDR_TC_SHIFT;
976                 ipv6.val.next_hdr = spec->hdr.proto;
977                 ipv6.val.hop_limit = spec->hdr.hop_limits;
978                 ipv6.mask.flow_label =
979                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
980                                          IPV6_HDR_FL_SHIFT);
981                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
982                                           IPV6_HDR_TC_SHIFT;
983                 ipv6.mask.next_hdr = mask->hdr.proto;
984                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
985                 /* Remove unwanted bits from values. */
986                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
987                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
988                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
989                 }
990                 ipv6.val.flow_label &= ipv6.mask.flow_label;
991                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
992                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
993                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
994         }
995         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
996         flow->l3_protocol = ipv6.val.next_hdr;
997         if (size <= flow_size) {
998                 mlx5_flow_verbs_hashfields_adjust
999                         (flow, tunnel,
1000                          (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER),
1001                          (IBV_RX_HASH_SRC_IPV6 | IBV_RX_HASH_DST_IPV6));
1002                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
1003                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
1004         }
1005         return size;
1006 }
1007
1008 /**
1009  * Convert the @p item into a Verbs specification after ensuring the NIC
1010  * will understand and process it correctly.
1011  * If the necessary size for the conversion is greater than the @p flow_size,
1012  * nothing is written in @p flow; the validation is still performed.
1013  *
1014  * @param[in] item
1015  *   Item specification.
1016  * @param[in, out] flow
1017  *   Pointer to flow structure.
1018  * @param[in] flow_size
1019  *   Size in bytes of the available space in @p flow, if too small, nothing is
1020  *   written.
1021  * @param[out] error
1022  *   Pointer to error structure.
1023  *
1024  * @return
1025  *   On success, the number of bytes consumed/necessary; if the returned value
1026  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1027  *   otherwise another call with this returned memory size should be done.
1028  *   On error, a negative errno value is returned and rte_errno is set.
1029  */
1030 static int
1031 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
1032                    const size_t flow_size, struct rte_flow_error *error)
1033 {
1034         const struct rte_flow_item_udp *spec = item->spec;
1035         const struct rte_flow_item_udp *mask = item->mask;
1036         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1037         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1038         struct ibv_flow_spec_tcp_udp udp = {
1039                 .type = IBV_FLOW_SPEC_UDP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1040                 .size = size,
1041         };
1042         int ret;
1043
1044         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
1045                 return rte_flow_error_set(error, ENOTSUP,
1046                                           RTE_FLOW_ERROR_TYPE_ITEM,
1047                                           item,
1048                                           "protocol filtering not compatible"
1049                                           " with UDP layer");
1050         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1051                               MLX5_FLOW_LAYER_OUTER_L3)))
1052                 return rte_flow_error_set(error, ENOTSUP,
1053                                           RTE_FLOW_ERROR_TYPE_ITEM,
1054                                           item,
1055                                           "L3 is mandatory to filter"
1056                                           " on L4");
1057         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1058                             MLX5_FLOW_LAYER_OUTER_L4))
1059                 return rte_flow_error_set(error, ENOTSUP,
1060                                           RTE_FLOW_ERROR_TYPE_ITEM,
1061                                           item,
1062                                           "L4 layer is already"
1063                                           " present");
1064         if (!mask)
1065                 mask = &rte_flow_item_udp_mask;
1066         ret = mlx5_flow_item_acceptable
1067                 (item, (const uint8_t *)mask,
1068                  (const uint8_t *)&rte_flow_item_udp_mask,
1069                  sizeof(struct rte_flow_item_udp), error);
1070         if (ret < 0)
1071                 return ret;
1072         flow->layers |= tunnel ? MLX5_FLOW_LAYER_INNER_L4_UDP :
1073                 MLX5_FLOW_LAYER_OUTER_L4_UDP;
1074         if (spec) {
1075                 udp.val.dst_port = spec->hdr.dst_port;
1076                 udp.val.src_port = spec->hdr.src_port;
1077                 udp.mask.dst_port = mask->hdr.dst_port;
1078                 udp.mask.src_port = mask->hdr.src_port;
1079                 /* Remove unwanted bits from values. */
1080                 udp.val.src_port &= udp.mask.src_port;
1081                 udp.val.dst_port &= udp.mask.dst_port;
1082         }
1083         if (size <= flow_size) {
1084                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_UDP,
1085                                                   (IBV_RX_HASH_SRC_PORT_UDP |
1086                                                    IBV_RX_HASH_DST_PORT_UDP));
1087                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1088                 mlx5_flow_spec_verbs_add(flow, &udp, size);
1089         }
1090         return size;
1091 }
1092
1093 /**
1094  * Convert the @p item into a Verbs specification after ensuring the NIC
1095  * will understand and process it correctly.
1096  * If the necessary size for the conversion is greater than the @p flow_size,
1097  * nothing is written in @p flow; the validation is still performed.
1098  *
1099  * @param[in] item
1100  *   Item specification.
1101  * @param[in, out] flow
1102  *   Pointer to flow structure.
1103  * @param[in] flow_size
1104  *   Size in bytes of the available space in @p flow, if too small, nothing is
1105  *   written.
1106  * @param[out] error
1107  *   Pointer to error structure.
1108  *
1109  * @return
1110  *   On success, the number of bytes consumed/necessary; if the returned value
1111  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1112  *   otherwise another call with this returned memory size should be done.
1113  *   On error, a negative errno value is returned and rte_errno is set.
1114  */
1115 static int
1116 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
1117                    const size_t flow_size, struct rte_flow_error *error)
1118 {
1119         const struct rte_flow_item_tcp *spec = item->spec;
1120         const struct rte_flow_item_tcp *mask = item->mask;
1121         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1122         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
1123         struct ibv_flow_spec_tcp_udp tcp = {
1124                 .type = IBV_FLOW_SPEC_TCP | (tunnel ? IBV_FLOW_SPEC_INNER : 0),
1125                 .size = size,
1126         };
1127         int ret;
1128
1129         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
1130                 return rte_flow_error_set(error, ENOTSUP,
1131                                           RTE_FLOW_ERROR_TYPE_ITEM,
1132                                           item,
1133                                           "protocol filtering not compatible"
1134                                           " with TCP layer");
1135         if (!(flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L3 :
1136                               MLX5_FLOW_LAYER_OUTER_L3)))
1137                 return rte_flow_error_set(error, ENOTSUP,
1138                                           RTE_FLOW_ERROR_TYPE_ITEM,
1139                                           item,
1140                                           "L3 is mandatory to filter on L4");
1141         if (flow->layers & (tunnel ? MLX5_FLOW_LAYER_INNER_L4 :
1142                             MLX5_FLOW_LAYER_OUTER_L4))
1143                 return rte_flow_error_set(error, ENOTSUP,
1144                                           RTE_FLOW_ERROR_TYPE_ITEM,
1145                                           item,
1146                                           "L4 layer is already present");
1147         if (!mask)
1148                 mask = &rte_flow_item_tcp_mask;
1149         ret = mlx5_flow_item_acceptable
1150                 (item, (const uint8_t *)mask,
1151                  (const uint8_t *)&rte_flow_item_tcp_mask,
1152                  sizeof(struct rte_flow_item_tcp), error);
1153         if (ret < 0)
1154                 return ret;
1155         flow->layers |=  tunnel ? MLX5_FLOW_LAYER_INNER_L4_TCP :
1156                 MLX5_FLOW_LAYER_OUTER_L4_TCP;
1157         if (spec) {
1158                 tcp.val.dst_port = spec->hdr.dst_port;
1159                 tcp.val.src_port = spec->hdr.src_port;
1160                 tcp.mask.dst_port = mask->hdr.dst_port;
1161                 tcp.mask.src_port = mask->hdr.src_port;
1162                 /* Remove unwanted bits from values. */
1163                 tcp.val.src_port &= tcp.mask.src_port;
1164                 tcp.val.dst_port &= tcp.mask.dst_port;
1165         }
1166         if (size <= flow_size) {
1167                 mlx5_flow_verbs_hashfields_adjust(flow, tunnel, ETH_RSS_TCP,
1168                                                   (IBV_RX_HASH_SRC_PORT_TCP |
1169                                                    IBV_RX_HASH_DST_PORT_TCP));
1170                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1171                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1172         }
1173         return size;
1174 }
1175
1176 /**
1177  * Convert the @p item into a Verbs specification after ensuring the NIC
1178  * will understand and process it correctly.
1179  * If the necessary size for the conversion is greater than the @p flow_size,
1180  * nothing is written in @p flow; the validation is still performed.
1181  *
1182  * @param[in] item
1183  *   Item specification.
1184  * @param[in, out] flow
1185  *   Pointer to flow structure.
1186  * @param[in] flow_size
1187  *   Size in bytes of the available space in @p flow, if too small, nothing is
1188  *   written.
1189  * @param[out] error
1190  *   Pointer to error structure.
1191  *
1192  * @return
1193  *   On success, the number of bytes consumed/necessary; if the returned value
1194  *   is less than or equal to @p flow_size, the @p item has been fully converted,
1195  *   otherwise another call with this returned memory size should be done.
1196  *   On error, a negative errno value is returned and rte_errno is set.
1197  */
1198 static int
1199 mlx5_flow_item_vxlan(const struct rte_flow_item *item, struct rte_flow *flow,
1200                      const size_t flow_size, struct rte_flow_error *error)
1201 {
1202         const struct rte_flow_item_vxlan *spec = item->spec;
1203         const struct rte_flow_item_vxlan *mask = item->mask;
1204         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1205         struct ibv_flow_spec_tunnel vxlan = {
1206                 .type = IBV_FLOW_SPEC_VXLAN_TUNNEL,
1207                 .size = size,
1208         };
1209         int ret;
1210         union vni {
1211                 uint32_t vlan_id;
1212                 uint8_t vni[4];
1213         } id = { .vlan_id = 0, };
1214
1215         if (flow->layers & MLX5_FLOW_LAYER_TUNNEL)
1216                 return rte_flow_error_set(error, ENOTSUP,
1217                                           RTE_FLOW_ERROR_TYPE_ITEM,
1218                                           item,
1219                                           "a tunnel is already present");
1220         /*
1221          * Verify only UDPv4 is present as defined in
1222          * https://tools.ietf.org/html/rfc7348
1223          */
1224         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L4_UDP))
1225                 return rte_flow_error_set(error, ENOTSUP,
1226                                           RTE_FLOW_ERROR_TYPE_ITEM,
1227                                           item,
1228                                           "no outer UDP layer found");
1229         if (!mask)
1230                 mask = &rte_flow_item_vxlan_mask;
1231         ret = mlx5_flow_item_acceptable
1232                 (item, (const uint8_t *)mask,
1233                  (const uint8_t *)&rte_flow_item_vxlan_mask,
1234                  sizeof(struct rte_flow_item_vxlan), error);
1235         if (ret < 0)
1236                 return ret;
1237         if (spec) {
1238                 memcpy(&id.vni[1], spec->vni, 3);
1239                 vxlan.val.tunnel_id = id.vlan_id;
1240                 memcpy(&id.vni[1], mask->vni, 3);
1241                 vxlan.mask.tunnel_id = id.vlan_id;
1242                 /* Remove unwanted bits from values. */
1243                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1244         }
1245         /*
1246          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if
1247          * only this layer is defined in the Verbs specification, it is
1248          * interpreted as a wildcard and all packets will match this
1249          * rule; if it follows a full stack layer (e.g. eth / ipv4 /
1250          * udp), all packets matching the preceding layers will also
1251          * match this rule.  To avoid such a situation, VNI 0 is
1252          * currently refused.
1253          */
1254         if (!vxlan.val.tunnel_id)
1255                 return rte_flow_error_set(error, EINVAL,
1256                                           RTE_FLOW_ERROR_TYPE_ITEM,
1257                                           item,
1258                                           "VXLAN vni cannot be 0");
1259         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER))
1260                 return rte_flow_error_set(error, EINVAL,
1261                                           RTE_FLOW_ERROR_TYPE_ITEM,
1262                                           item,
1263                                           "VXLAN tunnel must be fully defined");
1264         if (size <= flow_size) {
1265                 mlx5_flow_spec_verbs_add(flow, &vxlan, size);
1266                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
1267         }
1268         flow->layers |= MLX5_FLOW_LAYER_VXLAN;
1269         flow->tunnel_ptype = RTE_PTYPE_TUNNEL_VXLAN | RTE_PTYPE_L4_UDP;
1270         return size;
1271 }
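
/*
 * Illustrative sketch only: a pattern accepted by the converter above, as an
 * application could build it through the public rte_flow API.  The item
 * variables and the VNI value are arbitrary examples, not taken from this
 * file.  The VXLAN item must follow an outer UDP layer (e.g. eth / ipv4 /
 * udp) and carry a non-zero VNI, as enforced above:
 *
 *	struct rte_flow_item_vxlan vxlan_spec = { .vni = { 0, 0, 42 } };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_VXLAN, .spec = &vxlan_spec },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */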
1272
1273 /**
1274  * Convert the @p pattern into a Verbs specifications after ensuring the NIC
1275  * will understand and process it correctly.
1276  * The conversion is performed item per item, each of them is written into
1277  * the @p flow if its size is lesser or equal to @p flow_size.
1278  * Validation and memory consumption computation are still performed until the
1279  * end of @p pattern, unless an error is encountered.
1280  *
1281  * @param[in] pattern
1282  *   Flow pattern.
1283  * @param[in, out] flow
1284  *   Pointer to the rte_flow structure.
1285  * @param[in] flow_size
1286  *   Size in bytes of the available space in @p flow, if too small some
1287  *   garbage may be present.
1288  * @param[out] error
1289  *   Pointer to error structure.
1290  *
1291  * @return
1292  *   On success the number of bytes consumed/necessary, if the returned value
1293  *   is lesser or equal to @p flow_size, the @p pattern has fully been
1294  *   converted, otherwise another call with this returned memory size should
1295  *   be done.
1296  *   On error, a negative errno value is returned and rte_errno is set.
1297  */
1298 static int
1299 mlx5_flow_items(const struct rte_flow_item pattern[],
1300                 struct rte_flow *flow, const size_t flow_size,
1301                 struct rte_flow_error *error)
1302 {
1303         int remain = flow_size;
1304         size_t size = 0;
1305
1306         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1307                 int ret = 0;
1308
1309                 switch (pattern->type) {
1310                 case RTE_FLOW_ITEM_TYPE_VOID:
1311                         break;
1312                 case RTE_FLOW_ITEM_TYPE_ETH:
1313                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1314                         break;
1315                 case RTE_FLOW_ITEM_TYPE_VLAN:
1316                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1317                         break;
1318                 case RTE_FLOW_ITEM_TYPE_IPV4:
1319                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1320                         break;
1321                 case RTE_FLOW_ITEM_TYPE_IPV6:
1322                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1323                         break;
1324                 case RTE_FLOW_ITEM_TYPE_UDP:
1325                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1326                         break;
1327                 case RTE_FLOW_ITEM_TYPE_TCP:
1328                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1329                         break;
1330                 case RTE_FLOW_ITEM_TYPE_VXLAN:
1331                         ret = mlx5_flow_item_vxlan(pattern, flow, remain,
1332                                                    error);
1333                         break;
1334                 default:
1335                         return rte_flow_error_set(error, ENOTSUP,
1336                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1337                                                   pattern,
1338                                                   "item not supported");
1339                 }
1340                 if (ret < 0)
1341                         return ret;
1342                 if (remain > ret)
1343                         remain -= ret;
1344                 else
1345                         remain = 0;
1346                 size += ret;
1347         }
1348         if (!flow->layers) {
1349                 const struct rte_flow_item item = {
1350                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1351                 };
1352
1353                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1354         }
1355         return size;
1356 }
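
/*
 * Illustrative note: a pattern containing only VOID and END items leaves
 * flow->layers at 0, so the fallback above converts it as a wildcard
 * Ethernet match.  Such a pattern behaves as if the application had written
 * (arbitrary example):
 *
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */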
1357
1358 /**
1359  * Convert the @p action into a Verbs specification after ensuring the NIC
1360  * will understand and process it correctly.
1361  * If the necessary size for the conversion is greater than the @p flow_size,
1362  * nothing is written in @p flow, the validation is still performed.
1363  *
1364  * @param[in] action
1365  *   Action configuration.
1366  * @param[in, out] flow
1367  *   Pointer to flow structure.
1368  * @param[in] flow_size
1369  *   Size in bytes of the available space in @p flow, if too small, nothing is
1370  *   written.
1371  * @param[out] error
1372  *   Pointer to error structure.
1373  *
1374  * @return
1375  *   On success the number of bytes consumed/necessary, if the returned value
1376  *   is lesser or equal to @p flow_size, the @p action has fully been
1377  *   converted, otherwise another call with this returned memory size should
1378  *   be done.
1379  *   On error, a negative errno value is returned and rte_errno is set.
1380  */
1381 static int
1382 mlx5_flow_action_drop(const struct rte_flow_action *action,
1383                       struct rte_flow *flow, const size_t flow_size,
1384                       struct rte_flow_error *error)
1385 {
1386         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1387         struct ibv_flow_spec_action_drop drop = {
1388                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1389                         .size = size,
1390         };
1391
1392         if (flow->fate)
1393                 return rte_flow_error_set(error, ENOTSUP,
1394                                           RTE_FLOW_ERROR_TYPE_ACTION,
1395                                           action,
1396                                           "multiple fate actions are not"
1397                                           " supported");
1398         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1399                 return rte_flow_error_set(error, ENOTSUP,
1400                                           RTE_FLOW_ERROR_TYPE_ACTION,
1401                                           action,
1402                                           "drop is not compatible with"
1403                                           " flag/mark action");
1404         if (size <= flow_size)
1405                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1406         flow->fate |= MLX5_FLOW_FATE_DROP;
1407         return size;
1408 }
1409
1410 /**
1411  * Convert the @p action into @p flow after ensuring the NIC will understand
1412  * and process it correctly.
1413  *
1414  * @param[in] dev
1415  *   Pointer to Ethernet device structure.
1416  * @param[in] action
1417  *   Action configuration.
1418  * @param[in, out] flow
1419  *   Pointer to flow structure.
1420  * @param[out] error
1421  *   Pointer to error structure.
1422  *
1423  * @return
1424  *   0 on success, a negative errno value otherwise and rte_errno is set.
1425  */
1426 static int
1427 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1428                        const struct rte_flow_action *action,
1429                        struct rte_flow *flow,
1430                        struct rte_flow_error *error)
1431 {
1432         struct priv *priv = dev->data->dev_private;
1433         const struct rte_flow_action_queue *queue = action->conf;
1434
1435         if (flow->fate)
1436                 return rte_flow_error_set(error, ENOTSUP,
1437                                           RTE_FLOW_ERROR_TYPE_ACTION,
1438                                           action,
1439                                           "multiple fate actions are not"
1440                                           " supported");
1441         if (queue->index >= priv->rxqs_n)
1442                 return rte_flow_error_set(error, EINVAL,
1443                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1444                                           &queue->index,
1445                                           "queue index out of range");
1446         if (!(*priv->rxqs)[queue->index])
1447                 return rte_flow_error_set(error, EINVAL,
1448                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1449                                           &queue->index,
1450                                           "queue is not configured");
1451         if (flow->queue)
1452                 (*flow->queue)[0] = queue->index;
1453         flow->rss.queue_num = 1;
1454         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1455         return 0;
1456 }
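
/*
 * Illustrative sketch only (the variable names and the queue index are
 * arbitrary examples): the QUEUE fate action as an application would
 * provide it.  The index must designate a configured Rx queue below the
 * number of Rx queues of the port, as checked above:
 *
 *	struct rte_flow_action_queue queue_conf = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */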
1457
1458 /**
1459  * Ensure the @p action will be understood and used correctly by the NIC.
1460  *
1461  * @param[in] dev
1462  *   Pointer to Ethernet device structure.
1463  * @param[in] action
1464  *   Action configuration.
1465  * @param[in, out] flow
1466  *   Pointer to the rte_flow structure.
1467  * @param[out] error
1468  *   Pointer to error structure.
1469  *
1470  * @return
1471  *   0 on success; the @p flow->queue array and @p flow->rss are then valid.
1472  *   On error, a negative errno value is returned and rte_errno is set.
1473  */
1474 static int
1475 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1476                      const struct rte_flow_action *action,
1477                      struct rte_flow *flow,
1478                      struct rte_flow_error *error)
1479 {
1480         struct priv *priv = dev->data->dev_private;
1481         const struct rte_flow_action_rss *rss = action->conf;
1482         unsigned int i;
1483
1484         if (flow->fate)
1485                 return rte_flow_error_set(error, ENOTSUP,
1486                                           RTE_FLOW_ERROR_TYPE_ACTION,
1487                                           action,
1488                                           "multiple fate actions are not"
1489                                           " supported");
1490         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1491             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1492                 return rte_flow_error_set(error, ENOTSUP,
1493                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1494                                           &rss->func,
1495                                           "RSS hash function not supported");
1496 #ifdef HAVE_IBV_DEVICE_TUNNEL_SUPPORT
1497         if (rss->level > 2)
1498 #else
1499         if (rss->level > 1)
1500 #endif
1501                 return rte_flow_error_set(error, ENOTSUP,
1502                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1503                                           &rss->level,
1504                                           "tunnel RSS is not supported");
1505         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1506                 return rte_flow_error_set(error, ENOTSUP,
1507                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1508                                           &rss->key_len,
1509                                           "RSS hash key too small");
1510         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1511                 return rte_flow_error_set(error, ENOTSUP,
1512                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1513                                           &rss->key_len,
1514                                           "RSS hash key too large");
1515         if (rss->queue_num > priv->config.ind_table_max_size)
1516                 return rte_flow_error_set(error, ENOTSUP,
1517                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1518                                           &rss->queue_num,
1519                                           "number of queues too large");
1520         if (rss->types & MLX5_RSS_HF_MASK)
1521                 return rte_flow_error_set(error, ENOTSUP,
1522                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1523                                           &rss->types,
1524                                           "some RSS protocols are not"
1525                                           " supported");
1526         for (i = 0; i != rss->queue_num; ++i) {
1527                 if (!(*priv->rxqs)[rss->queue[i]])
1528                         return rte_flow_error_set
1529                                 (error, EINVAL,
1530                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1531                                  &rss->queue[i],
1532                                  "queue is not configured");
1533         }
1534         if (flow->queue)
1535                 memcpy((*flow->queue), rss->queue,
1536                        rss->queue_num * sizeof(uint16_t));
1537         flow->rss.queue_num = rss->queue_num;
1538         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1539         flow->rss.types = rss->types;
1540         flow->rss.level = rss->level;
1541         flow->fate |= MLX5_FLOW_FATE_RSS;
1542         return 0;
1543 }
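
/*
 * Illustrative sketch only: an RSS action configuration satisfying the
 * checks above (default or Toeplitz hash function, no tunnel level unless
 * HAVE_IBV_DEVICE_TUNNEL_SUPPORT is defined, a key of exactly
 * MLX5_RSS_HASH_KEY_LEN bytes, supported hash types and configured queues).
 * The variable names, the queue list and the zeroed key (a placeholder for
 * an application-chosen key of the PMD key length) are arbitrary examples:
 *
 *	uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0 };
 *	uint16_t rss_queues[] = { 0, 1, 2, 3 };
 *	struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 1,
 *		.types = ETH_RSS_IP | ETH_RSS_UDP,
 *		.key_len = MLX5_RSS_HASH_KEY_LEN,
 *		.key = rss_key,
 *		.queue_num = RTE_DIM(rss_queues),
 *		.queue = rss_queues,
 *	};
 */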
1544
1545 /**
1546  * Convert the @p action into a Verbs specification after ensuring the NIC
1547  * will understand and process it correctly.
1548  * If the necessary size for the conversion is greater than the @p flow_size,
1549  * nothing is written in @p flow, the validation is still performed.
1550  *
1551  * @param[in] action
1552  *   Action configuration.
1553  * @param[in, out] flow
1554  *   Pointer to flow structure.
1555  * @param[in] flow_size
1556  *   Size in bytes of the available space in @p flow, if too small, nothing is
1557  *   written.
1558  * @param[out] error
1559  *   Pointer to error structure.
1560  *
1561  * @return
1562  *   On success the number of bytes consumed/necessary, if the returned value
1563  *   is lesser or equal to @p flow_size, the @p action has fully been
1564  *   converted, otherwise another call with this returned memory size should
1565  *   be done.
1566  *   On error, a negative errno value is returned and rte_errno is set.
1567  */
1568 static int
1569 mlx5_flow_action_flag(const struct rte_flow_action *action,
1570                       struct rte_flow *flow, const size_t flow_size,
1571                       struct rte_flow_error *error)
1572 {
1573         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1574         struct ibv_flow_spec_action_tag tag = {
1575                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1576                 .size = size,
1577                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1578         };
1579         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1580
1581         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1582                 return rte_flow_error_set(error, ENOTSUP,
1583                                           RTE_FLOW_ERROR_TYPE_ACTION,
1584                                           action,
1585                                           "flag action already present");
1586         if (flow->fate & MLX5_FLOW_FATE_DROP)
1587                 return rte_flow_error_set(error, ENOTSUP,
1588                                           RTE_FLOW_ERROR_TYPE_ACTION,
1589                                           action,
1590                                           "flag is not compatible with drop"
1591                                           " action");
1592         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1593                 size = 0;
1594         else if (size <= flow_size && verbs)
1595                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1596         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1597         return size;
1598 }
1599
1600 /**
1601  * Update the Verbs flag specification to carry the given mark identifier.
1602  *
1603  * @param[in, out] verbs
1604  *   Pointer to the mlx5_flow_verbs structure.
1605  * @param[in] mark_id
1606  *   Mark identifier to replace the flag.
1607  */
1608 static void
1609 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1610 {
1611         struct ibv_spec_header *hdr;
1612         int i;
1613
1614         if (!verbs)
1615                 return;
1616         /* Update Verbs specification. */
1617         hdr = (struct ibv_spec_header *)verbs->specs;
1618         if (!hdr)
1619                 return;
1620         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1621                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1622                         struct ibv_flow_spec_action_tag *t =
1623                                 (struct ibv_flow_spec_action_tag *)hdr;
1624
1625                         t->tag_id = mlx5_flow_mark_set(mark_id);
1626                 }
1627                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1628         }
1629 }
1630
1631 /**
1632  * Convert the @p action into @p flow (or by updating the already present
1633  * Flag Verbs specification) after ensuring the NIC will understand and
1634  * process it correctly.
1635  * If the necessary size for the conversion is greater than the @p flow_size,
1636  * nothing is written in @p flow, the validation is still performed.
1637  *
1638  * @param[in] action
1639  *   Action configuration.
1640  * @param[in, out] flow
1641  *   Pointer to flow structure.
1642  * @param[in] flow_size
1643  *   Size in bytes of the available space in @p flow, if too small, nothing is
1644  *   written.
1645  * @param[out] error
1646  *   Pointer to error structure.
1647  *
1648  * @return
1649  *   On success the number of bytes consumed/necessary, if the returned value
1650  *   is lesser or equal to @p flow_size, the @p action has fully been
1651  *   converted, otherwise another call with this returned memory size should
1652  *   be done.
1653  *   On error, a negative errno value is returned and rte_errno is set.
1654  */
1655 static int
1656 mlx5_flow_action_mark(const struct rte_flow_action *action,
1657                       struct rte_flow *flow, const size_t flow_size,
1658                       struct rte_flow_error *error)
1659 {
1660         const struct rte_flow_action_mark *mark = action->conf;
1661         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1662         struct ibv_flow_spec_action_tag tag = {
1663                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1664                 .size = size,
1665         };
1666         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1667
1668         if (!mark)
1669                 return rte_flow_error_set(error, EINVAL,
1670                                           RTE_FLOW_ERROR_TYPE_ACTION,
1671                                           action,
1672                                           "configuration cannot be null");
1673         if (mark->id >= MLX5_FLOW_MARK_MAX)
1674                 return rte_flow_error_set(error, EINVAL,
1675                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1676                                           &mark->id,
1677                                           "mark id must be in 0 <= id < "
1678                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1679         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1680                 return rte_flow_error_set(error, ENOTSUP,
1681                                           RTE_FLOW_ERROR_TYPE_ACTION,
1682                                           action,
1683                                           "mark action already present");
1684         if (flow->fate & MLX5_FLOW_FATE_DROP)
1685                 return rte_flow_error_set(error, ENOTSUP,
1686                                           RTE_FLOW_ERROR_TYPE_ACTION,
1687                                           action,
1688                                           "mark is not compatible with drop"
1689                                           " action");
1690         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1691                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1692                 size = 0;
1693         } else if (size <= flow_size) {
1694                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1695                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1696         }
1697         flow->modifier |= MLX5_FLOW_MOD_MARK;
1698         return size;
1699 }
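
/*
 * Illustrative note: when both FLAG and MARK appear in the same rule only
 * one tag Verbs specification is kept.  If FLAG was converted first, the
 * MARK conversion above rewrites its tag_id in place through
 * mlx5_flow_verbs_mark_update() and consumes no additional space;
 * conversely mlx5_flow_action_flag() returns 0 when a MARK is already
 * present.  Sketch of such an action list (the mark id is an arbitrary
 * example, queue_conf is as in the QUEUE sketch above, and a fate action
 * is still required by mlx5_flow_actions()):
 *
 *	struct rte_flow_action_mark mark_conf = { .id = 0xcafe };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */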
1700
1701 /**
1702  * Convert the @p action into @p flow after ensuring the NIC will understand
1703  * and process it correctly.
1704  * The conversion is performed action per action, each of them is written into
1705  * the @p flow if its size is lesser or equal to @p flow_size.
1706  * Validation and memory consumption computation are still performed until the
1707  * end of @p action, unless an error is encountered.
1708  *
1709  * @param[in] dev
1710  *   Pointer to Ethernet device structure.
1711  * @param[in] actions
1712  *   Pointer to flow actions array.
1713  * @param[in, out] flow
1714  *   Pointer to the rte_flow structure.
1715  * @param[in] flow_size
1716  *   Size in bytes of the available space in @p flow, if too small some
1717  *   garbage may be present.
1718  * @param[out] error
1719  *   Pointer to error structure.
1720  *
1721  * @return
1722  *   On success the number of bytes consumed/necessary, if the returned value
1723  *   is lesser or equal to @p flow_size, the @p actions has fully been
1724  *   converted, otherwise another call with this returned memory size should
1725  *   be done.
1726  *   On error, a negative errno value is returned and rte_errno is set.
1727  */
1728 static int
1729 mlx5_flow_actions(struct rte_eth_dev *dev,
1730                   const struct rte_flow_action actions[],
1731                   struct rte_flow *flow, const size_t flow_size,
1732                   struct rte_flow_error *error)
1733 {
1734         size_t size = 0;
1735         int remain = flow_size;
1736         int ret = 0;
1737
1738         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1739                 switch (actions->type) {
1740                 case RTE_FLOW_ACTION_TYPE_VOID:
1741                         break;
1742                 case RTE_FLOW_ACTION_TYPE_FLAG:
1743                         ret = mlx5_flow_action_flag(actions, flow, remain,
1744                                                     error);
1745                         break;
1746                 case RTE_FLOW_ACTION_TYPE_MARK:
1747                         ret = mlx5_flow_action_mark(actions, flow, remain,
1748                                                     error);
1749                         break;
1750                 case RTE_FLOW_ACTION_TYPE_DROP:
1751                         ret = mlx5_flow_action_drop(actions, flow, remain,
1752                                                     error);
1753                         break;
1754                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1755                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1756                         break;
1757                 case RTE_FLOW_ACTION_TYPE_RSS:
1758                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
1759                         break;
1760                 default:
1761                         return rte_flow_error_set(error, ENOTSUP,
1762                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1763                                                   actions,
1764                                                   "action not supported");
1765                 }
1766                 if (ret < 0)
1767                         return ret;
1768                 if (remain > ret)
1769                         remain -= ret;
1770                 else
1771                         remain = 0;
1772                 size += ret;
1773         }
1774         if (!flow->fate)
1775                 return rte_flow_error_set(error, ENOTSUP,
1776                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1777                                           NULL,
1778                                           "no fate action found");
1779         return size;
1780 }
1781
1782 /**
1783  * Convert the @p attributes, @p pattern and @p actions into a flow for the NIC
1784  * after ensuring the NIC will understand and process it correctly.
1785  * The conversion is only performed item/action per item/action, each of
1786  * them is written into the @p flow if its size is lesser or equal to @p
1787  * flow_size.
1788  * Validation and memory consumption computation are still performed until the
1789  * end, unless an error is encountered.
1790  *
1791  * @param[in] dev
1792  *   Pointer to Ethernet device.
1793  * @param[in, out] flow
1794  *   Pointer to flow structure.
1795  * @param[in] flow_size
1796  *   Size in bytes of the available space in @p flow, if too small some
1797  *   garbage may be present.
1798  * @param[in] attributes
1799  *   Flow rule attributes.
1800  * @param[in] pattern
1801  *   Pattern specification (list terminated by the END pattern item).
1802  * @param[in] actions
1803  *   Associated actions (list terminated by the END action).
1804  * @param[out] error
1805  *   Perform verbose error reporting if not NULL.
1806  *
1807  * @return
1808  *   On success the number of bytes consumed/necessary, if the returned value
1809  *   is lesser or equal to @p flow_size, the flow has fully been converted and
1810  *   can be applied, otherwise another call with this returned memory size
1811  *   should be done.
1812  *   On error, a negative errno value is returned and rte_errno is set.
1813  */
1814 static int
1815 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1816                 const size_t flow_size,
1817                 const struct rte_flow_attr *attributes,
1818                 const struct rte_flow_item pattern[],
1819                 const struct rte_flow_action actions[],
1820                 struct rte_flow_error *error)
1821 {
1822         struct rte_flow local_flow = { .layers = 0, };
1823         size_t size = sizeof(*flow);
1824         union {
1825                 struct rte_flow_expand_rss buf;
1826                 uint8_t buffer[2048];
1827         } expand_buffer;
1828         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
1829         struct mlx5_flow_verbs *original_verbs = NULL;
1830         size_t original_verbs_size = 0;
1831         uint32_t original_layers = 0;
1832         int expanded_pattern_idx = 0;
1833         int ret;
1834         uint32_t i;
1835
1836         if (size > flow_size)
1837                 flow = &local_flow;
1838         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1839         if (ret < 0)
1840                 return ret;
1841         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
1842         if (ret < 0)
1843                 return ret;
1844         if (local_flow.rss.types) {
1845                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
1846                                           pattern, local_flow.rss.types,
1847                                           mlx5_support_expansion,
1848                                           local_flow.rss.level < 2 ?
1849                                           MLX5_EXPANSION_ROOT :
1850                                           MLX5_EXPANSION_ROOT_OUTER);
1851                 assert(ret > 0 &&
1852                        (unsigned int)ret < sizeof(expand_buffer.buffer));
1853         } else {
1854                 buf->entries = 1;
1855                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
1856         }
1857         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
1858                                sizeof(void *));
1859         if (size <= flow_size)
1860                 flow->queue = (void *)(flow + 1);
1861         LIST_INIT(&flow->verbs);
1862         flow->layers = 0;
1863         flow->modifier = 0;
1864         flow->fate = 0;
1865         for (i = 0; i != buf->entries; ++i) {
1866                 size_t off = size;
1867                 size_t off2;
1868
1869                 flow->layers = original_layers;
1870                 size += sizeof(struct ibv_flow_attr) +
1871                         sizeof(struct mlx5_flow_verbs);
1872                 off2 = size;
1873                 if (size < flow_size) {
1874                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
1875                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
1876                         flow->cur_verbs->specs =
1877                                 (void *)(flow->cur_verbs->attr + 1);
1878                 }
1879                 /* First iteration convert the pattern into Verbs. */
1880                 if (i == 0) {
1881                         /* Actions don't need to be converted several times. */
1882                         ret = mlx5_flow_actions(dev, actions, flow,
1883                                                 (size < flow_size) ?
1884                                                 flow_size - size : 0,
1885                                                 error);
1886                         if (ret < 0)
1887                                 return ret;
1888                         size += ret;
1889                 } else {
1890                         /*
1891                          * Subsequent iterations mean the pattern has already
1892                          * been converted and an expansion is necessary to
1893                          * match the user RSS request.  Only the expanded
1894                          * items are converted; the part common with the
1895                          * user pattern is simply copied into the next
1896                          * buffer zone.
1897                          */
1898                         size += original_verbs_size;
1899                         if (size < flow_size) {
1900                                 rte_memcpy(flow->cur_verbs->attr,
1901                                            original_verbs->attr,
1902                                            original_verbs_size +
1903                                            sizeof(struct ibv_flow_attr));
1904                                 flow->cur_verbs->size = original_verbs_size;
1905                         }
1906                 }
1907                 ret = mlx5_flow_items
1908                         ((const struct rte_flow_item *)
1909                          &buf->entry[i].pattern[expanded_pattern_idx],
1910                          flow,
1911                          (size < flow_size) ? flow_size - size : 0, error);
1912                 if (ret < 0)
1913                         return ret;
1914                 size += ret;
1915                 if (size <= flow_size) {
1916                         mlx5_flow_adjust_priority(dev, flow);
1917                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
1918                 }
1919                 /*
1920                  * Keep a pointer of the first verbs conversion and the layers
1921                  * it has encountered.
1922                  */
1923                 if (i == 0) {
1924                         original_verbs = flow->cur_verbs;
1925                         original_verbs_size = size - off2;
1926                         original_layers = flow->layers;
1927                         /*
1928                          * move the index of the expanded pattern to the
1929                          * first item not addressed yet.
1930                          */
1931                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
1932                                 expanded_pattern_idx++;
1933                         } else {
1934                                 const struct rte_flow_item *item = pattern;
1935
1936                                 for (item = pattern;
1937                                      item->type != RTE_FLOW_ITEM_TYPE_END;
1938                                      ++item)
1939                                         expanded_pattern_idx++;
1940                         }
1941                 }
1942         }
1943         /* Restore the origin layers in the flow. */
1944         flow->layers = original_layers;
1945         return size;
1946 }
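
/*
 * Illustrative note on the size negotiation convention used above: callers
 * are expected to invoke mlx5_flow_merge() twice, first with a zero
 * flow_size to learn how many bytes are necessary, then with a buffer of
 * that size, exactly as mlx5_flow_list_create() does below.  A minimal
 * sketch (error handling omitted):
 *
 *	int size = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *	struct rte_flow *flow = rte_calloc(__func__, 1, size, 0);
 *
 *	size = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
 */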
1947
1948 /**
1949  * Set the Rx queue flags (Mark/Flag and Tunnel Ptypes) according to the flow.
1950  *
1951  * @param[in] dev
1952  *   Pointer to Ethernet device.
1953  * @param[in] flow
1954  *   Pointer to flow structure.
1955  */
1956 static void
1957 mlx5_flow_rxq_flags_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1958 {
1959         struct priv *priv = dev->data->dev_private;
1960         const int mark = !!(flow->modifier &
1961                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
1962         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1963         unsigned int i;
1964
1965         for (i = 0; i != flow->rss.queue_num; ++i) {
1966                 int idx = (*flow->queue)[i];
1967                 struct mlx5_rxq_ctrl *rxq_ctrl =
1968                         container_of((*priv->rxqs)[idx],
1969                                      struct mlx5_rxq_ctrl, rxq);
1970
1971                 if (mark) {
1972                         rxq_ctrl->rxq.mark = 1;
1973                         rxq_ctrl->flow_mark_n++;
1974                 }
1975                 if (tunnel) {
1976                         rxq_ctrl->rxq.tunnel = flow->tunnel_ptype;
1977                         rxq_ctrl->flow_vxlan_n++;
1978                 }
1979         }
1980 }
1981
1982 /**
1983  * Clear the Rx queue flags (Mark/Flag and Tunnel Ptype) associated with the
1984  * @p flow if no other flow uses it with the same kind of request.
1985  *
1986  * @param dev
1987  *   Pointer to Ethernet device.
1988  * @param[in] flow
1989  *   Pointer to the flow.
1990  */
1991 static void
1992 mlx5_flow_rxq_flags_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1993 {
1994         struct priv *priv = dev->data->dev_private;
1995         const int mark = !!(flow->modifier &
1996                             (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK));
1997         const int tunnel = !!(flow->layers & MLX5_FLOW_LAYER_TUNNEL);
1998         unsigned int i;
1999
2000         assert(dev->data->dev_started);
2001         for (i = 0; i != flow->rss.queue_num; ++i) {
2002                 int idx = (*flow->queue)[i];
2003                 struct mlx5_rxq_ctrl *rxq_ctrl =
2004                         container_of((*priv->rxqs)[idx],
2005                                      struct mlx5_rxq_ctrl, rxq);
2006
2007                 if (mark) {
2008                         rxq_ctrl->flow_mark_n--;
2009                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
2010                 }
2011                 if (tunnel) {
2012                         rxq_ctrl->flow_vxlan_n--;
2013                         if (!rxq_ctrl->flow_vxlan_n)
2014                                 rxq_ctrl->rxq.tunnel = 0;
2015                 }
2016         }
2017 }
2018
2019 /**
2020  * Clear the Mark/Flag and Tunnel ptype information in all Rx queues.
2021  *
2022  * @param dev
2023  *   Pointer to Ethernet device.
2024  */
2025 static void
2026 mlx5_flow_rxq_flags_clear(struct rte_eth_dev *dev)
2027 {
2028         struct priv *priv = dev->data->dev_private;
2029         unsigned int i;
2030
2031         /* Reset Mark/Flag and tunnel ptype on every configured Rx queue. */
2032         for (i = 0; i != priv->rxqs_n; ++i) {
2033                 struct mlx5_rxq_ctrl *rxq_ctrl;
2034
2035                 if (!(*priv->rxqs)[i])
2036                         continue;
2037                 rxq_ctrl = container_of((*priv->rxqs)[i],
2038                                         struct mlx5_rxq_ctrl, rxq);
2039                 rxq_ctrl->flow_mark_n = 0;
2040                 rxq_ctrl->rxq.mark = 0;
2041                 rxq_ctrl->flow_vxlan_n = 0;
2042                 rxq_ctrl->rxq.tunnel = 0;
2043         }
2045 }
2046
2047 /**
2048  * Validate a flow supported by the NIC.
2049  *
2050  * @see rte_flow_validate()
2051  * @see rte_flow_ops
2052  */
2053 int
2054 mlx5_flow_validate(struct rte_eth_dev *dev,
2055                    const struct rte_flow_attr *attr,
2056                    const struct rte_flow_item items[],
2057                    const struct rte_flow_action actions[],
2058                    struct rte_flow_error *error)
2059 {
2060         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
2061
2062         if (ret < 0)
2063                 return ret;
2064         return 0;
2065 }
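
/*
 * Illustrative sketch only: applications reach this handler through the
 * generic rte_flow API.  The port_id, pattern and actions variables below
 * are assumptions reusing the earlier sketches:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_error flow_err;
 *	struct rte_flow *flow = NULL;
 *
 *	if (rte_flow_validate(port_id, &attr, pattern, actions, &flow_err))
 *		printf("rule rejected: %s\n", flow_err.message);
 *	else
 *		flow = rte_flow_create(port_id, &attr, pattern, actions,
 *				       &flow_err);
 */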
2066
2067 /**
2068  * Remove the flow.
2069  *
2070  * @param[in] dev
2071  *   Pointer to Ethernet device.
2072  * @param[in, out] flow
2073  *   Pointer to flow structure.
2074  */
2075 static void
2076 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
2077 {
2078         struct mlx5_flow_verbs *verbs;
2079
2080         LIST_FOREACH(verbs, &flow->verbs, next) {
2081                 if (verbs->flow) {
2082                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
2083                         verbs->flow = NULL;
2084                 }
2085                 if (verbs->hrxq) {
2086                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2087                                 mlx5_hrxq_drop_release(dev);
2088                         else
2089                                 mlx5_hrxq_release(dev, verbs->hrxq);
2090                         verbs->hrxq = NULL;
2091                 }
2092         }
2093 }
2094
2095 /**
2096  * Apply the flow.
2097  *
2098  * @param[in] dev
2099  *   Pointer to Ethernet device structure.
2100  * @param[in, out] flow
2101  *   Pointer to flow structure.
2102  * @param[out] error
2103  *   Pointer to error structure.
2104  *
2105  * @return
2106  *   0 on success, a negative errno value otherwise and rte_errno is set.
2107  */
2108 static int
2109 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
2110                 struct rte_flow_error *error)
2111 {
2112         struct mlx5_flow_verbs *verbs;
2113         int err;
2114
2115         LIST_FOREACH(verbs, &flow->verbs, next) {
2116                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
2117                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
2118                         if (!verbs->hrxq) {
2119                                 rte_flow_error_set
2120                                         (error, errno,
2121                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2122                                          NULL,
2123                                          "cannot get drop hash queue");
2124                                 goto error;
2125                         }
2126                 } else {
2127                         struct mlx5_hrxq *hrxq;
2128
2129                         hrxq = mlx5_hrxq_get(dev, flow->key,
2130                                              MLX5_RSS_HASH_KEY_LEN,
2131                                              verbs->hash_fields,
2132                                              (*flow->queue),
2133                                              flow->rss.queue_num);
2134                         if (!hrxq)
2135                                 hrxq = mlx5_hrxq_new(dev, flow->key,
2136                                                      MLX5_RSS_HASH_KEY_LEN,
2137                                                      verbs->hash_fields,
2138                                                      (*flow->queue),
2139                                                      flow->rss.queue_num);
2140                         if (!hrxq) {
2141                                 rte_flow_error_set
2142                                         (error, rte_errno,
2143                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2144                                          NULL,
2145                                          "cannot get hash queue");
2146                                 goto error;
2147                         }
2148                         verbs->hrxq = hrxq;
2149                 }
2150                 verbs->flow =
2151                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
2152                 if (!verbs->flow) {
2153                         rte_flow_error_set(error, errno,
2154                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2155                                            NULL,
2156                                            "hardware refuses to create flow");
2157                         goto error;
2158                 }
2159         }
2160         return 0;
2161 error:
2162         err = rte_errno; /* Save rte_errno before cleanup. */
2163         LIST_FOREACH(verbs, &flow->verbs, next) {
2164                 if (verbs->hrxq) {
2165                         if (flow->fate & MLX5_FLOW_FATE_DROP)
2166                                 mlx5_hrxq_drop_release(dev);
2167                         else
2168                                 mlx5_hrxq_release(dev, verbs->hrxq);
2169                         verbs->hrxq = NULL;
2170                 }
2171         }
2172         rte_errno = err; /* Restore rte_errno. */
2173         return -rte_errno;
2174 }
2175
2176 /**
2177  * Create a flow and add it to @p list.
2178  *
2179  * @param dev
2180  *   Pointer to Ethernet device.
2181  * @param list
2182  *   Pointer to a TAILQ flow list.
2183  * @param[in] attr
2184  *   Flow rule attributes.
2185  * @param[in] items
2186  *   Pattern specification (list terminated by the END pattern item).
2187  * @param[in] actions
2188  *   Associated actions (list terminated by the END action).
2189  * @param[out] error
2190  *   Perform verbose error reporting if not NULL.
2191  *
2192  * @return
2193  *   A flow on success, NULL otherwise and rte_errno is set.
2194  */
2195 static struct rte_flow *
2196 mlx5_flow_list_create(struct rte_eth_dev *dev,
2197                       struct mlx5_flows *list,
2198                       const struct rte_flow_attr *attr,
2199                       const struct rte_flow_item items[],
2200                       const struct rte_flow_action actions[],
2201                       struct rte_flow_error *error)
2202 {
2203         struct rte_flow *flow = NULL;
2204         size_t size = 0;
2205         int ret;
2206
2207         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2208         if (ret < 0)
2209                 return NULL;
2210         size = ret;
2211         flow = rte_calloc(__func__, 1, size, 0);
2212         if (!flow) {
2213                 rte_flow_error_set(error, ENOMEM,
2214                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2215                                    NULL,
2216                                    "not enough memory to create flow");
2217                 return NULL;
2218         }
2219         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
2220         if (ret < 0) {
2221                 rte_free(flow);
2222                 return NULL;
2223         }
2224         assert((size_t)ret == size);
2225         if (dev->data->dev_started) {
2226                 ret = mlx5_flow_apply(dev, flow, error);
2227                 if (ret < 0) {
2228                         ret = rte_errno; /* Save rte_errno before cleanup. */
2229                         if (flow) {
2230                                 mlx5_flow_remove(dev, flow);
2231                                 rte_free(flow);
2232                         }
2233                         rte_errno = ret; /* Restore rte_errno. */
2234                         return NULL;
2235                 }
2236         }
2237         TAILQ_INSERT_TAIL(list, flow, next);
2238         mlx5_flow_rxq_flags_set(dev, flow);
2239         return flow;
2240 }
2241
2242 /**
2243  * Create a flow.
2244  *
2245  * @see rte_flow_create()
2246  * @see rte_flow_ops
2247  */
2248 struct rte_flow *
2249 mlx5_flow_create(struct rte_eth_dev *dev,
2250                  const struct rte_flow_attr *attr,
2251                  const struct rte_flow_item items[],
2252                  const struct rte_flow_action actions[],
2253                  struct rte_flow_error *error)
2254 {
2255         return mlx5_flow_list_create
2256                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2257                  attr, items, actions, error);
2258 }
2259
2260 /**
2261  * Destroy a flow in a list.
2262  *
2263  * @param dev
2264  *   Pointer to Ethernet device.
2265  * @param list
2266  *   Pointer to a TAILQ flow list.
2267  * @param[in] flow
2268  *   Flow to destroy.
2269  */
2270 static void
2271 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2272                        struct rte_flow *flow)
2273 {
2274         mlx5_flow_remove(dev, flow);
2275         TAILQ_REMOVE(list, flow, next);
2276         /*
2277          * Update RX queue flags only if port is started, otherwise it is
2278          * already clean.
2279          */
2280         if (dev->data->dev_started)
2281                 mlx5_flow_rxq_flags_trim(dev, flow);
2282         rte_free(flow);
2283 }
2284
2285 /**
2286  * Destroy all flows.
2287  *
2288  * @param dev
2289  *   Pointer to Ethernet device.
2290  * @param list
2291  *   Pointer to a TAILQ flow list.
2292  */
2293 void
2294 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2295 {
2296         while (!TAILQ_EMPTY(list)) {
2297                 struct rte_flow *flow;
2298
2299                 flow = TAILQ_FIRST(list);
2300                 mlx5_flow_list_destroy(dev, list, flow);
2301         }
2302 }
2303
2304 /**
2305  * Remove all flows.
2306  *
2307  * @param dev
2308  *   Pointer to Ethernet device.
2309  * @param list
2310  *   Pointer to a TAILQ flow list.
2311  */
2312 void
2313 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2314 {
2315         struct rte_flow *flow;
2316
2317         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2318                 mlx5_flow_remove(dev, flow);
2319         mlx5_flow_rxq_flags_clear(dev);
2320 }
2321
2322 /**
2323  * Add all flows.
2324  *
2325  * @param dev
2326  *   Pointer to Ethernet device.
2327  * @param list
2328  *   Pointer to a TAILQ flow list.
2329  *
2330  * @return
2331  *   0 on success, a negative errno value otherwise and rte_errno is set.
2332  */
2333 int
2334 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2335 {
2336         struct rte_flow *flow;
2337         struct rte_flow_error error;
2338         int ret = 0;
2339
2340         TAILQ_FOREACH(flow, list, next) {
2341                 ret = mlx5_flow_apply(dev, flow, &error);
2342                 if (ret < 0)
2343                         goto error;
2344                 mlx5_flow_rxq_flags_set(dev, flow);
2345         }
2346         return 0;
2347 error:
2348         ret = rte_errno; /* Save rte_errno before cleanup. */
2349         mlx5_flow_stop(dev, list);
2350         rte_errno = ret; /* Restore rte_errno. */
2351         return -rte_errno;
2352 }
2353
2354 /**
2355  * Verify the flow list is empty.
2356  *
2357  * @param dev
2358  *   Pointer to Ethernet device.
2359  *
2360  * @return the number of flows not released.
2361  */
2362 int
2363 mlx5_flow_verify(struct rte_eth_dev *dev)
2364 {
2365         struct priv *priv = dev->data->dev_private;
2366         struct rte_flow *flow;
2367         int ret = 0;
2368
2369         TAILQ_FOREACH(flow, &priv->flows, next) {
2370                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2371                         dev->data->port_id, (void *)flow);
2372                 ++ret;
2373         }
2374         return ret;
2375 }
2376
2377 /**
2378  * Enable a control flow configured from the control plane.
2379  *
2380  * @param dev
2381  *   Pointer to Ethernet device.
2382  * @param eth_spec
2383  *   An Ethernet flow spec to apply.
2384  * @param eth_mask
2385  *   An Ethernet flow mask to apply.
2386  * @param vlan_spec
2387  *   A VLAN flow spec to apply.
2388  * @param vlan_mask
2389  *   A VLAN flow mask to apply.
2390  *
2391  * @return
2392  *   0 on success, a negative errno value otherwise and rte_errno is set.
2393  */
2394 int
2395 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2396                     struct rte_flow_item_eth *eth_spec,
2397                     struct rte_flow_item_eth *eth_mask,
2398                     struct rte_flow_item_vlan *vlan_spec,
2399                     struct rte_flow_item_vlan *vlan_mask)
2400 {
2401         struct priv *priv = dev->data->dev_private;
2402         const struct rte_flow_attr attr = {
2403                 .ingress = 1,
2404                 .priority = MLX5_FLOW_PRIO_RSVD,
2405         };
2406         struct rte_flow_item items[] = {
2407                 {
2408                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2409                         .spec = eth_spec,
2410                         .last = NULL,
2411                         .mask = eth_mask,
2412                 },
2413                 {
2414                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2415                                 RTE_FLOW_ITEM_TYPE_END,
2416                         .spec = vlan_spec,
2417                         .last = NULL,
2418                         .mask = vlan_mask,
2419                 },
2420                 {
2421                         .type = RTE_FLOW_ITEM_TYPE_END,
2422                 },
2423         };
2424         uint16_t queue[priv->reta_idx_n];
2425         struct rte_flow_action_rss action_rss = {
2426                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2427                 .level = 0,
2428                 .types = priv->rss_conf.rss_hf,
2429                 .key_len = priv->rss_conf.rss_key_len,
2430                 .queue_num = priv->reta_idx_n,
2431                 .key = priv->rss_conf.rss_key,
2432                 .queue = queue,
2433         };
2434         struct rte_flow_action actions[] = {
2435                 {
2436                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2437                         .conf = &action_rss,
2438                 },
2439                 {
2440                         .type = RTE_FLOW_ACTION_TYPE_END,
2441                 },
2442         };
2443         struct rte_flow *flow;
2444         struct rte_flow_error error;
2445         unsigned int i;
2446
2447         if (!priv->reta_idx_n) {
2448                 rte_errno = EINVAL;
2449                 return -rte_errno;
2450         }
2451         for (i = 0; i != priv->reta_idx_n; ++i)
2452                 queue[i] = (*priv->reta_idx)[i];
2453         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2454                                      actions, &error);
2455         if (!flow)
2456                 return -rte_errno;
2457         return 0;
2458 }
2459
2460 /**
2461  * Enable a control flow configured from the control plane.
2462  *
2463  * @param dev
2464  *   Pointer to Ethernet device.
2465  * @param eth_spec
2466  *   An Ethernet flow spec to apply.
2467  * @param eth_mask
2468  *   An Ethernet flow mask to apply.
2469  *
2470  * @return
2471  *   0 on success, a negative errno value otherwise and rte_errno is set.
2472  */
2473 int
2474 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2475                struct rte_flow_item_eth *eth_spec,
2476                struct rte_flow_item_eth *eth_mask)
2477 {
2478         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2479 }
2480
2481 /**
2482  * Destroy a flow.
2483  *
2484  * @see rte_flow_destroy()
2485  * @see rte_flow_ops
2486  */
2487 int
2488 mlx5_flow_destroy(struct rte_eth_dev *dev,
2489                   struct rte_flow *flow,
2490                   struct rte_flow_error *error __rte_unused)
2491 {
2492         struct priv *priv = dev->data->dev_private;
2493
2494         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2495         return 0;
2496 }
2497
2498 /**
2499  * Destroy all flows.
2500  *
2501  * @see rte_flow_flush()
2502  * @see rte_flow_ops
2503  */
2504 int
2505 mlx5_flow_flush(struct rte_eth_dev *dev,
2506                 struct rte_flow_error *error __rte_unused)
2507 {
2508         struct priv *priv = dev->data->dev_private;
2509
2510         mlx5_flow_list_flush(dev, &priv->flows);
2511         return 0;
2512 }
2513
2514 /**
2515  * Isolated mode.
2516  *
2517  * @see rte_flow_isolate()
2518  * @see rte_flow_ops
2519  */
2520 int
2521 mlx5_flow_isolate(struct rte_eth_dev *dev,
2522                   int enable,
2523                   struct rte_flow_error *error)
2524 {
2525         struct priv *priv = dev->data->dev_private;
2526
2527         if (dev->data->dev_started) {
2528                 rte_flow_error_set(error, EBUSY,
2529                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2530                                    NULL,
2531                                    "port must be stopped first");
2532                 return -rte_errno;
2533         }
2534         priv->isolated = !!enable;
2535         if (enable)
2536                 dev->dev_ops = &mlx5_dev_ops_isolate;
2537         else
2538                 dev->dev_ops = &mlx5_dev_ops;
2539         return 0;
2540 }
2541
2542 /**
2543  * Convert a flow director filter to a generic flow.
2544  *
2545  * @param dev
2546  *   Pointer to Ethernet device.
2547  * @param fdir_filter
2548  *   Flow director filter to add.
2549  * @param attributes
2550  *   Generic flow parameters structure.
2551  *
2552  * @return
2553  *   0 on success, a negative errno value otherwise and rte_errno is set.
2554  */
2555 static int
2556 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2557                          const struct rte_eth_fdir_filter *fdir_filter,
2558                          struct mlx5_fdir *attributes)
2559 {
2560         struct priv *priv = dev->data->dev_private;
2561         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2562         const struct rte_eth_fdir_masks *mask =
2563                 &dev->data->dev_conf.fdir_conf.mask;
2564
2565         /* Validate queue number. */
2566         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2567                 DRV_LOG(ERR, "port %u invalid queue number %d",
2568                         dev->data->port_id, fdir_filter->action.rx_queue);
2569                 rte_errno = EINVAL;
2570                 return -rte_errno;
2571         }
2572         attributes->attr.ingress = 1;
2573         attributes->items[0] = (struct rte_flow_item) {
2574                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2575                 .spec = &attributes->l2,
2576                 .mask = &attributes->l2_mask,
2577         };
2578         switch (fdir_filter->action.behavior) {
2579         case RTE_ETH_FDIR_ACCEPT:
2580                 attributes->actions[0] = (struct rte_flow_action){
2581                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2582                         .conf = &attributes->queue,
2583                 };
2584                 break;
2585         case RTE_ETH_FDIR_REJECT:
2586                 attributes->actions[0] = (struct rte_flow_action){
2587                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2588                 };
2589                 break;
2590         default:
2591                 DRV_LOG(ERR, "port %u invalid behavior %d",
2592                         dev->data->port_id,
2593                         fdir_filter->action.behavior);
2594                 rte_errno = ENOTSUP;
2595                 return -rte_errno;
2596         }
2597         attributes->queue.index = fdir_filter->action.rx_queue;
2598         /* Handle L3. */
2599         switch (fdir_filter->input.flow_type) {
2600         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2601         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2602         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2603                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2604                         .src_addr = input->flow.ip4_flow.src_ip,
2605                         .dst_addr = input->flow.ip4_flow.dst_ip,
2606                         .time_to_live = input->flow.ip4_flow.ttl,
2607                         .type_of_service = input->flow.ip4_flow.tos,
2608                         .next_proto_id = input->flow.ip4_flow.proto,
2609                 };
2610                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2611                         .src_addr = mask->ipv4_mask.src_ip,
2612                         .dst_addr = mask->ipv4_mask.dst_ip,
2613                         .time_to_live = mask->ipv4_mask.ttl,
2614                         .type_of_service = mask->ipv4_mask.tos,
2615                         .next_proto_id = mask->ipv4_mask.proto,
2616                 };
2617                 attributes->items[1] = (struct rte_flow_item){
2618                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2619                         .spec = &attributes->l3,
2620                         .mask = &attributes->l3_mask,
2621                 };
2622                 break;
2623         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2624         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2625         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2626                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2627                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2628                         .proto = input->flow.ipv6_flow.proto,
2629                 };
2630
2631                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2632                        input->flow.ipv6_flow.src_ip,
2633                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2634                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2635                        input->flow.ipv6_flow.dst_ip,
2636                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2637                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2638                        mask->ipv6_mask.src_ip,
2639                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2640                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2641                        mask->ipv6_mask.dst_ip,
2642                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
2643                 attributes->items[1] = (struct rte_flow_item){
2644                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2645                         .spec = &attributes->l3,
2646                         .mask = &attributes->l3_mask,
2647                 };
2648                 break;
2649         default:
2650                 DRV_LOG(ERR, "port %u invalid flow type %d",
2651                         dev->data->port_id, fdir_filter->input.flow_type);
2652                 rte_errno = ENOTSUP;
2653                 return -rte_errno;
2654         }
2655         /* Handle L4. */
2656         switch (fdir_filter->input.flow_type) {
2657         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2658                 attributes->l4.udp.hdr = (struct udp_hdr){
2659                         .src_port = input->flow.udp4_flow.src_port,
2660                         .dst_port = input->flow.udp4_flow.dst_port,
2661                 };
2662                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2663                         .src_port = mask->src_port_mask,
2664                         .dst_port = mask->dst_port_mask,
2665                 };
2666                 attributes->items[2] = (struct rte_flow_item){
2667                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2668                         .spec = &attributes->l4,
2669                         .mask = &attributes->l4_mask,
2670                 };
2671                 break;
2672         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2673                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2674                         .src_port = input->flow.tcp4_flow.src_port,
2675                         .dst_port = input->flow.tcp4_flow.dst_port,
2676                 };
2677                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2678                         .src_port = mask->src_port_mask,
2679                         .dst_port = mask->dst_port_mask,
2680                 };
2681                 attributes->items[2] = (struct rte_flow_item){
2682                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2683                         .spec = &attributes->l4,
2684                         .mask = &attributes->l4_mask,
2685                 };
2686                 break;
2687         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2688                 attributes->l4.udp.hdr = (struct udp_hdr){
2689                         .src_port = input->flow.udp6_flow.src_port,
2690                         .dst_port = input->flow.udp6_flow.dst_port,
2691                 };
2692                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2693                         .src_port = mask->src_port_mask,
2694                         .dst_port = mask->dst_port_mask,
2695                 };
2696                 attributes->items[2] = (struct rte_flow_item){
2697                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2698                         .spec = &attributes->l4,
2699                         .mask = &attributes->l4_mask,
2700                 };
2701                 break;
2702         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2703                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2704                         .src_port = input->flow.tcp6_flow.src_port,
2705                         .dst_port = input->flow.tcp6_flow.dst_port,
2706                 };
2707                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2708                         .src_port = mask->src_port_mask,
2709                         .dst_port = mask->dst_port_mask,
2710                 };
2711                 attributes->items[2] = (struct rte_flow_item){
2712                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2713                         .spec = &attributes->l4,
2714                         .mask = &attributes->l4_mask,
2715                 };
2716                 break;
2717         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2718         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
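                /* OTHER flow types carry no L4 match: keep the pattern L3-only. */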
2719                 break;
2720         default:
2721                 DRV_LOG(ERR, "port %u invalid flow type %d",
2722                         dev->data->port_id, fdir_filter->input.flow_type);
2723                 rte_errno = ENOTSUP;
2724                 return -rte_errno;
2725         }
2726         return 0;
2727 }
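
/*
 * Illustrative sketch only, not referenced by the driver: a minimal flow
 * director filter that mlx5_fdir_filter_convert() accepts. It matches
 * IPv4/UDP traffic on a destination port and steers it to an Rx queue.
 * The helper name, the port value (4789) and the queue index are
 * hypothetical; port numbers are in network byte order, as they are copied
 * verbatim into the UDP header spec above.
 */
static void __rte_unused
mlx5_fdir_filter_example(struct rte_eth_fdir_filter *fdir_filter)
{
        memset(fdir_filter, 0, sizeof(*fdir_filter));
        /* Selects both the L3 and the L4 branch of the converter. */
        fdir_filter->input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
        /*
         * IPv4 addresses are left at zero here; whether they are matched
         * is governed by dev_conf.fdir_conf.mask.
         */
        fdir_filter->input.flow.udp4_flow.dst_port = rte_cpu_to_be_16(4789);
        /* Accept and steer to Rx queue 1; must be below priv->rxqs_n. */
        fdir_filter->action.behavior = RTE_ETH_FDIR_ACCEPT;
        fdir_filter->action.rx_queue = 1;
}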
2728
2729 /**
2730  * Add new flow director filter and store it in list.
2731  *
2732  * @param dev
2733  *   Pointer to Ethernet device.
2734  * @param fdir_filter
2735  *   Flow director filter to add.
2736  *
2737  * @return
2738  *   0 on success, a negative errno value otherwise and rte_errno is set.
2739  */
2740 static int
2741 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2742                      const struct rte_eth_fdir_filter *fdir_filter)
2743 {
2744         struct priv *priv = dev->data->dev_private;
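        /*
         * Start from an all-zero L2 spec/mask in group 0: the Ethernet item
         * only anchors the pattern, the actual match is built from the L3/L4
         * fields of the filter during conversion.
         */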
2745         struct mlx5_fdir attributes = {
2746                 .attr.group = 0,
2747                 .l2_mask = {
2748                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2749                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2750                         .type = 0,
2751                 },
2752         };
2753         struct rte_flow_error error;
2754         struct rte_flow *flow;
2755         int ret;
2756
2757         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2758         if (ret)
2759                 return ret;
2760         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2761                                      attributes.items, attributes.actions,
2762                                      &error);
2763         if (flow) {
2764                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2765                         (void *)flow);
2766                 return 0;
2767         }
2768         return -rte_errno;
2769 }
2770
2771 /**
2772  * Delete specific filter.
2773  *
2774  * @param dev
2775  *   Pointer to Ethernet device.
2776  * @param fdir_filter
2777  *   Filter to be deleted.
2778  *
2779  * @return
2780  *   0 on success, a negative errno value otherwise and rte_errno is set.
2781  */
2782 static int
2783 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
2784                         const struct rte_eth_fdir_filter *fdir_filter
2785                         __rte_unused)
2786 {
2787         rte_errno = ENOTSUP;
2788         return -rte_errno;
2789 }
2790
2791 /**
2792  * Update queue for specific filter.
2793  *
2794  * @param dev
2795  *   Pointer to Ethernet device.
2796  * @param fdir_filter
2797  *   Filter to be updated.
2798  *
2799  * @return
2800  *   0 on success, a negative errno value otherwise and rte_errno is set.
2801  */
2802 static int
2803 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2804                         const struct rte_eth_fdir_filter *fdir_filter)
2805 {
2806         int ret;
2807
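        /*
         * Update is implemented as delete + add; since deletion is not
         * supported (see mlx5_fdir_filter_delete() above), the update
         * currently fails with ENOTSUP before a new rule is created.
         */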
2808         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2809         if (ret)
2810                 return ret;
2811         return mlx5_fdir_filter_add(dev, fdir_filter);
2812 }
2813
2814 /**
2815  * Flush all filters.
2816  *
2817  * @param dev
2818  *   Pointer to Ethernet device.
2819  */
2820 static void
2821 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2822 {
2823         struct priv *priv = dev->data->dev_private;
2824
2825         mlx5_flow_list_flush(dev, &priv->flows);
2826 }
2827
2828 /**
2829  * Get flow director information.
2830  *
2831  * @param dev
2832  *   Pointer to Ethernet device.
2833  * @param[out] fdir_info
2834  *   Resulting flow director information.
2835  */
2836 static void
2837 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
2838 {
2839         struct rte_eth_fdir_masks *mask =
2840                 &dev->data->dev_conf.fdir_conf.mask;
2841
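        /*
         * Only the mode and the configured masks are reported; flexible
         * payload matching and guaranteed filter space are not supported,
         * so the remaining fields are zeroed.
         */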
2842         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
2843         fdir_info->guarant_spc = 0;
2844         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2845         fdir_info->max_flexpayload = 0;
2846         fdir_info->flow_types_mask[0] = 0;
2847         fdir_info->flex_payload_unit = 0;
2848         fdir_info->max_flex_payload_segment_num = 0;
2849         fdir_info->flex_payload_limit = 0;
2850         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2851 }
2852
2853 /**
2854  * Deal with flow director operations.
2855  *
2856  * @param dev
2857  *   Pointer to Ethernet device.
2858  * @param filter_op
2859  *   Operation to perform.
2860  * @param arg
2861  *   Pointer to operation-specific structure.
2862  *
2863  * @return
2864  *   0 on success, a negative errno value otherwise and rte_errno is set.
2865  */
2866 static int
2867 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
2868                     void *arg)
2869 {
2870         enum rte_fdir_mode fdir_mode =
2871                 dev->data->dev_conf.fdir_conf.mode;
2872
2873         if (filter_op == RTE_ETH_FILTER_NOP)
2874                 return 0;
2875         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2876             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2877                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
2878                         dev->data->port_id, fdir_mode);
2879                 rte_errno = EINVAL;
2880                 return -rte_errno;
2881         }
2882         switch (filter_op) {
2883         case RTE_ETH_FILTER_ADD:
2884                 return mlx5_fdir_filter_add(dev, arg);
2885         case RTE_ETH_FILTER_UPDATE:
2886                 return mlx5_fdir_filter_update(dev, arg);
2887         case RTE_ETH_FILTER_DELETE:
2888                 return mlx5_fdir_filter_delete(dev, arg);
2889         case RTE_ETH_FILTER_FLUSH:
2890                 mlx5_fdir_filter_flush(dev);
2891                 break;
2892         case RTE_ETH_FILTER_INFO:
2893                 mlx5_fdir_info_get(dev, arg);
2894                 break;
2895         default:
2896                 DRV_LOG(DEBUG, "port %u unknown operation %u",
2897                         dev->data->port_id, filter_op);
2898                 rte_errno = EINVAL;
2899                 return -rte_errno;
2900         }
2901         return 0;
2902 }
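
/*
 * Illustrative sketch only, not referenced by the driver: how an application
 * reaches mlx5_fdir_ctrl_func() through the legacy filter API. The helper
 * name and the port_id parameter are hypothetical.
 */
static int __rte_unused
mlx5_fdir_add_example(uint16_t port_id, struct rte_eth_fdir_filter *fdir_filter)
{
        /* Dispatched by mlx5_dev_filter_ctrl() below to mlx5_fdir_ctrl_func(). */
        return rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
                                       RTE_ETH_FILTER_ADD, fdir_filter);
}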
2903
2904 /**
2905  * Manage filter operations.
2906  *
2907  * @param dev
2908  *   Pointer to Ethernet device structure.
2909  * @param filter_type
2910  *   Filter type.
2911  * @param filter_op
2912  *   Operation to perform.
2913  * @param arg
2914  *   Pointer to operation-specific structure.
2915  *
2916  * @return
2917  *   0 on success, a negative errno value otherwise and rte_errno is set.
2918  */
2919 int
2920 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2921                      enum rte_filter_type filter_type,
2922                      enum rte_filter_op filter_op,
2923                      void *arg)
2924 {
2925         switch (filter_type) {
2926         case RTE_ETH_FILTER_GENERIC:
2927                 if (filter_op != RTE_ETH_FILTER_GET) {
2928                         rte_errno = EINVAL;
2929                         return -rte_errno;
2930                 }
2931                 *(const void **)arg = &mlx5_flow_ops;
2932                 return 0;
2933         case RTE_ETH_FILTER_FDIR:
2934                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
2935         default:
2936                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
2937                         dev->data->port_id, filter_type);
2938                 rte_errno = ENOTSUP;
2939                 return -rte_errno;
2940         }
2941         return 0;
2942 }
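
/*
 * Illustrative sketch only, not referenced by the driver: roughly how the
 * rte_flow layer retrieves the flow ops exported by the GENERIC branch above,
 * normally through rte_eth_dev_filter_ctrl() with RTE_ETH_FILTER_GENERIC.
 * The helper name is hypothetical.
 */
static __rte_unused const struct rte_flow_ops *
mlx5_flow_ops_example(struct rte_eth_dev *dev)
{
        const struct rte_flow_ops *ops = NULL;

        if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                 RTE_ETH_FILTER_GET, &ops))
                return NULL;
        return ops;
}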