net/mlx5: add RSS flow action
dpdk.git: drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45 /* Masks. */
46 #define MLX5_FLOW_LAYER_OUTER_L3 \
47         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
48 #define MLX5_FLOW_LAYER_OUTER_L4 \
49         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50
51 /* Actions that modify the fate of matching traffic. */
52 #define MLX5_FLOW_FATE_DROP (1u << 0)
53 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
54 #define MLX5_FLOW_FATE_RSS (1u << 2)
55
56 /* Modify a packet. */
57 #define MLX5_FLOW_MOD_FLAG (1u << 0)
58 #define MLX5_FLOW_MOD_MARK (1u << 1)
59
60 /* Possible L3 layer protocols to filter on. */
61 #define MLX5_IP_PROTOCOL_TCP 6
62 #define MLX5_IP_PROTOCOL_UDP 17
63
64 /* Priority reserved for default flows. */
65 #define MLX5_FLOW_PRIO_RSVD ((uint32_t)-1)
66
67 enum mlx5_expansion {
68         MLX5_EXPANSION_ROOT,
69         MLX5_EXPANSION_ETH,
70         MLX5_EXPANSION_IPV4,
71         MLX5_EXPANSION_IPV4_UDP,
72         MLX5_EXPANSION_IPV4_TCP,
73         MLX5_EXPANSION_IPV6,
74         MLX5_EXPANSION_IPV6_UDP,
75         MLX5_EXPANSION_IPV6_TCP,
76 };
77
78 /** Supported expansion of items. */
79 static const struct rte_flow_expand_node mlx5_support_expansion[] = {
80         [MLX5_EXPANSION_ROOT] = {
81                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_ETH,
82                                                  MLX5_EXPANSION_IPV4,
83                                                  MLX5_EXPANSION_IPV6),
84                 .type = RTE_FLOW_ITEM_TYPE_END,
85         },
86         [MLX5_EXPANSION_ETH] = {
87                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4,
88                                                  MLX5_EXPANSION_IPV6),
89                 .type = RTE_FLOW_ITEM_TYPE_ETH,
90         },
91         [MLX5_EXPANSION_IPV4] = {
92                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV4_UDP,
93                                                  MLX5_EXPANSION_IPV4_TCP),
94                 .type = RTE_FLOW_ITEM_TYPE_IPV4,
95                 .rss_types = ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
96                         ETH_RSS_NONFRAG_IPV4_OTHER,
97         },
98         [MLX5_EXPANSION_IPV4_UDP] = {
99                 .type = RTE_FLOW_ITEM_TYPE_UDP,
100                 .rss_types = ETH_RSS_NONFRAG_IPV4_UDP,
101         },
102         [MLX5_EXPANSION_IPV4_TCP] = {
103                 .type = RTE_FLOW_ITEM_TYPE_TCP,
104                 .rss_types = ETH_RSS_NONFRAG_IPV4_TCP,
105         },
106         [MLX5_EXPANSION_IPV6] = {
107                 .next = RTE_FLOW_EXPAND_RSS_NEXT(MLX5_EXPANSION_IPV6_UDP,
108                                                  MLX5_EXPANSION_IPV6_TCP),
109                 .type = RTE_FLOW_ITEM_TYPE_IPV6,
110                 .rss_types = ETH_RSS_IPV6 | ETH_RSS_FRAG_IPV6 |
111                         ETH_RSS_NONFRAG_IPV6_OTHER,
112         },
113         [MLX5_EXPANSION_IPV6_UDP] = {
114                 .type = RTE_FLOW_ITEM_TYPE_UDP,
115                 .rss_types = ETH_RSS_NONFRAG_IPV6_UDP,
116         },
117         [MLX5_EXPANSION_IPV6_TCP] = {
118                 .type = RTE_FLOW_ITEM_TYPE_TCP,
119                 .rss_types = ETH_RSS_NONFRAG_IPV6_TCP,
120         },
121 };
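
/*
 * Illustrative note (not part of the original file): given the expansion
 * table above, an RSS flow whose pattern stops at the IPv4 layer and whose
 * RSS types also request UDP/TCP hashing is expanded into several Verbs
 * flows, roughly:
 *
 *   pattern ETH / IPV4 / END, types ETH_RSS_IP | ETH_RSS_UDP | ETH_RSS_TCP
 *     -> ETH / IPV4 / END        (hash on IPv4 addresses)
 *     -> ETH / IPV4 / UDP / END  (hash on IPv4 addresses + UDP ports)
 *     -> ETH / IPV4 / TCP / END  (hash on IPv4 addresses + TCP ports)
 *
 * A node is only followed when its rss_types intersect the types requested
 * by the RSS action.
 */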
122
123 /** Verbs flow specification and the objects created from it. */
124 struct mlx5_flow_verbs {
125         LIST_ENTRY(mlx5_flow_verbs) next;
126         unsigned int size; /**< Size of the attribute. */
127         struct {
128                 struct ibv_flow_attr *attr;
129                 /**< Pointer to the Specification buffer. */
130                 uint8_t *specs; /**< Pointer to the specifications. */
131         };
132         struct ibv_flow *flow; /**< Verbs flow pointer. */
133         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
134         uint64_t hash_fields; /**< Verbs hash Rx queue hash fields. */
135 };
136
137 /* Flow structure. */
138 struct rte_flow {
139         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
140         struct rte_flow_attr attributes; /**< User flow attribute. */
141         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
142         uint32_t layers;
143         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
144         uint32_t modifier;
145         /**< Bit-fields of present modifier see MLX5_FLOW_MOD_*. */
146         uint32_t fate;
147         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
148         uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
149         LIST_HEAD(verbs, mlx5_flow_verbs) verbs; /**< Verbs flows list. */
150         struct mlx5_flow_verbs *cur_verbs;
151         /**< Current Verbs flow structure being filled. */
152         struct rte_flow_action_rss rss; /**< RSS context. */
153         uint8_t key[MLX5_RSS_HASH_KEY_LEN]; /**< RSS hash key. */
154         uint16_t (*queue)[]; /**< Destination queues to redirect traffic to. */
155 };
156
157 static const struct rte_flow_ops mlx5_flow_ops = {
158         .validate = mlx5_flow_validate,
159         .create = mlx5_flow_create,
160         .destroy = mlx5_flow_destroy,
161         .flush = mlx5_flow_flush,
162         .isolate = mlx5_flow_isolate,
163 };
164
165 /* Convert FDIR request to Generic flow. */
166 struct mlx5_fdir {
167         struct rte_flow_attr attr;
168         struct rte_flow_action actions[2];
169         struct rte_flow_item items[4];
170         struct rte_flow_item_eth l2;
171         struct rte_flow_item_eth l2_mask;
172         union {
173                 struct rte_flow_item_ipv4 ipv4;
174                 struct rte_flow_item_ipv6 ipv6;
175         } l3;
176         union {
177                 struct rte_flow_item_ipv4 ipv4;
178                 struct rte_flow_item_ipv6 ipv6;
179         } l3_mask;
180         union {
181                 struct rte_flow_item_udp udp;
182                 struct rte_flow_item_tcp tcp;
183         } l4;
184         union {
185                 struct rte_flow_item_udp udp;
186                 struct rte_flow_item_tcp tcp;
187         } l4_mask;
188         struct rte_flow_action_queue queue;
189 };
190
191 /* Verbs specification header. */
192 struct ibv_spec_header {
193         enum ibv_flow_spec_type type;
194         uint16_t size;
195 };
196
197 /*
198  * Number of sub priorities.
199  * For each kind of pattern matching, i.e. L2, L3, L4, to have a correct
200  * match on the NIC (firmware dependent), L4 must have the highest priority,
201  * followed by L3 and finally L2.
202  */
203 #define MLX5_PRIORITY_MAP_L2 2
204 #define MLX5_PRIORITY_MAP_L3 1
205 #define MLX5_PRIORITY_MAP_L4 0
206 #define MLX5_PRIORITY_MAP_MAX 3
207
208 /* Map of Verbs to Flow priority with 8 Verbs priorities. */
209 static const uint32_t priority_map_3[][MLX5_PRIORITY_MAP_MAX] = {
210         { 0, 1, 2 }, { 2, 3, 4 }, { 5, 6, 7 },
211 };
212
213 /* Map of Verbs to Flow priority with 16 Verbs priorities. */
214 static const uint32_t priority_map_5[][MLX5_PRIORITY_MAP_MAX] = {
215         { 0, 1, 2 }, { 3, 4, 5 }, { 6, 7, 8 },
216         { 9, 10, 11 }, { 12, 13, 14 },
217 };
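
/*
 * Illustrative note (not part of the original file): with 16 Verbs
 * priorities (priority_map_5), a flow created at rte_flow priority 1 whose
 * deepest matched layer is L3 ends up with Verbs priority
 * priority_map_5[1][MLX5_PRIORITY_MAP_L3] == 4, i.e. it is matched after
 * the L4 flows of the same rte_flow priority (3) and before its L2 flows (5).
 */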
218
219 /**
220  * Discover the maximum number of flow priorities available.
221  *
222  * @param[in] dev
223  *   Pointer to Ethernet device.
224  *
225  * @return
226  *   number of supported flow priorities on success, a negative errno
227  *   value otherwise and rte_errno is set.
228  */
229 int
230 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
231 {
232         struct {
233                 struct ibv_flow_attr attr;
234                 struct ibv_flow_spec_eth eth;
235                 struct ibv_flow_spec_action_drop drop;
236         } flow_attr = {
237                 .attr = {
238                         .num_of_specs = 2,
239                 },
240                 .eth = {
241                         .type = IBV_FLOW_SPEC_ETH,
242                         .size = sizeof(struct ibv_flow_spec_eth),
243                 },
244                 .drop = {
245                         .size = sizeof(struct ibv_flow_spec_action_drop),
246                         .type = IBV_FLOW_SPEC_ACTION_DROP,
247                 },
248         };
249         struct ibv_flow *flow;
250         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
251         uint16_t vprio[] = { 8, 16 };
252         int i;
253         int priority = 0;
254
255         if (!drop) {
256                 rte_errno = ENOTSUP;
257                 return -rte_errno;
258         }
259         for (i = 0; i != RTE_DIM(vprio); i++) {
260                 flow_attr.attr.priority = vprio[i] - 1;
261                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
262                 if (!flow)
263                         break;
264                 claim_zero(mlx5_glue->destroy_flow(flow));
265                 priority = vprio[i];
266         }
267         switch (priority) {
268         case 8:
269                 priority = RTE_DIM(priority_map_3);
270                 break;
271         case 16:
272                 priority = RTE_DIM(priority_map_5);
273                 break;
274         default:
275                 rte_errno = ENOTSUP;
276                 DRV_LOG(ERR,
277                         "port %u verbs maximum priority: %d expected 8/16",
278                         dev->data->port_id, vprio[i]);
                    mlx5_hrxq_drop_release(dev);
279                 return -rte_errno;
280         }
281         mlx5_hrxq_drop_release(dev);
282         DRV_LOG(INFO, "port %u flow maximum priority: %d",
283                 dev->data->port_id, priority);
284         return priority;
285 }
286
287 /**
288  * Adjust flow priority.
289  *
290  * @param dev
291  *   Pointer to Ethernet device.
292  * @param flow
293  *   Pointer to an rte flow.
294  */
295 static void
296 mlx5_flow_adjust_priority(struct rte_eth_dev *dev, struct rte_flow *flow)
297 {
298         struct priv *priv = dev->data->dev_private;
299         uint32_t priority = flow->attributes.priority;
300         uint32_t subpriority = flow->cur_verbs->attr->priority;
301
302         switch (priv->config.flow_prio) {
303         case RTE_DIM(priority_map_3):
304                 priority = priority_map_3[priority][subpriority];
305                 break;
306         case RTE_DIM(priority_map_5):
307                 priority = priority_map_5[priority][subpriority];
308                 break;
309         }
310         flow->cur_verbs->attr->priority = priority;
311 }
312
313 /**
314  * Verify the @p attributes will be correctly understood by the NIC and store
315  * them in the @p flow if everything is correct.
316  *
317  * @param[in] dev
318  *   Pointer to Ethernet device.
319  * @param[in] attributes
320  *   Pointer to flow attributes
321  * @param[in, out] flow
322  *   Pointer to the rte_flow structure.
323  * @param[out] error
324  *   Pointer to error structure.
325  *
326  * @return
327  *   0 on success, a negative errno value otherwise and rte_errno is set.
328  */
329 static int
330 mlx5_flow_attributes(struct rte_eth_dev *dev,
331                      const struct rte_flow_attr *attributes,
332                      struct rte_flow *flow,
333                      struct rte_flow_error *error)
334 {
335         uint32_t priority_max =
336                 ((struct priv *)dev->data->dev_private)->config.flow_prio - 1;
337
338         if (attributes->group)
339                 return rte_flow_error_set(error, ENOTSUP,
340                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
341                                           NULL,
342                                           "groups are not supported");
343         if (attributes->priority != MLX5_FLOW_PRIO_RSVD &&
344             attributes->priority >= priority_max)
345                 return rte_flow_error_set(error, ENOTSUP,
346                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
347                                           NULL,
348                                           "priority out of range");
349         if (attributes->egress)
350                 return rte_flow_error_set(error, ENOTSUP,
351                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
352                                           NULL,
353                                           "egress is not supported");
354         if (attributes->transfer)
355                 return rte_flow_error_set(error, ENOTSUP,
356                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
357                                           NULL,
358                                           "transfer is not supported");
359         if (!attributes->ingress)
360                 return rte_flow_error_set(error, ENOTSUP,
361                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
362                                           NULL,
363                                           "ingress attribute is mandatory");
364         flow->attributes = *attributes;
365         if (attributes->priority == MLX5_FLOW_PRIO_RSVD)
366                 flow->attributes.priority = priority_max;
367         return 0;
368 }
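
/*
 * Illustrative sketch (not part of the original file): the only attribute
 * combination accepted by mlx5_flow_attributes() is an ingress rule in
 * group 0, e.g. a hypothetical application-side definition:
 *
 *   struct rte_flow_attr attr = {
 *           .group = 0,
 *           .priority = 0,
 *           .ingress = 1,
 *   };
 *
 * Egress, transfer or a non-zero group are all rejected with ENOTSUP, and a
 * priority of MLX5_FLOW_PRIO_RSVD is replaced by the largest supported value.
 */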
369
370 /**
371  * Verify the @p item specifications (spec, last, mask) are compatible with the
372  * NIC capabilities.
373  *
374  * @param[in] item
375  *   Item specification.
376  * @param[in] mask
377  *   @p item->mask or flow default bit-masks.
378  * @param[in] nic_mask
379  *   Bit-masks covering supported fields by the NIC to compare with user mask.
380  * @param[in] size
381  *   Bit-masks size in bytes.
382  * @param[out] error
383  *   Pointer to error structure.
384  *
385  * @return
386  *   0 on success, a negative errno value otherwise and rte_errno is set.
387  */
388 static int
389 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
390                           const uint8_t *mask,
391                           const uint8_t *nic_mask,
392                           unsigned int size,
393                           struct rte_flow_error *error)
394 {
395         unsigned int i;
396
397         assert(nic_mask);
398         for (i = 0; i < size; ++i)
399                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
400                         return rte_flow_error_set(error, ENOTSUP,
401                                                   RTE_FLOW_ERROR_TYPE_ITEM,
402                                                   item,
403                                                   "mask enables non supported"
404                                                   " bits");
405         if (!item->spec && (item->mask || item->last))
406                 return rte_flow_error_set(error, EINVAL,
407                                           RTE_FLOW_ERROR_TYPE_ITEM,
408                                           item,
409                                           "mask/last without a spec is not"
410                                           " supported");
411         if (item->spec && item->last) {
412                 uint8_t spec[size];
413                 uint8_t last[size];
414                 unsigned int i;
415                 int ret;
416
417                 for (i = 0; i < size; ++i) {
418                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
419                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
420                 }
421                 ret = memcmp(spec, last, size);
422                 if (ret != 0)
423                         return rte_flow_error_set(error, ENOTSUP,
424                                                   RTE_FLOW_ERROR_TYPE_ITEM,
425                                                   item,
426                                                   "range is not supported");
427         }
428         return 0;
429 }
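
/*
 * Illustrative note (not part of the original file): for an IPv4 item the
 * NIC mask used by mlx5_flow_item_ipv4() covers only the source/destination
 * addresses, TOS and protocol fields. A hypothetical user mask such as
 *
 *   struct rte_flow_item_ipv4 ttl_mask = {
 *           .hdr = { .time_to_live = 0xff },
 *   };
 *
 * enables a bit outside nic_mask and fails the
 * (nic_mask[i] | mask[i]) != nic_mask[i] check above with ENOTSUP.
 */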
430
431 /**
432  * Add a verbs item specification into @p flow.
433  *
434  * @param[in, out] flow
435  *   Pointer to flow structure.
436  * @param[in] src
437  *   Verbs specification to add.
438  * @param[in] size
439  *   Size in bytes of the specification to copy.
440  */
441 static void
442 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
443 {
444         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
445
446         if (verbs->specs) {
447                 void *dst;
448
449                 dst = (void *)(verbs->specs + verbs->size);
450                 memcpy(dst, src, size);
451                 ++verbs->attr->num_of_specs;
452         }
453         verbs->size += size;
454 }
455
456 /**
457  * Convert the @p item into a Verbs specification after ensuring the NIC
458  * will understand and process it correctly.
459  * If the necessary size for the conversion is greater than the @p flow_size,
460  * nothing is written in @p flow, but the validation is still performed.
461  *
462  * @param[in] item
463  *   Item specification.
464  * @param[in, out] flow
465  *   Pointer to flow structure.
466  * @param[in] flow_size
467  *   Size in bytes of the available space in @p flow, if too small, nothing is
468  *   written.
469  * @param[out] error
470  *   Pointer to error structure.
471  *
472  * @return
473  *   On success, the number of bytes consumed/necessary; if the returned value
474  *   is less than or equal to @p flow_size, the @p item has been fully converted,
475  *   otherwise another call with the returned memory size should be done.
476  *   On error, a negative errno value is returned and rte_errno is set.
477  */
478 static int
479 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
480                    const size_t flow_size, struct rte_flow_error *error)
481 {
482         const struct rte_flow_item_eth *spec = item->spec;
483         const struct rte_flow_item_eth *mask = item->mask;
484         const struct rte_flow_item_eth nic_mask = {
485                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
486                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
487                 .type = RTE_BE16(0xffff),
488         };
489         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
490         struct ibv_flow_spec_eth eth = {
491                 .type = IBV_FLOW_SPEC_ETH,
492                 .size = size,
493         };
494         int ret;
495
496         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
497                 return rte_flow_error_set(error, ENOTSUP,
498                                           RTE_FLOW_ERROR_TYPE_ITEM,
499                                           item,
500                                           "L2 layers already configured");
501         if (!mask)
502                 mask = &rte_flow_item_eth_mask;
503         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
504                                         (const uint8_t *)&nic_mask,
505                                         sizeof(struct rte_flow_item_eth),
506                                         error);
507         if (ret)
508                 return ret;
509         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
510         if (size > flow_size)
511                 return size;
512         if (spec) {
513                 unsigned int i;
514
515                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
516                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
517                 eth.val.ether_type = spec->type;
518                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
519                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
520                 eth.mask.ether_type = mask->type;
521                 /* Remove unwanted bits from values. */
522                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
523                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
524                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
525                 }
526                 eth.val.ether_type &= eth.mask.ether_type;
527         }
528         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
529         mlx5_flow_spec_verbs_add(flow, &eth, size);
530         return size;
531 }
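
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * application-side ETH item matching a single destination MAC address,
 * which this function turns into one ibv_flow_spec_eth:
 *
 *   struct rte_flow_item_eth eth_spec = {
 *           .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *   };
 *   struct rte_flow_item_eth eth_mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &eth_spec,
 *           .mask = &eth_mask,
 *   };
 *
 * The sub-priority is set to MLX5_PRIORITY_MAP_L2 so the resulting Verbs
 * flow is matched after more specific L3/L4 flows of the same priority.
 */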
532
533 /**
534  * Update the VLAN tag in the Verbs Ethernet specification.
535  *
536  * @param[in, out] attr
537  *   Pointer to Verbs attributes structure.
538  * @param[in] eth
539  *   Verbs structure containing the VLAN information to copy.
540  */
541 static void
542 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
543                            struct ibv_flow_spec_eth *eth)
544 {
545         unsigned int i;
546         enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
547         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
548                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
549
550         for (i = 0; i != attr->num_of_specs; ++i) {
551                 if (hdr->type == search) {
552                         struct ibv_flow_spec_eth *e =
553                                 (struct ibv_flow_spec_eth *)hdr;
554
555                         e->val.vlan_tag = eth->val.vlan_tag;
556                         e->mask.vlan_tag = eth->mask.vlan_tag;
557                         e->val.ether_type = eth->val.ether_type;
558                         e->mask.ether_type = eth->mask.ether_type;
559                         break;
560                 }
561                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
562         }
563 }
564
565 /**
566  * Convert the @p item into a Verbs specification in @p flow (or update the
567  * Ethernet Verbs specification already present) after ensuring the NIC will
568  * understand and process it correctly.
569  * If the necessary size for the conversion is greater than the @p flow_size,
570  * nothing is written in @p flow, but the validation is still performed.
571  *
572  * @param[in] item
573  *   Item specification.
574  * @param[in, out] flow
575  *   Pointer to flow structure.
576  * @param[in] flow_size
577  *   Size in bytes of the available space in @p flow, if too small, nothing is
578  *   written.
579  * @param[out] error
580  *   Pointer to error structure.
581  *
582  * @return
583  *   On success, the number of bytes consumed/necessary; if the returned value
584  *   is less than or equal to @p flow_size, the @p item has been fully converted,
585  *   otherwise another call with the returned memory size should be done.
586  *   On error, a negative errno value is returned and rte_errno is set.
587  */
588 static int
589 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
590                     const size_t flow_size, struct rte_flow_error *error)
591 {
592         const struct rte_flow_item_vlan *spec = item->spec;
593         const struct rte_flow_item_vlan *mask = item->mask;
594         const struct rte_flow_item_vlan nic_mask = {
595                 .tci = RTE_BE16(0x0fff),
596                 .inner_type = RTE_BE16(0xffff),
597         };
598         unsigned int size = sizeof(struct ibv_flow_spec_eth);
599         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
600         struct ibv_flow_spec_eth eth = {
601                 .type = IBV_FLOW_SPEC_ETH,
602                 .size = size,
603         };
604         int ret;
605         const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
606                         MLX5_FLOW_LAYER_OUTER_L4;
607         const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
608         const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
609
610         if (flow->layers & vlanm)
611                 return rte_flow_error_set(error, ENOTSUP,
612                                           RTE_FLOW_ERROR_TYPE_ITEM,
613                                           item,
614                                           "VLAN layer already configured");
615         else if ((flow->layers & l34m) != 0)
616                 return rte_flow_error_set(error, ENOTSUP,
617                                           RTE_FLOW_ERROR_TYPE_ITEM,
618                                           item,
619                                           "L2 layer cannot follow L3/L4 layer");
620         if (!mask)
621                 mask = &rte_flow_item_vlan_mask;
622         ret = mlx5_flow_item_acceptable
623                 (item, (const uint8_t *)mask,
624                  (const uint8_t *)&nic_mask,
625                  sizeof(struct rte_flow_item_vlan), error);
626         if (ret)
627                 return ret;
628         if (spec) {
629                 eth.val.vlan_tag = spec->tci;
630                 eth.mask.vlan_tag = mask->tci;
631                 eth.val.vlan_tag &= eth.mask.vlan_tag;
632                 eth.val.ether_type = spec->inner_type;
633                 eth.mask.ether_type = mask->inner_type;
634                 eth.val.ether_type &= eth.mask.ether_type;
635         }
636         /*
637          * From verbs perspective an empty VLAN is equivalent
638          * to a packet without VLAN layer.
639          */
640         if (!eth.mask.vlan_tag)
641                 return rte_flow_error_set(error, EINVAL,
642                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
643                                           item->spec,
644                                           "VLAN cannot be empty");
645         if (!(flow->layers & l2m)) {
646                 if (size <= flow_size) {
647                         flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L2;
648                         mlx5_flow_spec_verbs_add(flow, &eth, size);
649                 }
650         } else {
651                 if (verbs->attr)
652                         mlx5_flow_item_vlan_update(verbs->attr, &eth);
653                 size = 0; /* Only an update is done in eth specification. */
654         }
655         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 | MLX5_FLOW_LAYER_OUTER_VLAN;
656         return size;
657 }
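
/*
 * Illustrative sketch (not part of the original file): a hypothetical VLAN
 * item matching VLAN ID 100 only; the TCI mask is limited to the 12 VID
 * bits as enforced by nic_mask above:
 *
 *   struct rte_flow_item_vlan vlan_spec = { .tci = RTE_BE16(100) };
 *   struct rte_flow_item_vlan vlan_mask = { .tci = RTE_BE16(0x0fff) };
 *
 * Instead of adding a new specification, the VLAN tag and EtherType are
 * merged into the ETH Verbs specification already present in the flow (see
 * mlx5_flow_item_vlan_update()), which is why 0 is returned in that case.
 */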
658
659 /**
660  * Convert the @p item into a Verbs specification after ensuring the NIC
661  * will understand and process it correctly.
662  * If the necessary size for the conversion is greater than the @p flow_size,
663  * nothing is written in @p flow, but the validation is still performed.
664  *
665  * @param[in] item
666  *   Item specification.
667  * @param[in, out] flow
668  *   Pointer to flow structure.
669  * @param[in] flow_size
670  *   Size in bytes of the available space in @p flow, if too small, nothing is
671  *   written.
672  * @param[out] error
673  *   Pointer to error structure.
674  *
675  * @return
676  *   On success, the number of bytes consumed/necessary; if the returned value
677  *   is less than or equal to @p flow_size, the @p item has been fully converted,
678  *   otherwise another call with the returned memory size should be done.
679  *   On error, a negative errno value is returned and rte_errno is set.
680  */
681 static int
682 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
683                     const size_t flow_size, struct rte_flow_error *error)
684 {
685         const struct rte_flow_item_ipv4 *spec = item->spec;
686         const struct rte_flow_item_ipv4 *mask = item->mask;
687         const struct rte_flow_item_ipv4 nic_mask = {
688                 .hdr = {
689                         .src_addr = RTE_BE32(0xffffffff),
690                         .dst_addr = RTE_BE32(0xffffffff),
691                         .type_of_service = 0xff,
692                         .next_proto_id = 0xff,
693                 },
694         };
695         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
696         struct ibv_flow_spec_ipv4_ext ipv4 = {
697                 .type = IBV_FLOW_SPEC_IPV4_EXT,
698                 .size = size,
699         };
700         int ret;
701
702         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
703                 return rte_flow_error_set(error, ENOTSUP,
704                                           RTE_FLOW_ERROR_TYPE_ITEM,
705                                           item,
706                                           "multiple L3 layers not supported");
707         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
708                 return rte_flow_error_set(error, ENOTSUP,
709                                           RTE_FLOW_ERROR_TYPE_ITEM,
710                                           item,
711                                           "L3 cannot follow an L4 layer.");
712         if (!mask)
713                 mask = &rte_flow_item_ipv4_mask;
714         ret = mlx5_flow_item_acceptable
715                 (item, (const uint8_t *)mask,
716                  (const uint8_t *)&nic_mask,
717                  sizeof(struct rte_flow_item_ipv4), error);
718         if (ret < 0)
719                 return ret;
720         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
721         if (spec) {
722                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
723                         .src_ip = spec->hdr.src_addr,
724                         .dst_ip = spec->hdr.dst_addr,
725                         .proto = spec->hdr.next_proto_id,
726                         .tos = spec->hdr.type_of_service,
727                 };
728                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
729                         .src_ip = mask->hdr.src_addr,
730                         .dst_ip = mask->hdr.dst_addr,
731                         .proto = mask->hdr.next_proto_id,
732                         .tos = mask->hdr.type_of_service,
733                 };
734                 /* Remove unwanted bits from values. */
735                 ipv4.val.src_ip &= ipv4.mask.src_ip;
736                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
737                 ipv4.val.proto &= ipv4.mask.proto;
738                 ipv4.val.tos &= ipv4.mask.tos;
739         }
740         flow->l3_protocol_en = !!ipv4.mask.proto;
741         flow->l3_protocol = ipv4.val.proto;
742         if (size <= flow_size) {
743                 uint64_t hash_fields = IBV_RX_HASH_SRC_IPV4 |
744                         IBV_RX_HASH_DST_IPV4;
745
746                 if (!(flow->rss.types &
747                       (ETH_RSS_IPV4 | ETH_RSS_FRAG_IPV4 |
748                        ETH_RSS_NONFRAG_IPV4_OTHER)))
749                         hash_fields = 0;
750                 flow->cur_verbs->hash_fields |= hash_fields;
751                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
752                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
753         }
754         return size;
755 }
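
/*
 * Illustrative sketch (not part of the original file): a hypothetical IPv4
 * item restricting the flow to UDP traffic (protocol 17) towards the
 * 192.0.2.0/24 subnet:
 *
 *   struct rte_flow_item_ipv4 ipv4_spec = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(0xc0000200),
 *                   .next_proto_id = 17,
 *           },
 *   };
 *   struct rte_flow_item_ipv4 ipv4_mask = {
 *           .hdr = {
 *                   .dst_addr = RTE_BE32(0xffffff00),
 *                   .next_proto_id = 0xff,
 *           },
 *   };
 *
 * Because next_proto_id is masked, l3_protocol_en is set and a following
 * UDP item is accepted while a TCP item would be rejected.
 */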
756
757 /**
758  * Convert the @p item into a Verbs specification after ensuring the NIC
759  * will understand and process it correctly.
760  * If the necessary size for the conversion is greater than the @p flow_size,
761  * nothing is written in @p flow, but the validation is still performed.
762  *
763  * @param[in] item
764  *   Item specification.
765  * @param[in, out] flow
766  *   Pointer to flow structure.
767  * @param[in] flow_size
768  *   Size in bytes of the available space in @p flow, if too small, nothing is
769  *   written.
770  * @param[out] error
771  *   Pointer to error structure.
772  *
773  * @return
774  *   On success, the number of bytes consumed/necessary; if the returned value
775  *   is less than or equal to @p flow_size, the @p item has been fully converted,
776  *   otherwise another call with the returned memory size should be done.
777  *   On error, a negative errno value is returned and rte_errno is set.
778  */
779 static int
780 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
781                     const size_t flow_size, struct rte_flow_error *error)
782 {
783         const struct rte_flow_item_ipv6 *spec = item->spec;
784         const struct rte_flow_item_ipv6 *mask = item->mask;
785         const struct rte_flow_item_ipv6 nic_mask = {
786                 .hdr = {
787                         .src_addr =
788                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
789                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
790                         .dst_addr =
791                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
792                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
793                         .vtc_flow = RTE_BE32(0xffffffff),
794                         .proto = 0xff,
795                         .hop_limits = 0xff,
796                 },
797         };
798         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
799         struct ibv_flow_spec_ipv6 ipv6 = {
800                 .type = IBV_FLOW_SPEC_IPV6,
801                 .size = size,
802         };
803         int ret;
804
805         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
806                 return rte_flow_error_set(error, ENOTSUP,
807                                           RTE_FLOW_ERROR_TYPE_ITEM,
808                                           item,
809                                           "multiple L3 layers not supported");
810         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
811                 return rte_flow_error_set(error, ENOTSUP,
812                                           RTE_FLOW_ERROR_TYPE_ITEM,
813                                           item,
814                                           "L3 cannot follow an L4 layer.");
815         if (!mask)
816                 mask = &rte_flow_item_ipv6_mask;
817         ret = mlx5_flow_item_acceptable
818                 (item, (const uint8_t *)mask,
819                  (const uint8_t *)&nic_mask,
820                  sizeof(struct rte_flow_item_ipv6), error);
821         if (ret < 0)
822                 return ret;
823         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
824         if (spec) {
825                 unsigned int i;
826                 uint32_t vtc_flow_val;
827                 uint32_t vtc_flow_mask;
828
829                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
830                        RTE_DIM(ipv6.val.src_ip));
831                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
832                        RTE_DIM(ipv6.val.dst_ip));
833                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
834                        RTE_DIM(ipv6.mask.src_ip));
835                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
836                        RTE_DIM(ipv6.mask.dst_ip));
837                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
838                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
839                 ipv6.val.flow_label =
840                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
841                                          IPV6_HDR_FL_SHIFT);
842                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
843                                          IPV6_HDR_TC_SHIFT;
844                 ipv6.val.next_hdr = spec->hdr.proto;
845                 ipv6.val.hop_limit = spec->hdr.hop_limits;
846                 ipv6.mask.flow_label =
847                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
848                                          IPV6_HDR_FL_SHIFT);
849                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
850                                           IPV6_HDR_TC_SHIFT;
851                 ipv6.mask.next_hdr = mask->hdr.proto;
852                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
853                 /* Remove unwanted bits from values. */
854                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
855                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
856                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
857                 }
858                 ipv6.val.flow_label &= ipv6.mask.flow_label;
859                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
860                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
861                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
862         }
863         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
864         flow->l3_protocol = ipv6.val.next_hdr;
865         if (size <= flow_size) {
866                 uint64_t hash_fields = IBV_RX_HASH_SRC_IPV6 |
867                         IBV_RX_HASH_DST_IPV6;
868
869                 if (!(flow->rss.types &
870                       (ETH_RSS_IPV6 | ETH_RSS_NONFRAG_IPV6_OTHER)))
871                         hash_fields = 0;
872                 flow->cur_verbs->hash_fields |= hash_fields;
873                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L3;
874                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
875         }
876         return size;
877 }
878
879 /**
880  * Convert the @p item into a Verbs specification after ensuring the NIC
881  * will understand and process it correctly.
882  * If the necessary size for the conversion is greater than the @p flow_size,
883  * nothing is written in @p flow, but the validation is still performed.
884  *
885  * @param[in] item
886  *   Item specification.
887  * @param[in, out] flow
888  *   Pointer to flow structure.
889  * @param[in] flow_size
890  *   Size in bytes of the available space in @p flow, if too small, nothing is
891  *   written.
892  * @param[out] error
893  *   Pointer to error structure.
894  *
895  * @return
896  *   On success, the number of bytes consumed/necessary; if the returned value
897  *   is less than or equal to @p flow_size, the @p item has been fully converted,
898  *   otherwise another call with the returned memory size should be done.
899  *   On error, a negative errno value is returned and rte_errno is set.
900  */
901 static int
902 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
903                    const size_t flow_size, struct rte_flow_error *error)
904 {
905         const struct rte_flow_item_udp *spec = item->spec;
906         const struct rte_flow_item_udp *mask = item->mask;
907         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
908         struct ibv_flow_spec_tcp_udp udp = {
909                 .type = IBV_FLOW_SPEC_UDP,
910                 .size = size,
911         };
912         int ret;
913
914         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
915                 return rte_flow_error_set(error, ENOTSUP,
916                                           RTE_FLOW_ERROR_TYPE_ITEM,
917                                           item,
918                                           "protocol filtering not compatible"
919                                           " with UDP layer");
920         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
921                 return rte_flow_error_set(error, ENOTSUP,
922                                           RTE_FLOW_ERROR_TYPE_ITEM,
923                                           item,
924                                           "L3 is mandatory to filter"
925                                           " on L4");
926         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
927                 return rte_flow_error_set(error, ENOTSUP,
928                                           RTE_FLOW_ERROR_TYPE_ITEM,
929                                           item,
930                                           "L4 layer is already"
931                                           " present");
932         if (!mask)
933                 mask = &rte_flow_item_udp_mask;
934         ret = mlx5_flow_item_acceptable
935                 (item, (const uint8_t *)mask,
936                  (const uint8_t *)&rte_flow_item_udp_mask,
937                  sizeof(struct rte_flow_item_udp), error);
938         if (ret < 0)
939                 return ret;
940         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
941         if (spec) {
942                 udp.val.dst_port = spec->hdr.dst_port;
943                 udp.val.src_port = spec->hdr.src_port;
944                 udp.mask.dst_port = mask->hdr.dst_port;
945                 udp.mask.src_port = mask->hdr.src_port;
946                 /* Remove unwanted bits from values. */
947                 udp.val.src_port &= udp.mask.src_port;
948                 udp.val.dst_port &= udp.mask.dst_port;
949         }
950         if (size <= flow_size) {
951                 uint64_t hash_fields = IBV_RX_HASH_SRC_PORT_UDP |
952                         IBV_RX_HASH_DST_PORT_UDP;
953
954                 if (!(flow->rss.types & ETH_RSS_UDP))
955                         hash_fields = 0;
956                 flow->cur_verbs->hash_fields |= hash_fields;
957                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
958                 mlx5_flow_spec_verbs_add(flow, &udp, size);
959         }
960         return size;
961 }
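
/*
 * Illustrative sketch (not part of the original file): a hypothetical UDP
 * item matching only the destination port (4789, VXLAN) and leaving the
 * source port wildcarded:
 *
 *   struct rte_flow_item_udp udp_spec = {
 *           .hdr = { .dst_port = RTE_BE16(4789) },
 *   };
 *   struct rte_flow_item_udp udp_mask = {
 *           .hdr = { .dst_port = RTE_BE16(0xffff) },
 *   };
 *
 * A preceding L3 item is mandatory; if it filters on the protocol field,
 * that protocol must be UDP (17) or the item is rejected.
 */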
962
963 /**
964  * Convert the @p item into a Verbs specification after ensuring the NIC
965  * will understand and process it correctly.
966  * If the necessary size for the conversion is greater than the @p flow_size,
967  * nothing is written in @p flow, but the validation is still performed.
968  *
969  * @param[in] item
970  *   Item specification.
971  * @param[in, out] flow
972  *   Pointer to flow structure.
973  * @param[in] flow_size
974  *   Size in bytes of the available space in @p flow, if too small, nothing is
975  *   written.
976  * @param[out] error
977  *   Pointer to error structure.
978  *
979  * @return
980  *   On success, the number of bytes consumed/necessary; if the returned value
981  *   is less than or equal to @p flow_size, the @p item has been fully converted,
982  *   otherwise another call with the returned memory size should be done.
983  *   On error, a negative errno value is returned and rte_errno is set.
984  */
985 static int
986 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
987                    const size_t flow_size, struct rte_flow_error *error)
988 {
989         const struct rte_flow_item_tcp *spec = item->spec;
990         const struct rte_flow_item_tcp *mask = item->mask;
991         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
992         struct ibv_flow_spec_tcp_udp tcp = {
993                 .type = IBV_FLOW_SPEC_TCP,
994                 .size = size,
995         };
996         int ret;
997
998         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
999                 return rte_flow_error_set(error, ENOTSUP,
1000                                           RTE_FLOW_ERROR_TYPE_ITEM,
1001                                           item,
1002                                           "protocol filtering not compatible"
1003                                           " with TCP layer");
1004         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
1005                 return rte_flow_error_set(error, ENOTSUP,
1006                                           RTE_FLOW_ERROR_TYPE_ITEM,
1007                                           item,
1008                                           "L3 is mandatory to filter on L4");
1009         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
1010                 return rte_flow_error_set(error, ENOTSUP,
1011                                           RTE_FLOW_ERROR_TYPE_ITEM,
1012                                           item,
1013                                           "L4 layer is already present");
1014         if (!mask)
1015                 mask = &rte_flow_item_tcp_mask;
1016         ret = mlx5_flow_item_acceptable
1017                 (item, (const uint8_t *)mask,
1018                  (const uint8_t *)&rte_flow_item_tcp_mask,
1019                  sizeof(struct rte_flow_item_tcp), error);
1020         if (ret < 0)
1021                 return ret;
1022         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
1023         if (spec) {
1024                 tcp.val.dst_port = spec->hdr.dst_port;
1025                 tcp.val.src_port = spec->hdr.src_port;
1026                 tcp.mask.dst_port = mask->hdr.dst_port;
1027                 tcp.mask.src_port = mask->hdr.src_port;
1028                 /* Remove unwanted bits from values. */
1029                 tcp.val.src_port &= tcp.mask.src_port;
1030                 tcp.val.dst_port &= tcp.mask.dst_port;
1031         }
1032         if (size <= flow_size) {
1033                 uint64_t hash_fields = IBV_RX_HASH_SRC_PORT_TCP |
1034                         IBV_RX_HASH_DST_PORT_TCP;
1035
1036                 if (!(flow->rss.types & ETH_RSS_TCP))
1037                         hash_fields = 0;
1038                 flow->cur_verbs->hash_fields |= hash_fields;
1039                 flow->cur_verbs->attr->priority = MLX5_PRIORITY_MAP_L4;
1040                 mlx5_flow_spec_verbs_add(flow, &tcp, size);
1041         }
1042         return size;
1043 }
1044
1045 /**
1046  * Convert the @p pattern into Verbs specifications after ensuring the NIC
1047  * will understand and process it correctly.
1048  * The conversion is performed item per item, each of them is written into
1049  * the @p flow if its size is less than or equal to @p flow_size.
1050  * Validation and memory consumption computation are still performed until the
1051  * end of @p pattern, unless an error is encountered.
1052  *
1053  * @param[in] pattern
1054  *   Flow pattern.
1055  * @param[in, out] flow
1056  *   Pointer to the rte_flow structure.
1057  * @param[in] flow_size
1058  *   Size in bytes of the available space in @p flow, if too small some
1059  *   garbage may be present.
1060  * @param[out] error
1061  *   Pointer to error structure.
1062  *
1063  * @return
1064  *   On success, the number of bytes consumed/necessary; if the returned value
1065  *   is less than or equal to @p flow_size, the @p pattern has been fully
1066  *   converted, otherwise another call with the returned memory size should
1067  *   be done.
1068  *   On error, a negative errno value is returned and rte_errno is set.
1069  */
1070 static int
1071 mlx5_flow_items(const struct rte_flow_item pattern[],
1072                 struct rte_flow *flow, const size_t flow_size,
1073                 struct rte_flow_error *error)
1074 {
1075         int remain = flow_size;
1076         size_t size = 0;
1077
1078         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
1079                 int ret = 0;
1080
1081                 switch (pattern->type) {
1082                 case RTE_FLOW_ITEM_TYPE_VOID:
1083                         break;
1084                 case RTE_FLOW_ITEM_TYPE_ETH:
1085                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
1086                         break;
1087                 case RTE_FLOW_ITEM_TYPE_VLAN:
1088                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
1089                         break;
1090                 case RTE_FLOW_ITEM_TYPE_IPV4:
1091                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
1092                         break;
1093                 case RTE_FLOW_ITEM_TYPE_IPV6:
1094                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
1095                         break;
1096                 case RTE_FLOW_ITEM_TYPE_UDP:
1097                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
1098                         break;
1099                 case RTE_FLOW_ITEM_TYPE_TCP:
1100                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
1101                         break;
1102                 default:
1103                         return rte_flow_error_set(error, ENOTSUP,
1104                                                   RTE_FLOW_ERROR_TYPE_ITEM,
1105                                                   pattern,
1106                                                   "item not supported");
1107                 }
1108                 if (ret < 0)
1109                         return ret;
1110                 if (remain > ret)
1111                         remain -= ret;
1112                 else
1113                         remain = 0;
1114                 size += ret;
1115         }
1116         if (!flow->layers) {
1117                 const struct rte_flow_item item = {
1118                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1119                 };
1120
1121                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
1122         }
1123         return size;
1124 }
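
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * pattern array as consumed by mlx5_flow_items():
 *
 *   const struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 * A first pass with an undersized buffer only computes the number of bytes
 * required; a second pass with a buffer at least that large fills in the
 * Verbs specifications. An empty pattern (END only) is treated as a default
 * ETH item matching all L2 traffic.
 */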
1125
1126 /**
1127  * Convert the @p action into a Verbs specification after ensuring the NIC
1128  * will understand and process it correctly.
1129  * If the necessary size for the conversion is greater than the @p flow_size,
1130  * nothing is written in @p flow, but the validation is still performed.
1131  *
1132  * @param[in] action
1133  *   Action configuration.
1134  * @param[in, out] flow
1135  *   Pointer to flow structure.
1136  * @param[in] flow_size
1137  *   Size in bytes of the available space in @p flow, if too small, nothing is
1138  *   written.
1139  * @param[out] error
1140  *   Pointer to error structure.
1141  *
1142  * @return
1143  *   On success, the number of bytes consumed/necessary; if the returned value
1144  *   is less than or equal to @p flow_size, the @p action has been fully
1145  *   converted, otherwise another call with the returned memory size should
1146  *   be done.
1147  *   On error, a negative errno value is returned and rte_errno is set.
1148  */
1149 static int
1150 mlx5_flow_action_drop(const struct rte_flow_action *action,
1151                       struct rte_flow *flow, const size_t flow_size,
1152                       struct rte_flow_error *error)
1153 {
1154         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1155         struct ibv_flow_spec_action_drop drop = {
1156                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1157                         .size = size,
1158         };
1159
1160         if (flow->fate)
1161                 return rte_flow_error_set(error, ENOTSUP,
1162                                           RTE_FLOW_ERROR_TYPE_ACTION,
1163                                           action,
1164                                           "multiple fate actions are not"
1165                                           " supported");
1166         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
1167                 return rte_flow_error_set(error, ENOTSUP,
1168                                           RTE_FLOW_ERROR_TYPE_ACTION,
1169                                           action,
1170                                           "drop is not compatible with"
1171                                           " flag/mark action");
1172         if (size <= flow_size)
1173                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1174         flow->fate |= MLX5_FLOW_FATE_DROP;
1175         return size;
1176 }
1177
1178 /**
1179  * Convert the @p action into @p flow after ensuring the NIC will understand
1180  * and process it correctly.
1181  *
1182  * @param[in] dev
1183  *   Pointer to Ethernet device structure.
1184  * @param[in] action
1185  *   Action configuration.
1186  * @param[in, out] flow
1187  *   Pointer to flow structure.
1188  * @param[out] error
1189  *   Pointer to error structure.
1190  *
1191  * @return
1192  *   0 on success, a negative errno value otherwise and rte_errno is set.
1193  */
1194 static int
1195 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1196                        const struct rte_flow_action *action,
1197                        struct rte_flow *flow,
1198                        struct rte_flow_error *error)
1199 {
1200         struct priv *priv = dev->data->dev_private;
1201         const struct rte_flow_action_queue *queue = action->conf;
1202
1203         if (flow->fate)
1204                 return rte_flow_error_set(error, ENOTSUP,
1205                                           RTE_FLOW_ERROR_TYPE_ACTION,
1206                                           action,
1207                                           "multiple fate actions are not"
1208                                           " supported");
1209         if (queue->index >= priv->rxqs_n)
1210                 return rte_flow_error_set(error, EINVAL,
1211                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1212                                           &queue->index,
1213                                           "queue index out of range");
1214         if (!(*priv->rxqs)[queue->index])
1215                 return rte_flow_error_set(error, EINVAL,
1216                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1217                                           &queue->index,
1218                                           "queue is not configured");
1219         if (flow->queue)
1220                 (*flow->queue)[0] = queue->index;
1221         flow->rss.queue_num = 1;
1222         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1223         return 0;
1224 }
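
/*
 * Illustrative sketch (not part of the original file): a hypothetical QUEUE
 * action directing matched packets to Rx queue 3:
 *
 *   struct rte_flow_action_queue queue = { .index = 3 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * The index must refer to a configured Rx queue; internally the action is
 * stored as a single-entry queue list (flow->rss.queue_num == 1), which
 * lets the rest of the code handle QUEUE and RSS fates uniformly.
 */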
1225
1226 /**
1227  * Ensure the @p action will be understood and used correctly by the NIC.
1228  *
1229  * @param dev
1230  *   Pointer to Ethernet device structure.
1231  * @param[in] action
1232  *   Pointer to the RSS action configuration.
1233  * @param[in, out] flow
1234  *   Pointer to the rte_flow structure.
1235  * @param[in, out] error
1236  *   Pointer to error structure.
1237  *
1238  * @return
1239  *   On success @p flow->queue array and @p flow->rss are filled and valid.
1240  *   On error, a negative errno value is returned and rte_errno is set.
1241  */
1242 static int
1243 mlx5_flow_action_rss(struct rte_eth_dev *dev,
1244                      const struct rte_flow_action *action,
1245                      struct rte_flow *flow,
1246                      struct rte_flow_error *error)
1247 {
1248         struct priv *priv = dev->data->dev_private;
1249         const struct rte_flow_action_rss *rss = action->conf;
1250         unsigned int i;
1251
1252         if (flow->fate)
1253                 return rte_flow_error_set(error, ENOTSUP,
1254                                           RTE_FLOW_ERROR_TYPE_ACTION,
1255                                           action,
1256                                           "multiple fate actions are not"
1257                                           " supported");
1258         if (rss->func != RTE_ETH_HASH_FUNCTION_DEFAULT &&
1259             rss->func != RTE_ETH_HASH_FUNCTION_TOEPLITZ)
1260                 return rte_flow_error_set(error, ENOTSUP,
1261                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1262                                           &rss->func,
1263                                           "RSS hash function not supported");
1264         if (rss->level > 1)
1265                 return rte_flow_error_set(error, ENOTSUP,
1266                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1267                                           &rss->level,
1268                                           "tunnel RSS is not supported");
1269         if (rss->key_len < MLX5_RSS_HASH_KEY_LEN)
1270                 return rte_flow_error_set(error, ENOTSUP,
1271                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1272                                           &rss->key_len,
1273                                           "RSS hash key too small");
1274         if (rss->key_len > MLX5_RSS_HASH_KEY_LEN)
1275                 return rte_flow_error_set(error, ENOTSUP,
1276                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1277                                           &rss->key_len,
1278                                           "RSS hash key too large");
1279         if (rss->queue_num > priv->config.ind_table_max_size)
1280                 return rte_flow_error_set(error, ENOTSUP,
1281                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1282                                           &rss->queue_num,
1283                                           "number of queues too large");
1284         if (rss->types & MLX5_RSS_HF_MASK)
1285                 return rte_flow_error_set(error, ENOTSUP,
1286                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1287                                           &rss->types,
1288                                           "some RSS protocols are not"
1289                                           " supported");
1290         for (i = 0; i != rss->queue_num; ++i) {
1291                 if (!(*priv->rxqs)[rss->queue[i]])
1292                         return rte_flow_error_set
1293                                 (error, EINVAL,
1294                                  RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1295                                  &rss->queue[i],
1296                                  "queue is not configured");
1297         }
1298         if (flow->queue)
1299                 memcpy((*flow->queue), rss->queue,
1300                        rss->queue_num * sizeof(uint16_t));
1301         flow->rss.queue_num = rss->queue_num;
1302         memcpy(flow->key, rss->key, MLX5_RSS_HASH_KEY_LEN);
1303         flow->rss.types = rss->types;
1304         flow->fate |= MLX5_FLOW_FATE_RSS;
1305         return 0;
1306 }
1307
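/*
 * Illustrative sketch (not part of the driver): an RSS action configuration
 * that passes the checks above.  The hash function must be default or
 * Toeplitz, the level must not exceed 1 (no tunnel RSS), the key length must
 * be exactly MLX5_RSS_HASH_KEY_LEN and every queue index must refer to a
 * configured Rx queue.  The key bytes and queue indices below are
 * placeholders.
 *
 *	static const uint8_t rss_key[MLX5_RSS_HASH_KEY_LEN] = { 0x6d, 0x5a, };
 *	static const uint16_t rss_queue[] = { 0, 1, 2, 3 };
 *	struct rte_flow_action_rss rss_conf = {
 *		.func = RTE_ETH_HASH_FUNCTION_TOEPLITZ,
 *		.level = 0,
 *		.types = ETH_RSS_IPV4 | ETH_RSS_NONFRAG_IPV4_UDP,
 *		.key_len = MLX5_RSS_HASH_KEY_LEN,
 *		.queue_num = RTE_DIM(rss_queue),
 *		.key = rss_key,
 *		.queue = rss_queue,
 *	};
 */
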
1308 /**
1309  * Convert the @p action into a Verbs specification after ensuring the NIC
1310  * will understand and process it correctly.
1311  * If the necessary size for the conversion is greater than @p flow_size,
1312  * nothing is written in @p flow, but the validation is still performed.
1313  *
1314  * @param[in] action
1315  *   Action configuration.
1316  * @param[in, out] flow
1317  *   Pointer to flow structure.
1318  * @param[in] flow_size
1319  *   Size in bytes of the available space in @p flow; if too small, nothing is
1320  *   written.
1321  * @param[out] error
1322  *   Pointer to error structure.
1323  *
1324  * @return
1325  *   On success the number of bytes consumed/necessary; if the returned value
1326  *   is less than or equal to @p flow_size, the @p action has been fully
1327  *   converted, otherwise another call with the returned memory size should
1328  *   be made.
1329  *   On error, a negative errno value is returned and rte_errno is set.
1330  */
1331 static int
1332 mlx5_flow_action_flag(const struct rte_flow_action *action,
1333                       struct rte_flow *flow, const size_t flow_size,
1334                       struct rte_flow_error *error)
1335 {
1336         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1337         struct ibv_flow_spec_action_tag tag = {
1338                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1339                 .size = size,
1340                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1341         };
1342         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1343
1344         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1345                 return rte_flow_error_set(error, ENOTSUP,
1346                                           RTE_FLOW_ERROR_TYPE_ACTION,
1347                                           action,
1348                                           "flag action already present");
1349         if (flow->fate & MLX5_FLOW_FATE_DROP)
1350                 return rte_flow_error_set(error, ENOTSUP,
1351                                           RTE_FLOW_ERROR_TYPE_ACTION,
1352                                           action,
1353                                           "flag is not compatible with drop"
1354                                           " action");
1355         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1356                 size = 0;
1357         else if (size <= flow_size && verbs)
1358                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1359         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1360         return size;
1361 }
1362
1363 /**
1364  * Update verbs specification to modify the flag to mark.
1365  *
1366  * @param[in, out] verbs
1367  *   Pointer to the mlx5_flow_verbs structure.
1368  * @param[in] mark_id
1369  *   Mark identifier to replace the flag.
1370  */
1371 static void
1372 mlx5_flow_verbs_mark_update(struct mlx5_flow_verbs *verbs, uint32_t mark_id)
1373 {
1374         struct ibv_spec_header *hdr;
1375         int i;
1376
1377         if (!verbs)
1378                 return;
1379         /* Update Verbs specification. */
1380         hdr = (struct ibv_spec_header *)verbs->specs;
1381         if (!hdr)
1382                 return;
1383         for (i = 0; i != verbs->attr->num_of_specs; ++i) {
1384                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1385                         struct ibv_flow_spec_action_tag *t =
1386                                 (struct ibv_flow_spec_action_tag *)hdr;
1387
1388                         t->tag_id = mlx5_flow_mark_set(mark_id);
1389                 }
1390                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1391         }
1392 }
1393
1394 /**
1395  * Convert the @p action into @p flow (or update the already present
1396  * Flag Verbs specification) after ensuring the NIC will understand and
1397  * process it correctly.
1398  * If the necessary size for the conversion is greater than @p flow_size,
1399  * nothing is written in @p flow, but the validation is still performed.
1400  *
1401  * @param[in] action
1402  *   Action configuration.
1403  * @param[in, out] flow
1404  *   Pointer to flow structure.
1405  * @param[in] flow_size
1406  *   Size in bytes of the available space in @p flow; if too small, nothing is
1407  *   written.
1408  * @param[out] error
1409  *   Pointer to error structure.
1410  *
1411  * @return
1412  *   On success the number of bytes consumed/necessary; if the returned value
1413  *   is less than or equal to @p flow_size, the @p action has been fully
1414  *   converted, otherwise another call with the returned memory size should
1415  *   be made.
1416  *   On error, a negative errno value is returned and rte_errno is set.
1417  */
1418 static int
1419 mlx5_flow_action_mark(const struct rte_flow_action *action,
1420                       struct rte_flow *flow, const size_t flow_size,
1421                       struct rte_flow_error *error)
1422 {
1423         const struct rte_flow_action_mark *mark = action->conf;
1424         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1425         struct ibv_flow_spec_action_tag tag = {
1426                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1427                 .size = size,
1428         };
1429         struct mlx5_flow_verbs *verbs = flow->cur_verbs;
1430
1431         if (!mark)
1432                 return rte_flow_error_set(error, EINVAL,
1433                                           RTE_FLOW_ERROR_TYPE_ACTION,
1434                                           action,
1435                                           "configuration cannot be null");
1436         if (mark->id >= MLX5_FLOW_MARK_MAX)
1437                 return rte_flow_error_set(error, EINVAL,
1438                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1439                                           &mark->id,
1440                                           "mark id must be in 0 <= id < "
1441                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1442         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1443                 return rte_flow_error_set(error, ENOTSUP,
1444                                           RTE_FLOW_ERROR_TYPE_ACTION,
1445                                           action,
1446                                           "mark action already present");
1447         if (flow->fate & MLX5_FLOW_FATE_DROP)
1448                 return rte_flow_error_set(error, ENOTSUP,
1449                                           RTE_FLOW_ERROR_TYPE_ACTION,
1450                                           action,
1451                                           "mark is not compatible with drop"
1452                                           " action");
1453         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1454                 mlx5_flow_verbs_mark_update(verbs, mark->id);
1455                 size = 0;
1456         } else if (size <= flow_size) {
1457                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1458                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1459         }
1460         flow->modifier |= MLX5_FLOW_MOD_MARK;
1461         return size;
1462 }
1463
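/*
 * Illustrative sketch (not part of the driver): an action list combining
 * FLAG and MARK.  Only a single ibv_flow_spec_action_tag is emitted for the
 * pair; when MARK follows FLAG, the tag already written for FLAG is simply
 * rewritten with the mark id through mlx5_flow_verbs_mark_update().  The
 * mark id and queue index below are placeholders.
 *
 *	struct rte_flow_action_mark mark_conf = { .id = 42 };
 *	struct rte_flow_action_queue queue_conf = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_FLAG },
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue_conf },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 */
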
1464 /**
1465  * Convert the @p action into @p flow after ensuring the NIC will understand
1466  * and process it correctly.
1467  * The conversion is performed one action at a time; each of them is written
1468  * into @p flow if its size is less than or equal to @p flow_size.
1469  * Validation and memory consumption computation are still performed until the
1470  * end of @p actions, unless an error is encountered.
1471  *
1472  * @param[in] dev
1473  *   Pointer to Ethernet device structure.
1474  * @param[in] actions
1475  *   Pointer to flow actions array.
1476  * @param[in, out] flow
1477  *   Pointer to the rte_flow structure.
1478  * @param[in] flow_size
1479  *   Size in bytes of the available space in @p flow; if too small, some
1480  *   garbage may be present.
1481  * @param[out] error
1482  *   Pointer to error structure.
1483  *
1484  * @return
1485  *   On success the number of bytes consumed/necessary; if the returned value
1486  *   is less than or equal to @p flow_size, the @p actions have been fully
1487  *   converted, otherwise another call with the returned memory size should
1488  *   be made.
1489  *   On error, a negative errno value is returned and rte_errno is set.
1490  */
1491 static int
1492 mlx5_flow_actions(struct rte_eth_dev *dev,
1493                   const struct rte_flow_action actions[],
1494                   struct rte_flow *flow, const size_t flow_size,
1495                   struct rte_flow_error *error)
1496 {
1497         size_t size = 0;
1498         int remain = flow_size;
1499         int ret = 0;
1500
1501         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1502                 switch (actions->type) {
1503                 case RTE_FLOW_ACTION_TYPE_VOID:
1504                         break;
1505                 case RTE_FLOW_ACTION_TYPE_FLAG:
1506                         ret = mlx5_flow_action_flag(actions, flow, remain,
1507                                                     error);
1508                         break;
1509                 case RTE_FLOW_ACTION_TYPE_MARK:
1510                         ret = mlx5_flow_action_mark(actions, flow, remain,
1511                                                     error);
1512                         break;
1513                 case RTE_FLOW_ACTION_TYPE_DROP:
1514                         ret = mlx5_flow_action_drop(actions, flow, remain,
1515                                                     error);
1516                         break;
1517                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1518                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1519                         break;
1520                 case RTE_FLOW_ACTION_TYPE_RSS:
1521                         ret = mlx5_flow_action_rss(dev, actions, flow, error);
1522                         break;
1523                 default:
1524                         return rte_flow_error_set(error, ENOTSUP,
1525                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1526                                                   actions,
1527                                                   "action not supported");
1528                 }
1529                 if (ret < 0)
1530                         return ret;
1531                 if (remain > ret)
1532                         remain -= ret;
1533                 else
1534                         remain = 0;
1535                 size += ret;
1536         }
1537         if (!flow->fate)
1538                 return rte_flow_error_set(error, ENOTSUP,
1539                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1540                                           NULL,
1541                                           "no fate action found");
1542         return size;
1543 }
1544
1545 /**
1546  * Convert the @p attributes, @p pattern and @p actions into a flow for the
1547  * NIC after ensuring the NIC will understand and process it correctly.
1548  * The conversion is performed one item/action at a time; each of them is
1549  * written into @p flow if its size is less than or equal to @p
1550  * flow_size.
1551  * Validation and memory consumption computation are still performed until the
1552  * end, unless an error is encountered.
1553  *
1554  * @param[in] dev
1555  *   Pointer to Ethernet device.
1556  * @param[in, out] flow
1557  *   Pointer to flow structure.
1558  * @param[in] flow_size
1559  *   Size in bytes of the available space in @p flow; if too small, some
1560  *   garbage may be present.
1561  * @param[in] attributes
1562  *   Flow rule attributes.
1563  * @param[in] pattern
1564  *   Pattern specification (list terminated by the END pattern item).
1565  * @param[in] actions
1566  *   Associated actions (list terminated by the END action).
1567  * @param[out] error
1568  *   Perform verbose error reporting if not NULL.
1569  *
1570  * @return
1571  *   On success the number of bytes consumed/necessary; if the returned value
1572  *   is less than or equal to @p flow_size, the flow has been fully converted
1573  *   and can be applied, otherwise another call with the returned memory size
1574  *   should be made.
1575  *   On error, a negative errno value is returned and rte_errno is set.
1576  */
1577 static int
1578 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1579                 const size_t flow_size,
1580                 const struct rte_flow_attr *attributes,
1581                 const struct rte_flow_item pattern[],
1582                 const struct rte_flow_action actions[],
1583                 struct rte_flow_error *error)
1584 {
1585         struct rte_flow local_flow = { .layers = 0, };
1586         size_t size = sizeof(*flow);
1587         union {
1588                 struct rte_flow_expand_rss buf;
1589                 uint8_t buffer[2048];
1590         } expand_buffer;
1591         struct rte_flow_expand_rss *buf = &expand_buffer.buf;
1592         struct mlx5_flow_verbs *original_verbs = NULL;
1593         size_t original_verbs_size = 0;
1594         uint32_t original_layers = 0;
1595         int expanded_pattern_idx = 0;
1596         int ret;
1597         uint32_t i;
1598
1599         if (size > flow_size)
1600                 flow = &local_flow;
1601         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1602         if (ret < 0)
1603                 return ret;
1604         ret = mlx5_flow_actions(dev, actions, &local_flow, 0, error);
1605         if (ret < 0)
1606                 return ret;
1607         if (local_flow.rss.types) {
1608                 ret = rte_flow_expand_rss(buf, sizeof(expand_buffer.buffer),
1609                                           pattern, local_flow.rss.types,
1610                                           mlx5_support_expansion,
1611                                           MLX5_EXPANSION_ROOT);
1612                 assert(ret > 0 &&
1613                        (unsigned int)ret < sizeof(expand_buffer.buffer));
1614         } else {
1615                 buf->entries = 1;
1616                 buf->entry[0].pattern = (void *)(uintptr_t)pattern;
1617         }
1618         size += RTE_ALIGN_CEIL(local_flow.rss.queue_num * sizeof(uint16_t),
1619                                sizeof(void *));
1620         if (size <= flow_size)
1621                 flow->queue = (void *)(flow + 1);
1622         LIST_INIT(&flow->verbs);
1623         flow->layers = 0;
1624         flow->modifier = 0;
1625         flow->fate = 0;
1626         for (i = 0; i != buf->entries; ++i) {
1627                 size_t off = size;
1628                 size_t off2;
1629
1630                 flow->layers = original_layers;
1631                 size += sizeof(struct ibv_flow_attr) +
1632                         sizeof(struct mlx5_flow_verbs);
1633                 off2 = size;
1634                 if (size < flow_size) {
1635                         flow->cur_verbs = (void *)((uintptr_t)flow + off);
1636                         flow->cur_verbs->attr = (void *)(flow->cur_verbs + 1);
1637                         flow->cur_verbs->specs =
1638                                 (void *)(flow->cur_verbs->attr + 1);
1639                 }
1640                 /* First iteration convert the pattern into Verbs. */
1641                 if (i == 0) {
1642                         /* Actions don't need to be converted several times. */
1643                         ret = mlx5_flow_actions(dev, actions, flow,
1644                                                 (size < flow_size) ?
1645                                                 flow_size - size : 0,
1646                                                 error);
1647                         if (ret < 0)
1648                                 return ret;
1649                         size += ret;
1650                 } else {
1651                         /*
1652                          * Next iteration means the pattern has already been
1653                          * converted and an expansion is necessary to match
1654                          * the user RSS request.  For that only the expanded
1655                          * items will be converted, the common part with the
1656                          * user pattern are just copied into the next buffer
1657                          * zone.
1658                          */
1659                         size += original_verbs_size;
1660                         if (size < flow_size) {
1661                                 rte_memcpy(flow->cur_verbs->attr,
1662                                            original_verbs->attr,
1663                                            original_verbs_size +
1664                                            sizeof(struct ibv_flow_attr));
1665                                 flow->cur_verbs->size = original_verbs_size;
1666                         }
1667                 }
1668                 ret = mlx5_flow_items
1669                         ((const struct rte_flow_item *)
1670                          &buf->entry[i].pattern[expanded_pattern_idx],
1671                          flow,
1672                          (size < flow_size) ? flow_size - size : 0, error);
1673                 if (ret < 0)
1674                         return ret;
1675                 size += ret;
1676                 if (size <= flow_size) {
1677                         mlx5_flow_adjust_priority(dev, flow);
1678                         LIST_INSERT_HEAD(&flow->verbs, flow->cur_verbs, next);
1679                 }
1680                 /*
1681                  * Keep a pointer to the first verbs conversion and the layers
1682                  * it has encountered.
1683                  */
1684                 if (i == 0) {
1685                         original_verbs = flow->cur_verbs;
1686                         original_verbs_size = size - off2;
1687                         original_layers = flow->layers;
1688                         /*
1689                          * move the index of the expanded pattern to the
1690                          * first item not addressed yet.
1691                          */
1692                         if (pattern->type == RTE_FLOW_ITEM_TYPE_END) {
1693                                 expanded_pattern_idx++;
1694                         } else {
1695                                 const struct rte_flow_item *item = pattern;
1696
1697                                 for (item = pattern;
1698                                      item->type != RTE_FLOW_ITEM_TYPE_END;
1699                                      ++item)
1700                                         expanded_pattern_idx++;
1701                         }
1702                 }
1703         }
1704         /* Restore the origin layers in the flow. */
1705         flow->layers = original_layers;
1706         return size;
1707 }
1708
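/*
 * Illustrative sketch (not part of the driver): how the expansion loop above
 * typically plays out.  With a user pattern of "eth / ipv4 / end" and an RSS
 * action requesting ETH_RSS_IPV4, ETH_RSS_NONFRAG_IPV4_UDP and
 * ETH_RSS_NONFRAG_IPV4_TCP, rte_flow_expand_rss() is expected to produce
 * entries along the lines of:
 *
 *	eth / ipv4
 *	eth / ipv4 / udp
 *	eth / ipv4 / tcp
 *
 * The first iteration converts the whole pattern and the actions into Verbs;
 * the following iterations copy that converted prefix and only translate the
 * expanded items, so each entry becomes its own mlx5_flow_verbs on
 * flow->verbs.
 */
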
1709 /**
1710  * Set the mark flag on the flow's Rx queues if it has a mark or flag modifier.
1711  *
1712  * @param[in] dev
1713  *   Pointer to Ethernet device.
1714  * @param[in] flow
1715  *   Pointer to flow structure.
1716  */
1717 static void
1718 mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1719 {
1720         struct priv *priv = dev->data->dev_private;
1721
1722         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1723                 unsigned int i;
1724
1725                 for (i = 0; i != flow->rss.queue_num; ++i) {
1726                         int idx = (*flow->queue)[i];
1727                         struct mlx5_rxq_ctrl *rxq_ctrl =
1728                                 container_of((*priv->rxqs)[idx],
1729                                              struct mlx5_rxq_ctrl, rxq);
1730
1731                         rxq_ctrl->rxq.mark = 1;
1732                         rxq_ctrl->flow_mark_n++;
1733                 }
1734         }
1735 }
1736
1737 /**
1738  * Clear the Rx queue mark associated with the @p flow if no other flow uses
1739  * it with a mark request.
1740  *
1741  * @param dev
1742  *   Pointer to Ethernet device.
1743  * @param[in] flow
1744  *   Pointer to the flow.
1745  */
1746 static void
1747 mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1748 {
1749         struct priv *priv = dev->data->dev_private;
1750
1751         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1752                 unsigned int i;
1753
1754                 for (i = 0; i != flow->rss.queue_num; ++i) {
1755                         int idx = (*flow->queue)[i];
1756                         struct mlx5_rxq_ctrl *rxq_ctrl =
1757                                 container_of((*priv->rxqs)[idx],
1758                                              struct mlx5_rxq_ctrl, rxq);
1759
1760                         rxq_ctrl->flow_mark_n--;
1761                         rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1762                 }
1763         }
1764 }
1765
1766 /**
1767  * Clear the mark bit in all Rx queues.
1768  *
1769  * @param dev
1770  *   Pointer to Ethernet device.
1771  */
1772 static void
1773 mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
1774 {
1775         struct priv *priv = dev->data->dev_private;
1776         unsigned int i;
1777         unsigned int idx;
1778
1779         for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) {
1780                 struct mlx5_rxq_ctrl *rxq_ctrl;
1781
1782                 if (!(*priv->rxqs)[idx])
1783                         continue;
1784                 rxq_ctrl = container_of((*priv->rxqs)[idx],
1785                                         struct mlx5_rxq_ctrl, rxq);
1786                 rxq_ctrl->flow_mark_n = 0;
1787                 rxq_ctrl->rxq.mark = 0;
1788                 ++idx;
1789         }
1790 }
1791
1792 /**
1793  * Validate a flow supported by the NIC.
1794  *
1795  * @see rte_flow_validate()
1796  * @see rte_flow_ops
1797  */
1798 int
1799 mlx5_flow_validate(struct rte_eth_dev *dev,
1800                    const struct rte_flow_attr *attr,
1801                    const struct rte_flow_item items[],
1802                    const struct rte_flow_action actions[],
1803                    struct rte_flow_error *error)
1804 {
1805         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1806
1807         if (ret < 0)
1808                 return ret;
1809         return 0;
1810 }
1811
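/*
 * Illustrative sketch (not part of the driver): how an application reaches
 * this callback through the generic rte_flow API.  The port id, pattern and
 * queue index below are placeholders.
 *
 *	struct rte_flow_error error;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 0 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *		flow = rte_flow_create(port_id, &attr, pattern, actions,
 *				       &error);
 */
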
1812 /**
1813  * Remove the flow.
1814  *
1815  * @param[in] dev
1816  *   Pointer to Ethernet device.
1817  * @param[in, out] flow
1818  *   Pointer to flow structure.
1819  */
1820 static void
1821 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1822 {
1823         struct mlx5_flow_verbs *verbs;
1824
1825         LIST_FOREACH(verbs, &flow->verbs, next) {
1826                 if (verbs->flow) {
1827                         claim_zero(mlx5_glue->destroy_flow(verbs->flow));
1828                         verbs->flow = NULL;
1829                 }
1830                 if (verbs->hrxq) {
1831                         if (flow->fate & MLX5_FLOW_FATE_DROP)
1832                                 mlx5_hrxq_drop_release(dev);
1833                         else
1834                                 mlx5_hrxq_release(dev, verbs->hrxq);
1835                         verbs->hrxq = NULL;
1836                 }
1837         }
1838 }
1839
1840 /**
1841  * Apply the flow.
1842  *
1843  * @param[in] dev
1844  *   Pointer to Ethernet device structure.
1845  * @param[in, out] flow
1846  *   Pointer to flow structure.
1847  * @param[out] error
1848  *   Pointer to error structure.
1849  *
1850  * @return
1851  *   0 on success, a negative errno value otherwise and rte_errno is set.
1852  */
1853 static int
1854 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1855                 struct rte_flow_error *error)
1856 {
1857         struct mlx5_flow_verbs *verbs;
1858         int err;
1859
1860         LIST_FOREACH(verbs, &flow->verbs, next) {
1861                 if (flow->fate & MLX5_FLOW_FATE_DROP) {
1862                         verbs->hrxq = mlx5_hrxq_drop_new(dev);
1863                         if (!verbs->hrxq) {
1864                                 rte_flow_error_set
1865                                         (error, errno,
1866                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1867                                          NULL,
1868                                          "cannot get drop hash queue");
1869                                 goto error;
1870                         }
1871                 } else {
1872                         struct mlx5_hrxq *hrxq;
1873
1874                         hrxq = mlx5_hrxq_get(dev, flow->key,
1875                                              MLX5_RSS_HASH_KEY_LEN,
1876                                              verbs->hash_fields,
1877                                              (*flow->queue),
1878                                              flow->rss.queue_num, 0, 0);
1879                         if (!hrxq)
1880                                 hrxq = mlx5_hrxq_new(dev, flow->key,
1881                                                      MLX5_RSS_HASH_KEY_LEN,
1882                                                      verbs->hash_fields,
1883                                                      (*flow->queue),
1884                                                      flow->rss.queue_num, 0, 0);
1885                         if (!hrxq) {
1886                                 rte_flow_error_set
1887                                         (error, rte_errno,
1888                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1889                                          NULL,
1890                                          "cannot get hash queue");
1891                                 goto error;
1892                         }
1893                         verbs->hrxq = hrxq;
1894                 }
1895                 verbs->flow =
1896                         mlx5_glue->create_flow(verbs->hrxq->qp, verbs->attr);
1897                 if (!verbs->flow) {
1898                         rte_flow_error_set(error, errno,
1899                                            RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1900                                            NULL,
1901                                            "hardware refuses to create flow");
1902                         goto error;
1903                 }
1904         }
1905         return 0;
1906 error:
1907         err = rte_errno; /* Save rte_errno before cleanup. */
1908         LIST_FOREACH(verbs, &flow->verbs, next) {
1909                 if (verbs->hrxq) {
1910                         if (flow->fate & MLX5_FLOW_FATE_DROP)
1911                                 mlx5_hrxq_drop_release(dev);
1912                         else
1913                                 mlx5_hrxq_release(dev, verbs->hrxq);
1914                         verbs->hrxq = NULL;
1915                 }
1916         }
1917         rte_errno = err; /* Restore rte_errno. */
1918         return -rte_errno;
1919 }
1920
1921 /**
1922  * Create a flow and add it to @p list.
1923  *
1924  * @param dev
1925  *   Pointer to Ethernet device.
1926  * @param list
1927  *   Pointer to a TAILQ flow list.
1928  * @param[in] attr
1929  *   Flow rule attributes.
1930  * @param[in] items
1931  *   Pattern specification (list terminated by the END pattern item).
1932  * @param[in] actions
1933  *   Associated actions (list terminated by the END action).
1934  * @param[out] error
1935  *   Perform verbose error reporting if not NULL.
1936  *
1937  * @return
1938  *   A flow on success, NULL otherwise and rte_errno is set.
1939  */
1940 static struct rte_flow *
1941 mlx5_flow_list_create(struct rte_eth_dev *dev,
1942                       struct mlx5_flows *list,
1943                       const struct rte_flow_attr *attr,
1944                       const struct rte_flow_item items[],
1945                       const struct rte_flow_action actions[],
1946                       struct rte_flow_error *error)
1947 {
1948         struct rte_flow *flow = NULL;
1949         size_t size = 0;
1950         int ret;
1951
1952         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1953         if (ret < 0)
1954                 return NULL;
1955         size = ret;
1956         flow = rte_calloc(__func__, 1, size, 0);
1957         if (!flow) {
1958                 rte_flow_error_set(error, ENOMEM,
1959                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1960                                    NULL,
1961                                    "not enough memory to create flow");
1962                 return NULL;
1963         }
1964         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1965         if (ret < 0) {
1966                 rte_free(flow);
1967                 return NULL;
1968         }
1969         assert((size_t)ret == size);
1970         if (dev->data->dev_started) {
1971                 ret = mlx5_flow_apply(dev, flow, error);
1972                 if (ret < 0) {
1973                         ret = rte_errno; /* Save rte_errno before cleanup. */
1974                         if (flow) {
1975                                 mlx5_flow_remove(dev, flow);
1976                                 rte_free(flow);
1977                         }
1978                         rte_errno = ret; /* Restore rte_errno. */
1979                         return NULL;
1980                 }
1981         }
1982         mlx5_flow_rxq_mark_set(dev, flow);
1983         TAILQ_INSERT_TAIL(list, flow, next);
1984         return flow;
1985 }
1986
1987 /**
1988  * Create a flow.
1989  *
1990  * @see rte_flow_create()
1991  * @see rte_flow_ops
1992  */
1993 struct rte_flow *
1994 mlx5_flow_create(struct rte_eth_dev *dev,
1995                  const struct rte_flow_attr *attr,
1996                  const struct rte_flow_item items[],
1997                  const struct rte_flow_action actions[],
1998                  struct rte_flow_error *error)
1999 {
2000         return mlx5_flow_list_create
2001                 (dev, &((struct priv *)dev->data->dev_private)->flows,
2002                  attr, items, actions, error);
2003 }
2004
2005 /**
2006  * Destroy a flow in a list.
2007  *
2008  * @param dev
2009  *   Pointer to Ethernet device.
2010  * @param list
2011  *   Pointer to a TAILQ flow list.
2012  * @param[in] flow
2013  *   Flow to destroy.
2014  */
2015 static void
2016 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
2017                        struct rte_flow *flow)
2018 {
2019         mlx5_flow_remove(dev, flow);
2020         TAILQ_REMOVE(list, flow, next);
2021         mlx5_flow_rxq_mark_trim(dev, flow);
2022         rte_free(flow);
2023 }
2024
2025 /**
2026  * Destroy all flows.
2027  *
2028  * @param dev
2029  *   Pointer to Ethernet device.
2030  * @param list
2031  *   Pointer to a TAILQ flow list.
2032  */
2033 void
2034 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
2035 {
2036         while (!TAILQ_EMPTY(list)) {
2037                 struct rte_flow *flow;
2038
2039                 flow = TAILQ_FIRST(list);
2040                 mlx5_flow_list_destroy(dev, list, flow);
2041         }
2042 }
2043
2044 /**
2045  * Remove all flows.
2046  *
2047  * @param dev
2048  *   Pointer to Ethernet device.
2049  * @param list
2050  *   Pointer to a TAILQ flow list.
2051  */
2052 void
2053 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
2054 {
2055         struct rte_flow *flow;
2056
2057         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
2058                 mlx5_flow_remove(dev, flow);
2059         mlx5_flow_rxq_mark_clear(dev);
2060 }
2061
2062 /**
2063  * Add all flows.
2064  *
2065  * @param dev
2066  *   Pointer to Ethernet device.
2067  * @param list
2068  *   Pointer to a TAILQ flow list.
2069  *
2070  * @return
2071  *   0 on success, a negative errno value otherwise and rte_errno is set.
2072  */
2073 int
2074 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
2075 {
2076         struct rte_flow *flow;
2077         struct rte_flow_error error;
2078         int ret = 0;
2079
2080         TAILQ_FOREACH(flow, list, next) {
2081                 ret = mlx5_flow_apply(dev, flow, &error);
2082                 if (ret < 0)
2083                         goto error;
2084                 mlx5_flow_rxq_mark_set(dev, flow);
2085         }
2086         return 0;
2087 error:
2088         ret = rte_errno; /* Save rte_errno before cleanup. */
2089         mlx5_flow_stop(dev, list);
2090         rte_errno = ret; /* Restore rte_errno. */
2091         return -rte_errno;
2092 }
2093
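/*
 * Illustrative sketch (not part of this file): how the stop/start pair above
 * is typically driven from the port lifecycle, assuming the usual
 * mlx5_dev_start()/mlx5_dev_stop() handlers.
 *
 *	On port start, re-apply every flow kept on the list:
 *		ret = mlx5_flow_start(dev, &priv->flows);
 *	On port stop, remove them from hardware but keep the list:
 *		mlx5_flow_stop(dev, &priv->flows);
 */
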
2094 /**
2095  * Verify the flow list is empty.
2096  *
2097  * @param dev
2098  *   Pointer to Ethernet device.
2099  *
2100  * @return The number of flows not released.
2101  */
2102 int
2103 mlx5_flow_verify(struct rte_eth_dev *dev)
2104 {
2105         struct priv *priv = dev->data->dev_private;
2106         struct rte_flow *flow;
2107         int ret = 0;
2108
2109         TAILQ_FOREACH(flow, &priv->flows, next) {
2110                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
2111                         dev->data->port_id, (void *)flow);
2112                 ++ret;
2113         }
2114         return ret;
2115 }
2116
2117 /**
2118  * Enable a control flow configured from the control plane.
2119  *
2120  * @param dev
2121  *   Pointer to Ethernet device.
2122  * @param eth_spec
2123  *   An Ethernet flow spec to apply.
2124  * @param eth_mask
2125  *   An Ethernet flow mask to apply.
2126  * @param vlan_spec
2127  *   A VLAN flow spec to apply.
2128  * @param vlan_mask
2129  *   A VLAN flow mask to apply.
2130  *
2131  * @return
2132  *   0 on success, a negative errno value otherwise and rte_errno is set.
2133  */
2134 int
2135 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2136                     struct rte_flow_item_eth *eth_spec,
2137                     struct rte_flow_item_eth *eth_mask,
2138                     struct rte_flow_item_vlan *vlan_spec,
2139                     struct rte_flow_item_vlan *vlan_mask)
2140 {
2141         struct priv *priv = dev->data->dev_private;
2142         const struct rte_flow_attr attr = {
2143                 .ingress = 1,
2144                 .priority = MLX5_FLOW_PRIO_RSVD,
2145         };
2146         struct rte_flow_item items[] = {
2147                 {
2148                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2149                         .spec = eth_spec,
2150                         .last = NULL,
2151                         .mask = eth_mask,
2152                 },
2153                 {
2154                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2155                                 RTE_FLOW_ITEM_TYPE_END,
2156                         .spec = vlan_spec,
2157                         .last = NULL,
2158                         .mask = vlan_mask,
2159                 },
2160                 {
2161                         .type = RTE_FLOW_ITEM_TYPE_END,
2162                 },
2163         };
2164         uint16_t queue[priv->reta_idx_n];
2165         struct rte_flow_action_rss action_rss = {
2166                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
2167                 .level = 0,
2168                 .types = priv->rss_conf.rss_hf,
2169                 .key_len = priv->rss_conf.rss_key_len,
2170                 .queue_num = priv->reta_idx_n,
2171                 .key = priv->rss_conf.rss_key,
2172                 .queue = queue,
2173         };
2174         struct rte_flow_action actions[] = {
2175                 {
2176                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2177                         .conf = &action_rss,
2178                 },
2179                 {
2180                         .type = RTE_FLOW_ACTION_TYPE_END,
2181                 },
2182         };
2183         struct rte_flow *flow;
2184         struct rte_flow_error error;
2185         unsigned int i;
2186
2187         if (!priv->reta_idx_n) {
2188                 rte_errno = EINVAL;
2189                 return -rte_errno;
2190         }
2191         for (i = 0; i != priv->reta_idx_n; ++i)
2192                 queue[i] = (*priv->reta_idx)[i];
2193         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
2194                                      actions, &error);
2195         if (!flow)
2196                 return -rte_errno;
2197         return 0;
2198 }
2199
2200 /**
2201  * Enable a control flow configured from the control plane.
2202  *
2203  * @param dev
2204  *   Pointer to Ethernet device.
2205  * @param eth_spec
2206  *   An Ethernet flow spec to apply.
2207  * @param eth_mask
2208  *   An Ethernet flow mask to apply.
2209  *
2210  * @return
2211  *   0 on success, a negative errno value otherwise and rte_errno is set.
2212  */
2213 int
2214 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2215                struct rte_flow_item_eth *eth_spec,
2216                struct rte_flow_item_eth *eth_mask)
2217 {
2218         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2219 }
2220
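/*
 * Illustrative sketch (not part of this file): a typical use of the helper
 * above, e.g. letting broadcast traffic in when control flows are enabled.
 * The all-ones Ethernet address below doubles as spec and mask.
 *
 *	struct rte_flow_item_eth bcast = {
 *		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *	};
 *
 *	if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *		return -rte_errno;
 */
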
2221 /**
2222  * Destroy a flow.
2223  *
2224  * @see rte_flow_destroy()
2225  * @see rte_flow_ops
2226  */
2227 int
2228 mlx5_flow_destroy(struct rte_eth_dev *dev,
2229                   struct rte_flow *flow,
2230                   struct rte_flow_error *error __rte_unused)
2231 {
2232         struct priv *priv = dev->data->dev_private;
2233
2234         mlx5_flow_list_destroy(dev, &priv->flows, flow);
2235         return 0;
2236 }
2237
2238 /**
2239  * Destroy all flows.
2240  *
2241  * @see rte_flow_flush()
2242  * @see rte_flow_ops
2243  */
2244 int
2245 mlx5_flow_flush(struct rte_eth_dev *dev,
2246                 struct rte_flow_error *error __rte_unused)
2247 {
2248         struct priv *priv = dev->data->dev_private;
2249
2250         mlx5_flow_list_flush(dev, &priv->flows);
2251         return 0;
2252 }
2253
2254 /**
2255  * Isolated mode.
2256  *
2257  * @see rte_flow_isolate()
2258  * @see rte_flow_ops
2259  */
2260 int
2261 mlx5_flow_isolate(struct rte_eth_dev *dev,
2262                   int enable,
2263                   struct rte_flow_error *error)
2264 {
2265         struct priv *priv = dev->data->dev_private;
2266
2267         if (dev->data->dev_started) {
2268                 rte_flow_error_set(error, EBUSY,
2269                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2270                                    NULL,
2271                                    "port must be stopped first");
2272                 return -rte_errno;
2273         }
2274         priv->isolated = !!enable;
2275         if (enable)
2276                 dev->dev_ops = &mlx5_dev_ops_isolate;
2277         else
2278                 dev->dev_ops = &mlx5_dev_ops;
2279         return 0;
2280 }
2281
2282 /**
2283  * Convert a flow director filter to a generic flow.
2284  *
2285  * @param dev
2286  *   Pointer to Ethernet device.
2287  * @param fdir_filter
2288  *   Flow director filter to add.
2289  * @param attributes
2290  *   Generic flow parameters structure.
2291  *
2292  * @return
2293  *   0 on success, a negative errno value otherwise and rte_errno is set.
2294  */
2295 static int
2296 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
2297                          const struct rte_eth_fdir_filter *fdir_filter,
2298                          struct mlx5_fdir *attributes)
2299 {
2300         struct priv *priv = dev->data->dev_private;
2301         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2302         const struct rte_eth_fdir_masks *mask =
2303                 &dev->data->dev_conf.fdir_conf.mask;
2304
2305         /* Validate queue number. */
2306         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2307                 DRV_LOG(ERR, "port %u invalid queue number %d",
2308                         dev->data->port_id, fdir_filter->action.rx_queue);
2309                 rte_errno = EINVAL;
2310                 return -rte_errno;
2311         }
2312         attributes->attr.ingress = 1;
2313         attributes->items[0] = (struct rte_flow_item) {
2314                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2315                 .spec = &attributes->l2,
2316                 .mask = &attributes->l2_mask,
2317         };
2318         switch (fdir_filter->action.behavior) {
2319         case RTE_ETH_FDIR_ACCEPT:
2320                 attributes->actions[0] = (struct rte_flow_action){
2321                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2322                         .conf = &attributes->queue,
2323                 };
2324                 break;
2325         case RTE_ETH_FDIR_REJECT:
2326                 attributes->actions[0] = (struct rte_flow_action){
2327                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2328                 };
2329                 break;
2330         default:
2331                 DRV_LOG(ERR, "port %u invalid behavior %d",
2332                         dev->data->port_id,
2333                         fdir_filter->action.behavior);
2334                 rte_errno = ENOTSUP;
2335                 return -rte_errno;
2336         }
2337         attributes->queue.index = fdir_filter->action.rx_queue;
2338         /* Handle L3. */
2339         switch (fdir_filter->input.flow_type) {
2340         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2341         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2342         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2343                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2344                         .src_addr = input->flow.ip4_flow.src_ip,
2345                         .dst_addr = input->flow.ip4_flow.dst_ip,
2346                         .time_to_live = input->flow.ip4_flow.ttl,
2347                         .type_of_service = input->flow.ip4_flow.tos,
2348                         .next_proto_id = input->flow.ip4_flow.proto,
2349                 };
2350                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
2351                         .src_addr = mask->ipv4_mask.src_ip,
2352                         .dst_addr = mask->ipv4_mask.dst_ip,
2353                         .time_to_live = mask->ipv4_mask.ttl,
2354                         .type_of_service = mask->ipv4_mask.tos,
2355                         .next_proto_id = mask->ipv4_mask.proto,
2356                 };
2357                 attributes->items[1] = (struct rte_flow_item){
2358                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2359                         .spec = &attributes->l3,
2360                         .mask = &attributes->l3_mask,
2361                 };
2362                 break;
2363         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2364         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2365         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2366                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2367                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2368                         .proto = input->flow.ipv6_flow.proto,
2369                 };
2370
2371                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2372                        input->flow.ipv6_flow.src_ip,
2373                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2374                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2375                        input->flow.ipv6_flow.dst_ip,
2376                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2377                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
2378                        mask->ipv6_mask.src_ip,
2379                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2380                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
2381                        mask->ipv6_mask.dst_ip,
2382                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
2383                 attributes->items[1] = (struct rte_flow_item){
2384                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2385                         .spec = &attributes->l3,
2386                         .mask = &attributes->l3_mask,
2387                 };
2388                 break;
2389         default:
2390                 DRV_LOG(ERR, "port %u invalid flow type %d",
2391                         dev->data->port_id, fdir_filter->input.flow_type);
2392                 rte_errno = ENOTSUP;
2393                 return -rte_errno;
2394         }
2395         /* Handle L4. */
2396         switch (fdir_filter->input.flow_type) {
2397         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2398                 attributes->l4.udp.hdr = (struct udp_hdr){
2399                         .src_port = input->flow.udp4_flow.src_port,
2400                         .dst_port = input->flow.udp4_flow.dst_port,
2401                 };
2402                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2403                         .src_port = mask->src_port_mask,
2404                         .dst_port = mask->dst_port_mask,
2405                 };
2406                 attributes->items[2] = (struct rte_flow_item){
2407                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2408                         .spec = &attributes->l4,
2409                         .mask = &attributes->l4_mask,
2410                 };
2411                 break;
2412         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2413                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2414                         .src_port = input->flow.tcp4_flow.src_port,
2415                         .dst_port = input->flow.tcp4_flow.dst_port,
2416                 };
2417                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2418                         .src_port = mask->src_port_mask,
2419                         .dst_port = mask->dst_port_mask,
2420                 };
2421                 attributes->items[2] = (struct rte_flow_item){
2422                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2423                         .spec = &attributes->l4,
2424                         .mask = &attributes->l4_mask,
2425                 };
2426                 break;
2427         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2428                 attributes->l4.udp.hdr = (struct udp_hdr){
2429                         .src_port = input->flow.udp6_flow.src_port,
2430                         .dst_port = input->flow.udp6_flow.dst_port,
2431                 };
2432                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2433                         .src_port = mask->src_port_mask,
2434                         .dst_port = mask->dst_port_mask,
2435                 };
2436                 attributes->items[2] = (struct rte_flow_item){
2437                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2438                         .spec = &attributes->l4,
2439                         .mask = &attributes->l4_mask,
2440                 };
2441                 break;
2442         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2443                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2444                         .src_port = input->flow.tcp6_flow.src_port,
2445                         .dst_port = input->flow.tcp6_flow.dst_port,
2446                 };
2447                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2448                         .src_port = mask->src_port_mask,
2449                         .dst_port = mask->dst_port_mask,
2450                 };
2451                 attributes->items[2] = (struct rte_flow_item){
2452                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2453                         .spec = &attributes->l4,
2454                         .mask = &attributes->l4_mask,
2455                 };
2456                 break;
2457         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2458         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2459                 break;
2460         default:
2461                 DRV_LOG(ERR, "port %u invalid flow type %d",
2462                         dev->data->port_id, fdir_filter->input.flow_type);
2463                 rte_errno = ENOTSUP;
2464                 return -rte_errno;
2465         }
2466         return 0;
2467 }
2468
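/*
 * Illustrative sketch (not part of the driver): a flow director filter that
 * the conversion above turns into ETH / IPV4 / UDP items with a QUEUE
 * action.  The ports and queue index below are placeholders; the matching
 * masks come from dev->data->dev_conf.fdir_conf.mask as read above.
 *
 *	struct rte_eth_fdir_filter fdir = {
 *		.input = {
 *			.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
 *			.flow.udp4_flow = {
 *				.src_port = rte_cpu_to_be_16(1234),
 *				.dst_port = rte_cpu_to_be_16(5678),
 *			},
 *		},
 *		.action = {
 *			.rx_queue = 3,
 *			.behavior = RTE_ETH_FDIR_ACCEPT,
 *		},
 *	};
 */
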
2469 /**
2470  * Add new flow director filter and store it in list.
2471  *
2472  * @param dev
2473  *   Pointer to Ethernet device.
2474  * @param fdir_filter
2475  *   Flow director filter to add.
2476  *
2477  * @return
2478  *   0 on success, a negative errno value otherwise and rte_errno is set.
2479  */
2480 static int
2481 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2482                      const struct rte_eth_fdir_filter *fdir_filter)
2483 {
2484         struct priv *priv = dev->data->dev_private;
2485         struct mlx5_fdir attributes = {
2486                 .attr.group = 0,
2487                 .l2_mask = {
2488                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2489                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2490                         .type = 0,
2491                 },
2492         };
2493         struct rte_flow_error error;
2494         struct rte_flow *flow;
2495         int ret;
2496
2497         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2498         if (ret)
2499                 return ret;
2500         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2501                                      attributes.items, attributes.actions,
2502                                      &error);
2503         if (flow) {
2504                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2505                         (void *)flow);
2506                 return 0;
2507         }
2508         return -rte_errno;
2509 }
2510
2511 /**
2512  * Delete specific filter.
2513  *
2514  * @param dev
2515  *   Pointer to Ethernet device.
2516  * @param fdir_filter
2517  *   Filter to be deleted.
2518  *
2519  * @return
2520  *   0 on success, a negative errno value otherwise and rte_errno is set.
2521  */
2522 static int
2523 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
2524                         const struct rte_eth_fdir_filter *fdir_filter
2525                         __rte_unused)
2526 {
2527         rte_errno = ENOTSUP;
2528         return -rte_errno;
2529 }
2530
2531 /**
2532  * Update queue for specific filter.
2533  *
2534  * @param dev
2535  *   Pointer to Ethernet device.
2536  * @param fdir_filter
2537  *   Filter to be updated.
2538  *
2539  * @return
2540  *   0 on success, a negative errno value otherwise and rte_errno is set.
2541  */
2542 static int
2543 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2544                         const struct rte_eth_fdir_filter *fdir_filter)
2545 {
2546         int ret;
2547
2548         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2549         if (ret)
2550                 return ret;
2551         return mlx5_fdir_filter_add(dev, fdir_filter);
2552 }
2553
2554 /**
2555  * Flush all filters.
2556  *
2557  * @param dev
2558  *   Pointer to Ethernet device.
2559  */
2560 static void
2561 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2562 {
2563         struct priv *priv = dev->data->dev_private;
2564
2565         mlx5_flow_list_flush(dev, &priv->flows);
2566 }
2567
2568 /**
2569  * Get flow director information.
2570  *
2571  * @param dev
2572  *   Pointer to Ethernet device.
2573  * @param[out] fdir_info
2574  *   Resulting flow director information.
2575  */
2576 static void
2577 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
2578 {
2579         struct rte_eth_fdir_masks *mask =
2580                 &dev->data->dev_conf.fdir_conf.mask;
2581
2582         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
2583         fdir_info->guarant_spc = 0;
2584         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2585         fdir_info->max_flexpayload = 0;
2586         fdir_info->flow_types_mask[0] = 0;
2587         fdir_info->flex_payload_unit = 0;
2588         fdir_info->max_flex_payload_segment_num = 0;
2589         fdir_info->flex_payload_limit = 0;
2590         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2591 }
2592
2593 /**
2594  * Deal with flow director operations.
2595  *
2596  * @param dev
2597  *   Pointer to Ethernet device.
2598  * @param filter_op
2599  *   Operation to perform.
2600  * @param arg
2601  *   Pointer to operation-specific structure.
2602  *
2603  * @return
2604  *   0 on success, a negative errno value otherwise and rte_errno is set.
2605  */
2606 static int
2607 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
2608                     void *arg)
2609 {
2610         enum rte_fdir_mode fdir_mode =
2611                 dev->data->dev_conf.fdir_conf.mode;
2612
2613         if (filter_op == RTE_ETH_FILTER_NOP)
2614                 return 0;
2615         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2616             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2617                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
2618                         dev->data->port_id, fdir_mode);
2619                 rte_errno = EINVAL;
2620                 return -rte_errno;
2621         }
2622         switch (filter_op) {
2623         case RTE_ETH_FILTER_ADD:
2624                 return mlx5_fdir_filter_add(dev, arg);
2625         case RTE_ETH_FILTER_UPDATE:
2626                 return mlx5_fdir_filter_update(dev, arg);
2627         case RTE_ETH_FILTER_DELETE:
2628                 return mlx5_fdir_filter_delete(dev, arg);
2629         case RTE_ETH_FILTER_FLUSH:
2630                 mlx5_fdir_filter_flush(dev);
2631                 break;
2632         case RTE_ETH_FILTER_INFO:
2633                 mlx5_fdir_info_get(dev, arg);
2634                 break;
2635         default:
2636                 DRV_LOG(DEBUG, "port %u unknown operation %u",
2637                         dev->data->port_id, filter_op);
2638                 rte_errno = EINVAL;
2639                 return -rte_errno;
2640         }
2641         return 0;
2642 }
2643
2644 /**
2645  * Manage filter operations.
2646  *
2647  * @param dev
2648  *   Pointer to Ethernet device structure.
2649  * @param filter_type
2650  *   Filter type.
2651  * @param filter_op
2652  *   Operation to perform.
2653  * @param arg
2654  *   Pointer to operation-specific structure.
2655  *
2656  * @return
2657  *   0 on success, a negative errno value otherwise and rte_errno is set.
2658  */
2659 int
2660 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2661                      enum rte_filter_type filter_type,
2662                      enum rte_filter_op filter_op,
2663                      void *arg)
2664 {
2665         switch (filter_type) {
2666         case RTE_ETH_FILTER_GENERIC:
2667                 if (filter_op != RTE_ETH_FILTER_GET) {
2668                         rte_errno = EINVAL;
2669                         return -rte_errno;
2670                 }
2671                 *(const void **)arg = &mlx5_flow_ops;
2672                 return 0;
2673         case RTE_ETH_FILTER_FDIR:
2674                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
2675         default:
2676                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
2677                         dev->data->port_id, filter_type);
2678                 rte_errno = ENOTSUP;
2679                 return -rte_errno;
2680         }
2681         return 0;
2682 }
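/*
 * Illustrative sketch (not part of the driver): how the generic flow ops
 * table is fetched through the callback above; this is roughly what the
 * rte_flow layer does before dispatching rte_flow_create()/validate()/
 * destroy() to the PMD.  On success, ops points to mlx5_flow_ops.
 *
 *	const struct rte_flow_ops *ops = NULL;
 *	int ret;
 *
 *	ret = rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				      RTE_ETH_FILTER_GET, &ops);
 */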