net/mlx5: use a macro for the RSS key size
[dpdk.git] / drivers/net/mlx5/mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structures defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45 /* Masks. */
46 #define MLX5_FLOW_LAYER_OUTER_L3 \
47         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
48 #define MLX5_FLOW_LAYER_OUTER_L4 \
49         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50
51 /* Actions that modify the fate of matching traffic. */
52 #define MLX5_FLOW_FATE_DROP (1u << 0)
53 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
54
55 /* Modify a packet. */
56 #define MLX5_FLOW_MOD_FLAG (1u << 0)
57 #define MLX5_FLOW_MOD_MARK (1u << 1)
58
59 /* Possible L3 layer protocols to filter on (IANA IP protocol numbers). */
60 #define MLX5_IP_PROTOCOL_TCP 6
61 #define MLX5_IP_PROTOCOL_UDP 17
62
63 /** Verbs specification and resources attached to a flow. */
64 struct mlx5_flow_verbs {
65         unsigned int size; /**< Accumulated size of the specifications. */
66         struct {
67                 struct ibv_flow_attr *attr;
68                 /**< Pointer to the flow attribute. */
69                 uint8_t *specs; /**< Pointer to the specifications. */
70         };
71         struct ibv_flow *flow; /**< Verbs flow pointer. */
72         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
73 };
74
75 /* Flow structure. */
76 struct rte_flow {
77         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
78         struct rte_flow_attr attributes; /**< User flow attribute. */
79         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
80         uint32_t layers;
81         /**< Bit-fields of present layers, see MLX5_FLOW_LAYER_*. */
82         uint32_t modifier;
83         /**< Bit-fields of present modifiers, see MLX5_FLOW_MOD_*. */
84         uint32_t fate;
85         /**< Bit-fields of the present fate action, see MLX5_FLOW_FATE_*. */
86         uint8_t l3_protocol; /**< Valid when l3_protocol_en is set. */
87         struct mlx5_flow_verbs verbs; /**< Verbs flow. */
88         uint16_t queue; /**< Destination queue to redirect traffic to. */
89 };
90
91 static const struct rte_flow_ops mlx5_flow_ops = {
92         .validate = mlx5_flow_validate,
93         .create = mlx5_flow_create,
94         .destroy = mlx5_flow_destroy,
95         .flush = mlx5_flow_flush,
96         .isolate = mlx5_flow_isolate,
97 };
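/*
 * Illustrative, application-side example (not part of this driver) of a rule
 * these callbacks can translate: match outer IPv4/UDP traffic, mark it with
 * 42 and direct it to Rx queue 3. Values and names below are arbitrary.
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_mark mark = { .id = 42 };
 *	struct rte_flow_action_queue queue = { .index = 3 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *handle = rte_flow_create(port_id, &attr, pattern,
 *						  actions, &error);
 */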
98
99 /* Convert FDIR request to Generic flow. */
100 struct mlx5_fdir {
101         struct rte_flow_attr attr;
102         struct rte_flow_action actions[2];
103         struct rte_flow_item items[4];
104         struct rte_flow_item_eth l2;
105         struct rte_flow_item_eth l2_mask;
106         union {
107                 struct rte_flow_item_ipv4 ipv4;
108                 struct rte_flow_item_ipv6 ipv6;
109         } l3;
110         union {
111                 struct rte_flow_item_ipv4 ipv4;
112                 struct rte_flow_item_ipv6 ipv6;
113         } l3_mask;
114         union {
115                 struct rte_flow_item_udp udp;
116                 struct rte_flow_item_tcp tcp;
117         } l4;
118         union {
119                 struct rte_flow_item_udp udp;
120                 struct rte_flow_item_tcp tcp;
121         } l4_mask;
122         struct rte_flow_action_queue queue;
123 };
124
125 /* Verbs specification header. */
126 struct ibv_spec_header {
127         enum ibv_flow_spec_type type;
128         uint16_t size;
129 };
130
131 /**
132  * Discover the maximum number of flow priorities available.
133  *
134  * @param[in] dev
135  *   Pointer to Ethernet device.
136  *
137  * @return
138  *   Number of supported flow priorities on success, a negative errno value
139  *   otherwise and rte_errno is set.
140  */
141 int
142 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
143 {
144         struct {
145                 struct ibv_flow_attr attr;
146                 struct ibv_flow_spec_eth eth;
147                 struct ibv_flow_spec_action_drop drop;
148         } flow_attr = {
149                 .attr = {
150                         .num_of_specs = 2,
151                 },
152                 .eth = {
153                         .type = IBV_FLOW_SPEC_ETH,
154                         .size = sizeof(struct ibv_flow_spec_eth),
155                 },
156                 .drop = {
157                         .size = sizeof(struct ibv_flow_spec_action_drop),
158                         .type = IBV_FLOW_SPEC_ACTION_DROP,
159                 },
160         };
161         struct ibv_flow *flow;
162         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
163         uint16_t vprio[] = { 8, 16 };
164         int i;
165
166         if (!drop) {
167                 rte_errno = ENOTSUP;
168                 return -rte_errno;
169         }
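        /*
         * Probe the number of supported priorities by creating a drop flow
         * at priority 7, then 15; the last value accepted (8 or 16) is
         * reported as the maximum.
         */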
170         for (i = 0; i != RTE_DIM(vprio); i++) {
171                 flow_attr.attr.priority = vprio[i] - 1;
172                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
173                 if (!flow)
174                         break;
175                 claim_zero(mlx5_glue->destroy_flow(flow));
176         }
177         mlx5_hrxq_drop_release(dev);
178         DRV_LOG(INFO, "port %u flow maximum priority: %d",
179                 dev->data->port_id, vprio[i - 1]);
180         return vprio[i - 1];
181 }
182
183 /**
184  * Verify the @p attributes will be correctly understood by the NIC and store
185  * them in the @p flow if everything is correct.
186  *
187  * @param[in] dev
188  *   Pointer to Ethernet device.
189  * @param[in] attributes
190  *   Pointer to flow attributes
191  * @param[in, out] flow
192  *   Pointer to the rte_flow structure.
193  * @param[out] error
194  *   Pointer to error structure.
195  *
196  * @return
197  *   0 on success, a negative errno value otherwise and rte_errno is set.
198  */
199 static int
200 mlx5_flow_attributes(struct rte_eth_dev *dev,
201                      const struct rte_flow_attr *attributes,
202                      struct rte_flow *flow,
203                      struct rte_flow_error *error)
204 {
205         uint32_t priority_max =
206                 ((struct priv *)dev->data->dev_private)->config.flow_prio;
207
208         if (attributes->group)
209                 return rte_flow_error_set(error, ENOTSUP,
210                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
211                                           NULL,
212                                           "groups are not supported");
213         if (attributes->priority >= priority_max)
214                 return rte_flow_error_set(error, ENOTSUP,
215                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
216                                           NULL,
217                                           "priority out of range");
218         if (attributes->egress)
219                 return rte_flow_error_set(error, ENOTSUP,
220                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
221                                           NULL,
222                                           "egress is not supported");
223         if (attributes->transfer)
224                 return rte_flow_error_set(error, ENOTSUP,
225                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
226                                           NULL,
227                                           "transfer is not supported");
228         if (!attributes->ingress)
229                 return rte_flow_error_set(error, ENOTSUP,
230                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
231                                           NULL,
232                                           "ingress attribute is mandatory");
233         flow->attributes = *attributes;
234         return 0;
235 }
236
237 /**
238  * Verify the @p item specifications (spec, last, mask) are compatible with the
239  * NIC capabilities.
240  *
241  * @param[in] item
242  *   Item specification.
243  * @param[in] mask
244  *   @p item->mask or flow default bit-masks.
245  * @param[in] nic_mask
246  *   Bit-masks covering supported fields by the NIC to compare with user mask.
247  * @param[in] size
248  *   Bit-mask size in bytes.
249  * @param[out] error
250  *   Pointer to error structure.
251  *
252  * @return
253  *   0 on success, a negative errno value otherwise and rte_errno is set.
254  */
255 static int
256 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
257                           const uint8_t *mask,
258                           const uint8_t *nic_mask,
259                           unsigned int size,
260                           struct rte_flow_error *error)
261 {
262         unsigned int i;
263
264         assert(nic_mask);
265         for (i = 0; i < size; ++i)
266                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
267                         return rte_flow_error_set(error, ENOTSUP,
268                                                   RTE_FLOW_ERROR_TYPE_ITEM,
269                                                   item,
270                                                   "mask enables unsupported"
271                                                   " bits");
272         if (!item->spec && (item->mask || item->last))
273                 return rte_flow_error_set(error, EINVAL,
274                                           RTE_FLOW_ERROR_TYPE_ITEM,
275                                           item,
276                                           "mask/last without a spec is not"
277                                           " supported");
278         if (item->spec && item->last) {
279                 uint8_t spec[size];
280                 uint8_t last[size];
281                 unsigned int i;
282                 int ret;
283
284                 for (i = 0; i < size; ++i) {
285                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
286                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
287                 }
288                 ret = memcmp(spec, last, size);
289                 if (ret != 0)
290                         return rte_flow_error_set(error, ENOTSUP,
291                                                   RTE_FLOW_ERROR_TYPE_ITEM,
292                                                   item,
293                                                   "range is not supported");
294         }
295         return 0;
296 }
297
298 /**
299  * Add a verbs specification into @p flow.
300  *
301  * @param[in, out] flow
302  *   Pointer to flow structure.
303  * @param[in] src
304  *   Specification to copy into @p flow.
305  * @param[in] size
306  *   Size in bytes of the specification to copy.
307  */
308 static void
309 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
310 {
311         if (flow->verbs.specs) {
312                 void *dst;
313
314                 dst = (void *)(flow->verbs.specs + flow->verbs.size);
315                 memcpy(dst, src, size);
316                 ++flow->verbs.attr->num_of_specs;
317         }
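        /*
         * The size is accounted for even when no buffer is attached, so that
         * callers can compute the memory required for a second conversion
         * pass.
         */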
318         flow->verbs.size += size;
319 }
320
321 /**
322  * Convert the @p item into a Verbs specification after ensuring the NIC
323  * will understand and process it correctly.
324  * If the necessary size for the conversion is greater than @p flow_size,
325  * nothing is written in @p flow; the validation is still performed.
326  *
327  * @param[in] item
328  *   Item specification.
329  * @param[in, out] flow
330  *   Pointer to flow structure.
331  * @param[in] flow_size
332  *   Size in bytes of the available space in @p flow, if too small, nothing is
333  *   written.
334  * @param[out] error
335  *   Pointer to error structure.
336  *
337  * @return
338  *   On success, the number of bytes consumed/necessary. If the returned value
339  *   is less than or equal to @p flow_size, the @p item has been fully converted;
340  *   otherwise another call with the returned memory size should be made.
341  *   On error, a negative errno value is returned and rte_errno is set.
342  */
343 static int
344 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
345                    const size_t flow_size, struct rte_flow_error *error)
346 {
347         const struct rte_flow_item_eth *spec = item->spec;
348         const struct rte_flow_item_eth *mask = item->mask;
349         const struct rte_flow_item_eth nic_mask = {
350                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
351                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
352                 .type = RTE_BE16(0xffff),
353         };
354         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
355         struct ibv_flow_spec_eth eth = {
356                 .type = IBV_FLOW_SPEC_ETH,
357                 .size = size,
358         };
359         int ret;
360
361         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
362                 return rte_flow_error_set(error, ENOTSUP,
363                                           RTE_FLOW_ERROR_TYPE_ITEM,
364                                           item,
365                                           "L2 layers already configured");
366         if (!mask)
367                 mask = &rte_flow_item_eth_mask;
368         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
369                                         (const uint8_t *)&nic_mask,
370                                         sizeof(struct rte_flow_item_eth),
371                                         error);
372         if (ret)
373                 return ret;
374         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
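        /*
         * Validation is complete; when the remaining room cannot hold the
         * specification, only report the required size.
         */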
375         if (size > flow_size)
376                 return size;
377         if (spec) {
378                 unsigned int i;
379
380                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
381                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
382                 eth.val.ether_type = spec->type;
383                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
384                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
385                 eth.mask.ether_type = mask->type;
386                 /* Remove unwanted bits from values. */
387                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
388                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
389                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
390                 }
391                 eth.val.ether_type &= eth.mask.ether_type;
392         }
393         mlx5_flow_spec_verbs_add(flow, &eth, size);
394         return size;
395 }
396
397 /**
398  * Update the VLAN tag in the Verbs Ethernet specification.
399  *
400  * @param[in, out] attr
401  *   Pointer to Verbs attributes structure.
402  * @param[in] eth
403  *   Verbs structure containing the VLAN information to copy.
404  */
405 static void
406 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
407                            struct ibv_flow_spec_eth *eth)
408 {
409         unsigned int i;
410         enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
411         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
412                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
413
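        /*
         * Specifications are laid out back to back after the attribute; walk
         * them using each header's size field until the Ethernet one is
         * found.
         */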
414         for (i = 0; i != attr->num_of_specs; ++i) {
415                 if (hdr->type == search) {
416                         struct ibv_flow_spec_eth *e =
417                                 (struct ibv_flow_spec_eth *)hdr;
418
419                         e->val.vlan_tag = eth->val.vlan_tag;
420                         e->mask.vlan_tag = eth->mask.vlan_tag;
421                         e->val.ether_type = eth->val.ether_type;
422                         e->mask.ether_type = eth->mask.ether_type;
423                         break;
424                 }
425                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
426         }
427 }
428
429 /**
430  * Convert the @p item into @p flow (or update the already present Ethernet
431  * Verbs specification) after ensuring the NIC will understand and
432  * process it correctly.
433  * If the necessary size for the conversion is greater than @p flow_size,
434  * nothing is written in @p flow; the validation is still performed.
435  *
436  * @param[in] item
437  *   Item specification.
438  * @param[in, out] flow
439  *   Pointer to flow structure.
440  * @param[in] flow_size
441  *   Size in bytes of the available space in @p flow, if too small, nothing is
442  *   written.
443  * @param[out] error
444  *   Pointer to error structure.
445  *
446  * @return
447  *   On success, the number of bytes consumed/necessary. If the returned value
448  *   is less than or equal to @p flow_size, the @p item has been fully converted;
449  *   otherwise another call with the returned memory size should be made.
450  *   On error, a negative errno value is returned and rte_errno is set.
451  */
452 static int
453 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
454                     const size_t flow_size, struct rte_flow_error *error)
455 {
456         const struct rte_flow_item_vlan *spec = item->spec;
457         const struct rte_flow_item_vlan *mask = item->mask;
458         const struct rte_flow_item_vlan nic_mask = {
459                 .tci = RTE_BE16(0x0fff),
460                 .inner_type = RTE_BE16(0xffff),
461         };
462         unsigned int size = sizeof(struct ibv_flow_spec_eth);
463         struct ibv_flow_spec_eth eth = {
464                 .type = IBV_FLOW_SPEC_ETH,
465                 .size = size,
466         };
467         int ret;
468         const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
469                         MLX5_FLOW_LAYER_OUTER_L4;
470         const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
471         const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
472
473         if (flow->layers & vlanm)
474                 return rte_flow_error_set(error, ENOTSUP,
475                                           RTE_FLOW_ERROR_TYPE_ITEM,
476                                           item,
477                                           "VLAN layer already configured");
478         else if ((flow->layers & l34m) != 0)
479                 return rte_flow_error_set(error, ENOTSUP,
480                                           RTE_FLOW_ERROR_TYPE_ITEM,
481                                           item,
482                                           "L2 layer cannot follow L3/L4 layer");
483         if (!mask)
484                 mask = &rte_flow_item_vlan_mask;
485         ret = mlx5_flow_item_acceptable
486                 (item, (const uint8_t *)mask,
487                  (const uint8_t *)&nic_mask,
488                  sizeof(struct rte_flow_item_vlan), error);
489         if (ret)
490                 return ret;
491         if (spec) {
492                 eth.val.vlan_tag = spec->tci;
493                 eth.mask.vlan_tag = mask->tci;
494                 eth.val.vlan_tag &= eth.mask.vlan_tag;
495                 eth.val.ether_type = spec->inner_type;
496                 eth.mask.ether_type = mask->inner_type;
497                 eth.val.ether_type &= eth.mask.ether_type;
498         }
499         /*
500          * From verbs perspective an empty VLAN is equivalent
501          * to a packet without VLAN layer.
502          */
503         if (!eth.mask.vlan_tag)
504                 return rte_flow_error_set(error, EINVAL,
505                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
506                                           item->spec,
507                                           "VLAN cannot be empty");
508         if (!(flow->layers & l2m)) {
509                 if (size <= flow_size)
510                         mlx5_flow_spec_verbs_add(flow, &eth, size);
511         } else {
512                 if (flow->verbs.attr)
513                         mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
514                 size = 0; /* Only an update is done in eth specification. */
515         }
516         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
517                 MLX5_FLOW_LAYER_OUTER_VLAN;
518         return size;
519 }
520
521 /**
522  * Convert the @p item into a Verbs specification after ensuring the NIC
523  * will understand and process it correctly.
524  * If the necessary size for the conversion is greater than @p flow_size,
525  * nothing is written in @p flow; the validation is still performed.
526  *
527  * @param[in] item
528  *   Item specification.
529  * @param[in, out] flow
530  *   Pointer to flow structure.
531  * @param[in] flow_size
532  *   Size in bytes of the available space in @p flow, if too small, nothing is
533  *   written.
534  * @param[out] error
535  *   Pointer to error structure.
536  *
537  * @return
538  *   On success, the number of bytes consumed/necessary. If the returned value
539  *   is less than or equal to @p flow_size, the @p item has been fully converted;
540  *   otherwise another call with the returned memory size should be made.
541  *   On error, a negative errno value is returned and rte_errno is set.
542  */
543 static int
544 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
545                     const size_t flow_size, struct rte_flow_error *error)
546 {
547         const struct rte_flow_item_ipv4 *spec = item->spec;
548         const struct rte_flow_item_ipv4 *mask = item->mask;
549         const struct rte_flow_item_ipv4 nic_mask = {
550                 .hdr = {
551                         .src_addr = RTE_BE32(0xffffffff),
552                         .dst_addr = RTE_BE32(0xffffffff),
553                         .type_of_service = 0xff,
554                         .next_proto_id = 0xff,
555                 },
556         };
557         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
558         struct ibv_flow_spec_ipv4_ext ipv4 = {
559                 .type = IBV_FLOW_SPEC_IPV4_EXT,
560                 .size = size,
561         };
562         int ret;
563
564         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
565                 return rte_flow_error_set(error, ENOTSUP,
566                                           RTE_FLOW_ERROR_TYPE_ITEM,
567                                           item,
568                                           "multiple L3 layers not supported");
569         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
570                 return rte_flow_error_set(error, ENOTSUP,
571                                           RTE_FLOW_ERROR_TYPE_ITEM,
572                                           item,
573                                           "L3 cannot follow an L4 layer.");
574         if (!mask)
575                 mask = &rte_flow_item_ipv4_mask;
576         ret = mlx5_flow_item_acceptable
577                 (item, (const uint8_t *)mask,
578                  (const uint8_t *)&nic_mask,
579                  sizeof(struct rte_flow_item_ipv4), error);
580         if (ret < 0)
581                 return ret;
582         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
583         if (spec) {
584                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
585                         .src_ip = spec->hdr.src_addr,
586                         .dst_ip = spec->hdr.dst_addr,
587                         .proto = spec->hdr.next_proto_id,
588                         .tos = spec->hdr.type_of_service,
589                 };
590                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
591                         .src_ip = mask->hdr.src_addr,
592                         .dst_ip = mask->hdr.dst_addr,
593                         .proto = mask->hdr.next_proto_id,
594                         .tos = mask->hdr.type_of_service,
595                 };
596                 /* Remove unwanted bits from values. */
597                 ipv4.val.src_ip &= ipv4.mask.src_ip;
598                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
599                 ipv4.val.proto &= ipv4.mask.proto;
600                 ipv4.val.tos &= ipv4.mask.tos;
601         }
602         flow->l3_protocol_en = !!ipv4.mask.proto;
603         flow->l3_protocol = ipv4.val.proto;
604         if (size <= flow_size)
605                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
606         return size;
607 }
608
609 /**
610  * Convert the @p item into a Verbs specification after ensuring the NIC
611  * will understand and process it correctly.
612  * If the necessary size for the conversion is greater than @p flow_size,
613  * nothing is written in @p flow; the validation is still performed.
614  *
615  * @param[in] item
616  *   Item specification.
617  * @param[in, out] flow
618  *   Pointer to flow structure.
619  * @param[in] flow_size
620  *   Size in bytes of the available space in @p flow, if too small, nothing is
621  *   written.
622  * @param[out] error
623  *   Pointer to error structure.
624  *
625  * @return
626  *   On success, the number of bytes consumed/necessary. If the returned value
627  *   is less than or equal to @p flow_size, the @p item has been fully converted;
628  *   otherwise another call with the returned memory size should be made.
629  *   On error, a negative errno value is returned and rte_errno is set.
630  */
631 static int
632 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
633                     const size_t flow_size, struct rte_flow_error *error)
634 {
635         const struct rte_flow_item_ipv6 *spec = item->spec;
636         const struct rte_flow_item_ipv6 *mask = item->mask;
637         const struct rte_flow_item_ipv6 nic_mask = {
638                 .hdr = {
639                         .src_addr =
640                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
641                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
642                         .dst_addr =
643                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
644                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
645                         .vtc_flow = RTE_BE32(0xffffffff),
646                         .proto = 0xff,
647                         .hop_limits = 0xff,
648                 },
649         };
650         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
651         struct ibv_flow_spec_ipv6 ipv6 = {
652                 .type = IBV_FLOW_SPEC_IPV6,
653                 .size = size,
654         };
655         int ret;
656
657         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
658                 return rte_flow_error_set(error, ENOTSUP,
659                                           RTE_FLOW_ERROR_TYPE_ITEM,
660                                           item,
661                                           "multiple L3 layers not supported");
662         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
663                 return rte_flow_error_set(error, ENOTSUP,
664                                           RTE_FLOW_ERROR_TYPE_ITEM,
665                                           item,
666                                           "L3 cannot follow an L4 layer.");
667         if (!mask)
668                 mask = &rte_flow_item_ipv6_mask;
669         ret = mlx5_flow_item_acceptable
670                 (item, (const uint8_t *)mask,
671                  (const uint8_t *)&nic_mask,
672                  sizeof(struct rte_flow_item_ipv6), error);
673         if (ret < 0)
674                 return ret;
675         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
676         if (spec) {
677                 unsigned int i;
678                 uint32_t vtc_flow_val;
679                 uint32_t vtc_flow_mask;
680
681                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
682                        RTE_DIM(ipv6.val.src_ip));
683                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
684                        RTE_DIM(ipv6.val.dst_ip));
685                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
686                        RTE_DIM(ipv6.mask.src_ip));
687                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
688                        RTE_DIM(ipv6.mask.dst_ip));
689                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
690                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
691                 ipv6.val.flow_label =
692                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
693                                          IPV6_HDR_FL_SHIFT);
694                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
695                                          IPV6_HDR_TC_SHIFT;
696                 ipv6.val.next_hdr = spec->hdr.proto;
697                 ipv6.val.hop_limit = spec->hdr.hop_limits;
698                 ipv6.mask.flow_label =
699                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
700                                          IPV6_HDR_FL_SHIFT);
701                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
702                                           IPV6_HDR_TC_SHIFT;
703                 ipv6.mask.next_hdr = mask->hdr.proto;
704                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
705                 /* Remove unwanted bits from values. */
706                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
707                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
708                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
709                 }
710                 ipv6.val.flow_label &= ipv6.mask.flow_label;
711                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
712                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
713                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
714         }
715         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
716         flow->l3_protocol = ipv6.val.next_hdr;
717         if (size <= flow_size)
718                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
719         return size;
720 }
721
722 /**
723  * Convert the @p item into a Verbs specification after ensuring the NIC
724  * will understand and process it correctly.
725  * If the necessary size for the conversion is greater than @p flow_size,
726  * nothing is written in @p flow; the validation is still performed.
727  *
728  * @param[in] item
729  *   Item specification.
730  * @param[in, out] flow
731  *   Pointer to flow structure.
732  * @param[in] flow_size
733  *   Size in bytes of the available space in @p flow, if too small, nothing is
734  *   written.
735  * @param[out] error
736  *   Pointer to error structure.
737  *
738  * @return
739  *   On success, the number of bytes consumed/necessary. If the returned value
740  *   is less than or equal to @p flow_size, the @p item has been fully converted;
741  *   otherwise another call with the returned memory size should be made.
742  *   On error, a negative errno value is returned and rte_errno is set.
743  */
744 static int
745 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
746                    const size_t flow_size, struct rte_flow_error *error)
747 {
748         const struct rte_flow_item_udp *spec = item->spec;
749         const struct rte_flow_item_udp *mask = item->mask;
750         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
751         struct ibv_flow_spec_tcp_udp udp = {
752                 .type = IBV_FLOW_SPEC_UDP,
753                 .size = size,
754         };
755         int ret;
756
757         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
758                 return rte_flow_error_set(error, ENOTSUP,
759                                           RTE_FLOW_ERROR_TYPE_ITEM,
760                                           item,
761                                           "L3 is mandatory to filter on L4");
762         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
763                 return rte_flow_error_set(error, ENOTSUP,
764                                           RTE_FLOW_ERROR_TYPE_ITEM,
765                                           item,
766                                           "L4 layer is already present");
767         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
768                 return rte_flow_error_set(error, ENOTSUP,
769                                           RTE_FLOW_ERROR_TYPE_ITEM,
770                                           item,
771                                           "protocol filtering not compatible"
772                                           " with UDP layer");
773         if (!mask)
774                 mask = &rte_flow_item_udp_mask;
775         ret = mlx5_flow_item_acceptable
776                 (item, (const uint8_t *)mask,
777                  (const uint8_t *)&rte_flow_item_udp_mask,
778                  sizeof(struct rte_flow_item_udp), error);
779         if (ret < 0)
780                 return ret;
781         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
782         if (size > flow_size)
783                 return size;
784         if (spec) {
785                 udp.val.dst_port = spec->hdr.dst_port;
786                 udp.val.src_port = spec->hdr.src_port;
787                 udp.mask.dst_port = mask->hdr.dst_port;
788                 udp.mask.src_port = mask->hdr.src_port;
789                 /* Remove unwanted bits from values. */
790                 udp.val.src_port &= udp.mask.src_port;
791                 udp.val.dst_port &= udp.mask.dst_port;
792         }
793         mlx5_flow_spec_verbs_add(flow, &udp, size);
794         return size;
795 }
796
797 /**
798  * Convert the @p item into a Verbs specification after ensuring the NIC
799  * will understand and process it correctly.
800  * If the necessary size for the conversion is greater than @p flow_size,
801  * nothing is written in @p flow; the validation is still performed.
802  *
803  * @param[in] item
804  *   Item specification.
805  * @param[in, out] flow
806  *   Pointer to flow structure.
807  * @param[in] flow_size
808  *   Size in bytes of the available space in @p flow, if too small, nothing is
809  *   written.
810  * @param[out] error
811  *   Pointer to error structure.
812  *
813  * @return
814  *   On success, the number of bytes consumed/necessary. If the returned value
815  *   is less than or equal to @p flow_size, the @p item has been fully converted;
816  *   otherwise another call with the returned memory size should be made.
817  *   On error, a negative errno value is returned and rte_errno is set.
818  */
819 static int
820 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
821                    const size_t flow_size, struct rte_flow_error *error)
822 {
823         const struct rte_flow_item_tcp *spec = item->spec;
824         const struct rte_flow_item_tcp *mask = item->mask;
825         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
826         struct ibv_flow_spec_tcp_udp tcp = {
827                 .type = IBV_FLOW_SPEC_TCP,
828                 .size = size,
829         };
830         int ret;
831
832         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
833                 return rte_flow_error_set(error, ENOTSUP,
834                                           RTE_FLOW_ERROR_TYPE_ITEM,
835                                           item,
836                                           "protocol filtering not compatible"
837                                           " with TCP layer");
838         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
839                 return rte_flow_error_set(error, ENOTSUP,
840                                           RTE_FLOW_ERROR_TYPE_ITEM,
841                                           item,
842                                           "L3 is mandatory to filter on L4");
843         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
844                 return rte_flow_error_set(error, ENOTSUP,
845                                           RTE_FLOW_ERROR_TYPE_ITEM,
846                                           item,
847                                           "L4 layer is already present");
848         if (!mask)
849                 mask = &rte_flow_item_tcp_mask;
850         ret = mlx5_flow_item_acceptable
851                 (item, (const uint8_t *)mask,
852                  (const uint8_t *)&rte_flow_item_tcp_mask,
853                  sizeof(struct rte_flow_item_tcp), error);
854         if (ret < 0)
855                 return ret;
856         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
857         if (size > flow_size)
858                 return size;
859         if (spec) {
860                 tcp.val.dst_port = spec->hdr.dst_port;
861                 tcp.val.src_port = spec->hdr.src_port;
862                 tcp.mask.dst_port = mask->hdr.dst_port;
863                 tcp.mask.src_port = mask->hdr.src_port;
864                 /* Remove unwanted bits from values. */
865                 tcp.val.src_port &= tcp.mask.src_port;
866                 tcp.val.dst_port &= tcp.mask.dst_port;
867         }
868         mlx5_flow_spec_verbs_add(flow, &tcp, size);
869         return size;
870 }
871
872 /**
873  * Convert the @p pattern into Verbs specifications after ensuring the NIC
874  * will understand and process it correctly.
875  * The conversion is performed item by item; each of them is written into
876  * @p flow if its size is less than or equal to @p flow_size.
877  * Validation and memory consumption computation are still performed until the
878  * end of @p pattern, unless an error is encountered.
879  *
880  * @param[in] pattern
881  *   Flow pattern.
882  * @param[in, out] flow
883  *   Pointer to the rte_flow structure.
884  * @param[in] flow_size
885  *   Size in bytes of the available space in @p flow, if too small some
886  *   garbage may be present.
887  * @param[out] error
888  *   Pointer to error structure.
889  *
890  * @return
891  *   On success, the number of bytes consumed/necessary. If the returned value
892  *   is less than or equal to @p flow_size, the @p pattern has been fully
893  *   converted; otherwise another call with the returned memory size should
894  *   be made.
895  *   On error, a negative errno value is returned and rte_errno is set.
896  */
897 static int
898 mlx5_flow_items(const struct rte_flow_item pattern[],
899                 struct rte_flow *flow, const size_t flow_size,
900                 struct rte_flow_error *error)
901 {
902         int remain = flow_size;
903         size_t size = 0;
904
905         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
906                 int ret = 0;
907
908                 switch (pattern->type) {
909                 case RTE_FLOW_ITEM_TYPE_VOID:
910                         break;
911                 case RTE_FLOW_ITEM_TYPE_ETH:
912                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
913                         break;
914                 case RTE_FLOW_ITEM_TYPE_VLAN:
915                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
916                         break;
917                 case RTE_FLOW_ITEM_TYPE_IPV4:
918                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
919                         break;
920                 case RTE_FLOW_ITEM_TYPE_IPV6:
921                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
922                         break;
923                 case RTE_FLOW_ITEM_TYPE_UDP:
924                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
925                         break;
926                 case RTE_FLOW_ITEM_TYPE_TCP:
927                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
928                         break;
929                 default:
930                         return rte_flow_error_set(error, ENOTSUP,
931                                                   RTE_FLOW_ERROR_TYPE_ITEM,
932                                                   pattern,
933                                                   "item not supported");
934                 }
935                 if (ret < 0)
936                         return ret;
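                /*
                 * Consume the remaining space; once exhausted, later items
                 * are still validated but only their size is accounted for.
                 */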
937                 if (remain > ret)
938                         remain -= ret;
939                 else
940                         remain = 0;
941                 size += ret;
942         }
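        /*
         * An empty pattern (or one made only of VOID items) matches all
         * traffic: fall back to a wildcard Ethernet specification.
         */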
943         if (!flow->layers) {
944                 const struct rte_flow_item item = {
945                         .type = RTE_FLOW_ITEM_TYPE_ETH,
946                 };
947
948                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
949         }
950         return size;
951 }
952
953 /**
954  * Convert the @p action into a Verbs specification after ensuring the NIC
955  * will understand and process it correctly.
956  * If the necessary size for the conversion is greater than @p flow_size,
957  * nothing is written in @p flow; the validation is still performed.
958  *
959  * @param[in] action
960  *   Action configuration.
961  * @param[in, out] flow
962  *   Pointer to flow structure.
963  * @param[in] flow_size
964  *   Size in bytes of the available space in @p flow, if too small, nothing is
965  *   written.
966  * @param[out] error
967  *   Pointer to error structure.
968  *
969  * @return
970  *   On success, the number of bytes consumed/necessary. If the returned value
971  *   is less than or equal to @p flow_size, the @p action has been fully
972  *   converted; otherwise another call with the returned memory size should
973  *   be made.
974  *   On error, a negative errno value is returned and rte_errno is set.
975  */
976 static int
977 mlx5_flow_action_drop(const struct rte_flow_action *action,
978                       struct rte_flow *flow, const size_t flow_size,
979                       struct rte_flow_error *error)
980 {
981         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
982         struct ibv_flow_spec_action_drop drop = {
983                         .type = IBV_FLOW_SPEC_ACTION_DROP,
984                         .size = size,
985         };
986
987         if (flow->fate)
988                 return rte_flow_error_set(error, ENOTSUP,
989                                           RTE_FLOW_ERROR_TYPE_ACTION,
990                                           action,
991                                           "multiple fate actions are not"
992                                           " supported");
993         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK))
994                 return rte_flow_error_set(error, ENOTSUP,
995                                           RTE_FLOW_ERROR_TYPE_ACTION,
996                                           action,
997                                           "drop is not compatible with"
998                                           " flag/mark action");
999         if (size <= flow_size)
1000                 mlx5_flow_spec_verbs_add(flow, &drop, size);
1001         flow->fate |= MLX5_FLOW_FATE_DROP;
1002         return size;
1003 }
1004
1005 /**
1006  * Convert the @p action into @p flow after ensuring the NIC will understand
1007  * and process it correctly.
1008  *
1009  * @param[in] dev
1010  *   Pointer to Ethernet device structure.
1011  * @param[in] action
1012  *   Action configuration.
1013  * @param[in, out] flow
1014  *   Pointer to flow structure.
1015  * @param[out] error
1016  *   Pointer to error structure.
1017  *
1018  * @return
1019  *   0 on success, a negative errno value otherwise and rte_errno is set.
1020  */
1021 static int
1022 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1023                        const struct rte_flow_action *action,
1024                        struct rte_flow *flow,
1025                        struct rte_flow_error *error)
1026 {
1027         struct priv *priv = dev->data->dev_private;
1028         const struct rte_flow_action_queue *queue = action->conf;
1029
1030         if (flow->fate)
1031                 return rte_flow_error_set(error, ENOTSUP,
1032                                           RTE_FLOW_ERROR_TYPE_ACTION,
1033                                           action,
1034                                           "multiple fate actions are not"
1035                                           " supported");
1036         if (queue->index >= priv->rxqs_n)
1037                 return rte_flow_error_set(error, EINVAL,
1038                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1039                                           &queue->index,
1040                                           "queue index out of range");
1041         if (!(*priv->rxqs)[queue->index])
1042                 return rte_flow_error_set(error, EINVAL,
1043                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1044                                           &queue->index,
1045                                           "queue is not configured");
1046         flow->queue = queue->index;
1047         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1048         return 0;
1049 }
1050
1051 /**
1052  * Convert the @p action into a Verbs specification after ensuring the NIC
1053  * will understand and process it correctly.
1054  * If the necessary size for the conversion is greater than @p flow_size,
1055  * nothing is written in @p flow; the validation is still performed.
1056  *
1057  * @param[in] action
1058  *   Action configuration.
1059  * @param[in, out] flow
1060  *   Pointer to flow structure.
1061  * @param[in] flow_size
1062  *   Size in bytes of the available space in @p flow, if too small, nothing is
1063  *   written.
1064  * @param[out] error
1065  *   Pointer to error structure.
1066  *
1067  * @return
1068  *   On success, the number of bytes consumed/necessary. If the returned value
1069  *   is less than or equal to @p flow_size, the @p action has been fully
1070  *   converted; otherwise another call with the returned memory size should
1071  *   be made.
1072  *   On error, a negative errno value is returned and rte_errno is set.
1073  */
1074 static int
1075 mlx5_flow_action_flag(const struct rte_flow_action *action,
1076                       struct rte_flow *flow, const size_t flow_size,
1077                       struct rte_flow_error *error)
1078 {
1079         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1080         struct ibv_flow_spec_action_tag tag = {
1081                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1082                 .size = size,
1083                 .tag_id = mlx5_flow_mark_set(MLX5_FLOW_MARK_DEFAULT),
1084         };
1085
1086         if (flow->modifier & MLX5_FLOW_MOD_FLAG)
1087                 return rte_flow_error_set(error, ENOTSUP,
1088                                           RTE_FLOW_ERROR_TYPE_ACTION,
1089                                           action,
1090                                           "flag action already present");
1091         if (flow->fate & MLX5_FLOW_FATE_DROP)
1092                 return rte_flow_error_set(error, ENOTSUP,
1093                                           RTE_FLOW_ERROR_TYPE_ACTION,
1094                                           action,
1095                                           "flag is not compatible with drop"
1096                                           " action");
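        /*
         * A MARK action already installs a tag specification, so FLAG does
         * not need an additional one.
         */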
1097         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1098                 return 0;
1099         flow->modifier |= MLX5_FLOW_MOD_FLAG;
1100         if (size <= flow_size)
1101                 mlx5_flow_spec_verbs_add(flow, &tag, size);
1102         return size;
1103 }
1104
1105 /**
1106  * Update verbs specification to modify the flag to mark.
1107  *
1108  * @param[in, out] flow
1109  *   Pointer to the rte_flow structure.
1110  * @param[in] mark_id
1111  *   Mark identifier to replace the flag.
1112  */
1113 static void
1114 mlx5_flow_verbs_mark_update(struct rte_flow *flow, uint32_t mark_id)
1115 {
1116         struct ibv_spec_header *hdr;
1117         int i;
1118
1119         /* Update Verbs specification. */
1120         hdr = (struct ibv_spec_header *)flow->verbs.specs;
1121         if (!hdr)
1122                 return;
1123         for (i = 0; i != flow->verbs.attr->num_of_specs; ++i) {
1124                 if (hdr->type == IBV_FLOW_SPEC_ACTION_TAG) {
1125                         struct ibv_flow_spec_action_tag *t =
1126                                 (struct ibv_flow_spec_action_tag *)hdr;
1127
1128                         t->tag_id = mlx5_flow_mark_set(mark_id);
1129                 }
1130                 hdr = (struct ibv_spec_header *)((uintptr_t)hdr + hdr->size);
1131         }
1132 }
1133
1134 /**
1135  * Convert the @p action into @p flow (or update the already present Flag
1136  * Verbs specification) after ensuring the NIC will understand and
1137  * process it correctly.
1138  * If the necessary size for the conversion is greater than @p flow_size,
1139  * nothing is written in @p flow; the validation is still performed.
1140  *
1141  * @param[in] action
1142  *   Action configuration.
1143  * @param[in, out] flow
1144  *   Pointer to flow structure.
1145  * @param[in] flow_size
1146  *   Size in bytes of the available space in @p flow, if too small, nothing is
1147  *   written.
1148  * @param[out] error
1149  *   Pointer to error structure.
1150  *
1151  * @return
1152  *   On success, the number of bytes consumed/necessary. If the returned value
1153  *   is less than or equal to @p flow_size, the @p action has been fully
1154  *   converted; otherwise another call with the returned memory size should
1155  *   be made.
1156  *   On error, a negative errno value is returned and rte_errno is set.
1157  */
1158 static int
1159 mlx5_flow_action_mark(const struct rte_flow_action *action,
1160                       struct rte_flow *flow, const size_t flow_size,
1161                       struct rte_flow_error *error)
1162 {
1163         const struct rte_flow_action_mark *mark = action->conf;
1164         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1165         struct ibv_flow_spec_action_tag tag = {
1166                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1167                 .size = size,
1168         };
1169
1170         if (!mark)
1171                 return rte_flow_error_set(error, EINVAL,
1172                                           RTE_FLOW_ERROR_TYPE_ACTION,
1173                                           action,
1174                                           "configuration cannot be null");
1175         if (mark->id >= MLX5_FLOW_MARK_MAX)
1176                 return rte_flow_error_set(error, EINVAL,
1177                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1178                                           &mark->id,
1179                                           "mark ID must be in the range 0 <= id < "
1180                                           RTE_STR(MLX5_FLOW_MARK_MAX));
1181         if (flow->modifier & MLX5_FLOW_MOD_MARK)
1182                 return rte_flow_error_set(error, ENOTSUP,
1183                                           RTE_FLOW_ERROR_TYPE_ACTION,
1184                                           action,
1185                                           "mark action already present");
1186         if (flow->fate & MLX5_FLOW_FATE_DROP)
1187                 return rte_flow_error_set(error, ENOTSUP,
1188                                           RTE_FLOW_ERROR_TYPE_ACTION,
1189                                           action,
1190                                           "mark is not compatible with drop"
1191                                           " action");
1192         if (flow->modifier & MLX5_FLOW_MOD_FLAG) {
1193                 mlx5_flow_verbs_mark_update(flow, mark->id);
1194                 size = 0; /* Only an update is done in the specification. */
1195         } else {
1196                 tag.tag_id = mlx5_flow_mark_set(mark->id);
1197                 if (size <= flow_size)
1198                         mlx5_flow_spec_verbs_add(flow, &tag, size);
1201         }
1202         flow->modifier |= MLX5_FLOW_MOD_MARK;
1203         return size;
1204 }
1205
1206 /**
1207  * Convert the @p action into @p flow after ensuring the NIC will understand
1208  * and process it correctly.
1209  * The conversion is performed action by action; each of them is written into
1210  * @p flow if its size is less than or equal to @p flow_size.
1211  * Validation and memory consumption computation are still performed until the
1212  * end of @p actions, unless an error is encountered.
1213  *
1214  * @param[in] dev
1215  *   Pointer to Ethernet device structure.
1216  * @param[in] actions
1217  *   Pointer to flow actions array.
1218  * @param[in, out] flow
1219  *   Pointer to the rte_flow structure.
1220  * @param[in] flow_size
1221  *   Size in bytes of the available space in @p flow, if too small some
1222  *   garbage may be present.
1223  * @param[out] error
1224  *   Pointer to error structure.
1225  *
1226  * @return
1227  *   On success, the number of bytes consumed/necessary. If the returned value
1228  *   is less than or equal to @p flow_size, the @p actions have been fully
1229  *   converted; otherwise another call with the returned memory size should
1230  *   be made.
1231  *   On error, a negative errno value is returned and rte_errno is set.
1232  */
1233 static int
1234 mlx5_flow_actions(struct rte_eth_dev *dev,
1235                   const struct rte_flow_action actions[],
1236                   struct rte_flow *flow, const size_t flow_size,
1237                   struct rte_flow_error *error)
1238 {
1239         size_t size = 0;
1240         int remain = flow_size;
1241         int ret = 0;
1242
1243         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1244                 switch (actions->type) {
1245                 case RTE_FLOW_ACTION_TYPE_VOID:
1246                         break;
1247                 case RTE_FLOW_ACTION_TYPE_FLAG:
1248                         ret = mlx5_flow_action_flag(actions, flow, remain,
1249                                                     error);
1250                         break;
1251                 case RTE_FLOW_ACTION_TYPE_MARK:
1252                         ret = mlx5_flow_action_mark(actions, flow, remain,
1253                                                     error);
1254                         break;
1255                 case RTE_FLOW_ACTION_TYPE_DROP:
1256                         ret = mlx5_flow_action_drop(actions, flow, remain,
1257                                                     error);
1258                         break;
1259                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1260                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1261                         break;
1262                 default:
1263                         return rte_flow_error_set(error, ENOTSUP,
1264                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1265                                                   actions,
1266                                                   "action not supported");
1267                 }
1268                 if (ret < 0)
1269                         return ret;
1270                 if (remain > ret)
1271                         remain -= ret;
1272                 else
1273                         remain = 0;
1274                 size += ret;
1275         }
1276         if (!flow->fate)
1277                 return rte_flow_error_set(error, ENOTSUP,
1278                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1279                                           NULL,
1280                                           "no fate action found");
1281         return size;
1282 }
1283
1284 /**
1285  * Convert the @p attributes, @p pattern and @p actions into a flow for the
1286  * NIC after ensuring the NIC will understand and process it correctly.
1287  * The conversion is performed item per item and action per action; each one
1288  * is written into @p flow if its size is less than or equal to @p
1289  * flow_size.
1290  * Validation and memory consumption computation are still performed until the
1291  * end, unless an error is encountered.
1292  *
1293  * @param[in] dev
1294  *   Pointer to Ethernet device.
1295  * @param[in, out] flow
1296  *   Pointer to flow structure.
1297  * @param[in] flow_size
1298  *   Size in bytes of the available space in @p flow; if too small, @p flow
1299  *   may be left with partial (garbage) content.
1300  * @param[in] attributes
1301  *   Flow rule attributes.
1302  * @param[in] pattern
1303  *   Pattern specification (list terminated by the END pattern item).
1304  * @param[in] actions
1305  *   Associated actions (list terminated by the END action).
1306  * @param[out] error
1307  *   Perform verbose error reporting if not NULL.
1308  *
1309  * @return
1310  *   On success, the number of bytes consumed/necessary; if the returned value
1311  *   is less than or equal to @p flow_size, the flow has been fully converted
1312  *   and can be applied, otherwise another call with this returned memory size
1313  *   should be done.
1314  *   On error, a negative errno value is returned and rte_errno is set.
1315  */
1316 static int
1317 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1318                 const size_t flow_size,
1319                 const struct rte_flow_attr *attributes,
1320                 const struct rte_flow_item pattern[],
1321                 const struct rte_flow_action actions[],
1322                 struct rte_flow_error *error)
1323 {
1324         struct rte_flow local_flow = { .layers = 0, };
1325         size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
1326         int remain = (flow_size > size) ? flow_size - size : 0;
1327         int ret;
1328
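        /*
         * Not enough room for even the flow header: validate into a throwaway
         * flow on the stack so the required size can still be computed.
         */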
1329         if (!remain)
1330                 flow = &local_flow;
1331         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1332         if (ret < 0)
1333                 return ret;
1334         ret = mlx5_flow_items(pattern, flow, remain, error);
1335         if (ret < 0)
1336                 return ret;
1337         size += ret;
1338         remain = (flow_size > size) ? flow_size - size : 0;
1339         ret = mlx5_flow_actions(dev, actions, flow, remain, error);
1340         if (ret < 0)
1341                 return ret;
1342         size += ret;
1343         if (size <= flow_size)
1344                 flow->verbs.attr->priority = flow->attributes.priority;
1345         return size;
1346 }
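
A minimal caller sketch of the two-pass convention above (it mirrors what
mlx5_flow_list_create() does below; error paths are trimmed for brevity):

        struct rte_flow *flow;
        int size;
        int ret;

        /* First pass: a NULL flow and zero size only validate and report the
         * number of bytes required for the converted flow. */
        size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);
        if (size < 0)
                return size;
        flow = rte_zmalloc(__func__, size, 0);
        if (!flow) {
                rte_errno = ENOMEM;
                return -rte_errno;
        }
        flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
        flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
        /* Second pass: same arguments, now with enough room to write into. */
        ret = mlx5_flow_merge(dev, flow, size, attr, pattern, actions, error);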
1347
1348 /**
1349  * Set the Rx queue mark flag when the flow carries a MARK or FLAG modifier.
1350  *
1351  * @param[in] dev
1352  *   Pointer to Ethernet device.
1353  * @param[in] flow
1354  *   Pointer to flow structure.
1355  */
1356 static void
1357 mlx5_flow_rxq_mark_set(struct rte_eth_dev *dev, struct rte_flow *flow)
1358 {
1359         struct priv *priv = dev->data->dev_private;
1360
1361         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1362                 struct mlx5_rxq_ctrl *rxq_ctrl =
1363                         container_of((*priv->rxqs)[flow->queue],
1364                                      struct mlx5_rxq_ctrl, rxq);
1365
1366                 rxq_ctrl->rxq.mark = 1;
1367                 rxq_ctrl->flow_mark_n++;
1368         }
1369 }
1370
1371 /**
1372  * Clear the Rx queue mark flag associated with @p flow if no other flow
1373  * still requests marking on that queue.
1374  *
1375  * @param dev
1376  *   Pointer to Ethernet device.
1377  * @param[in] flow
1378  *   Pointer to the flow.
1379  */
1380 static void
1381 mlx5_flow_rxq_mark_trim(struct rte_eth_dev *dev, struct rte_flow *flow)
1382 {
1383         struct priv *priv = dev->data->dev_private;
1384
1385         if (flow->modifier & (MLX5_FLOW_MOD_FLAG | MLX5_FLOW_MOD_MARK)) {
1386                 struct mlx5_rxq_ctrl *rxq_ctrl =
1387                         container_of((*priv->rxqs)[flow->queue],
1388                                      struct mlx5_rxq_ctrl, rxq);
1389
1390                 rxq_ctrl->flow_mark_n--;
1391                 rxq_ctrl->rxq.mark = !!rxq_ctrl->flow_mark_n;
1392         }
1393 }
1394
1395 /**
1396  * Clear the mark bit in all Rx queues.
1397  *
1398  * @param dev
1399  *   Pointer to Ethernet device.
1400  */
1401 static void
1402 mlx5_flow_rxq_mark_clear(struct rte_eth_dev *dev)
1403 {
1404         struct priv *priv = dev->data->dev_private;
1405         unsigned int i;
1406         unsigned int idx;
1407
1408         for (idx = 0, i = 0; idx != priv->rxqs_n; ++i) {
1409                 struct mlx5_rxq_ctrl *rxq_ctrl;
1410
1411                 if (!(*priv->rxqs)[idx])
1412                         continue;
1413                 rxq_ctrl = container_of((*priv->rxqs)[idx],
1414                                         struct mlx5_rxq_ctrl, rxq);
1415                 rxq_ctrl->flow_mark_n = 0;
1416                 rxq_ctrl->rxq.mark = 0;
1417                 ++idx;
1418         }
1419 }
1420
1421 /**
1422  * Validate a flow supported by the NIC.
1423  *
1424  * @see rte_flow_validate()
1425  * @see rte_flow_ops
1426  */
1427 int
1428 mlx5_flow_validate(struct rte_eth_dev *dev,
1429                    const struct rte_flow_attr *attr,
1430                    const struct rte_flow_item items[],
1431                    const struct rte_flow_action actions[],
1432                    struct rte_flow_error *error)
1433 {
1434         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1435
1436         if (ret < 0)
1437                 return ret;
1438         return 0;
1439 }
1440
1441 /**
1442  * Remove the flow.
1443  *
1444  * @param[in] dev
1445  *   Pointer to Ethernet device.
1446  * @param[in, out] flow
1447  *   Pointer to flow structure.
1448  */
1449 static void
1450 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1451 {
1452         if (flow->fate & MLX5_FLOW_FATE_DROP) {
1453                 if (flow->verbs.flow) {
1454                         claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
1455                         flow->verbs.flow = NULL;
1456                 }
1457         }
1458         if (flow->verbs.hrxq) {
1459                 if (flow->fate & MLX5_FLOW_FATE_DROP)
1460                         mlx5_hrxq_drop_release(dev);
1461                 else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
1462                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
1463                 flow->verbs.hrxq = NULL;
1464         }
1465 }
1466
1467 /**
1468  * Apply the flow.
1469  *
1470  * @param[in] dev
1471  *   Pointer to Ethernet device structure.
1472  * @param[in, out] flow
1473  *   Pointer to flow structure.
1474  * @param[out] error
1475  *   Pointer to error structure.
1476  *
1477  * @return
1478  *   0 on success, a negative errno value otherwise and rte_errno is set.
1479  */
1480 static int
1481 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1482                 struct rte_flow_error *error)
1483 {
1484         if (flow->fate & MLX5_FLOW_FATE_DROP) {
1485                 flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
1486                 if (!flow->verbs.hrxq)
1487                         return rte_flow_error_set
1488                                 (error, errno,
1489                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1490                                  NULL,
1491                                  "cannot allocate Drop queue");
1492         } else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
1493                 struct mlx5_hrxq *hrxq;
1494
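                /*
                 * Reuse a hash Rx queue object already set up for this single
                 * queue with the default RSS key (MLX5_RSS_HASH_KEY_LEN
                 * bytes), or create a new one.
                 */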
1495                 hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
1496                                      MLX5_RSS_HASH_KEY_LEN, 0,
1497                                      &flow->queue, 1, 0, 0);
1498                 if (!hrxq)
1499                         hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
1500                                              MLX5_RSS_HASH_KEY_LEN, 0,
1501                                              &flow->queue, 1, 0, 0);
1502                 if (!hrxq)
1503                         return rte_flow_error_set(error, rte_errno,
1504                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1505                                         NULL,
1506                                         "cannot create flow");
1507                 flow->verbs.hrxq = hrxq;
1508         }
1509         flow->verbs.flow =
1510                 mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
1511         if (!flow->verbs.flow) {
1512                 if (flow->fate & MLX5_FLOW_FATE_DROP)
1513                         mlx5_hrxq_drop_release(dev);
1514                 else
1515                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
1516                 flow->verbs.hrxq = NULL;
1517                 return rte_flow_error_set(error, errno,
1518                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1519                                           NULL,
1520                                           "kernel module refuses to create"
1521                                           " flow");
1522         }
1523         return 0;
1524 }
1525
1526 /**
1527  * Create a flow and add it to @p list.
1528  *
1529  * @param dev
1530  *   Pointer to Ethernet device.
1531  * @param list
1532  *   Pointer to a TAILQ flow list.
1533  * @param[in] attr
1534  *   Flow rule attributes.
1535  * @param[in] items
1536  *   Pattern specification (list terminated by the END pattern item).
1537  * @param[in] actions
1538  *   Associated actions (list terminated by the END action).
1539  * @param[out] error
1540  *   Perform verbose error reporting if not NULL.
1541  *
1542  * @return
1543  *   A flow on success, NULL otherwise and rte_errno is set.
1544  */
1545 static struct rte_flow *
1546 mlx5_flow_list_create(struct rte_eth_dev *dev,
1547                       struct mlx5_flows *list,
1548                       const struct rte_flow_attr *attr,
1549                       const struct rte_flow_item items[],
1550                       const struct rte_flow_action actions[],
1551                       struct rte_flow_error *error)
1552 {
1553         struct rte_flow *flow;
1554         size_t size;
1555         int ret;
1556
1557         ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1558         if (ret < 0)
1559                 return NULL;
1560         size = ret;
1561         flow = rte_zmalloc(__func__, size, 0);
1562         if (!flow) {
1563                 rte_flow_error_set(error, ENOMEM,
1564                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1565                                    NULL,
1566                                    "cannot allocate memory");
1567                 return NULL;
1568         }
1569         flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
1570         flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
1571         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1572         if (ret < 0)
1573                 goto error;
1574         assert((size_t)ret == size);
1575         if (dev->data->dev_started) {
1576                 ret = mlx5_flow_apply(dev, flow, error);
1577                 if (ret < 0)
1578                         goto error;
1579         }
1580         mlx5_flow_rxq_mark_set(dev, flow);
1581         TAILQ_INSERT_TAIL(list, flow, next);
1582         return flow;
1583 error:
1584         ret = rte_errno; /* Save rte_errno before cleanup. */
1585         mlx5_flow_remove(dev, flow);
1586         rte_free(flow);
1587         rte_errno = ret; /* Restore rte_errno. */
1588         return NULL;
1589 }
1590
1591 /**
1592  * Create a flow.
1593  *
1594  * @see rte_flow_create()
1595  * @see rte_flow_ops
1596  */
1597 struct rte_flow *
1598 mlx5_flow_create(struct rte_eth_dev *dev,
1599                  const struct rte_flow_attr *attr,
1600                  const struct rte_flow_item items[],
1601                  const struct rte_flow_action actions[],
1602                  struct rte_flow_error *error)
1603 {
1604         return mlx5_flow_list_create
1605                 (dev, &((struct priv *)dev->data->dev_private)->flows,
1606                  attr, items, actions, error);
1607 }
1608
1609 /**
1610  * Destroy a flow in a list.
1611  *
1612  * @param dev
1613  *   Pointer to Ethernet device.
1614  * @param list
1615  *   Pointer to a TAILQ flow list.
1616  * @param[in] flow
1617  *   Flow to destroy.
1618  */
1619 static void
1620 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1621                        struct rte_flow *flow)
1622 {
1623         mlx5_flow_remove(dev, flow);
1624         TAILQ_REMOVE(list, flow, next);
1625         mlx5_flow_rxq_mark_trim(dev, flow);
1626         rte_free(flow);
1627 }
1628
1629 /**
1630  * Destroy all flows.
1631  *
1632  * @param dev
1633  *   Pointer to Ethernet device.
1634  * @param list
1635  *   Pointer to a TAILQ flow list.
1636  */
1637 void
1638 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
1639 {
1640         while (!TAILQ_EMPTY(list)) {
1641                 struct rte_flow *flow;
1642
1643                 flow = TAILQ_FIRST(list);
1644                 mlx5_flow_list_destroy(dev, list, flow);
1645         }
1646 }
1647
1648 /**
1649  * Remove all flows from the hardware; the flows remain in @p list.
1650  *
1651  * @param dev
1652  *   Pointer to Ethernet device.
1653  * @param list
1654  *   Pointer to a TAILQ flow list.
1655  */
1656 void
1657 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
1658 {
1659         struct rte_flow *flow;
1660
1661         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
1662                 mlx5_flow_remove(dev, flow);
1663         mlx5_flow_rxq_mark_clear(dev);
1664 }
1665
1666 /**
1667  * Apply all flows in @p list to the hardware.
1668  *
1669  * @param dev
1670  *   Pointer to Ethernet device.
1671  * @param list
1672  *   Pointer to a TAILQ flow list.
1673  *
1674  * @return
1675  *   0 on success, a negative errno value otherwise and rte_errno is set.
1676  */
1677 int
1678 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
1679 {
1680         struct rte_flow *flow;
1681         struct rte_flow_error error;
1682         int ret = 0;
1683
1684         TAILQ_FOREACH(flow, list, next) {
1685                 ret = mlx5_flow_apply(dev, flow, &error);
1686                 if (ret < 0)
1687                         goto error;
1688                 mlx5_flow_rxq_mark_set(dev, flow);
1689         }
1690         return 0;
1691 error:
1692         ret = rte_errno; /* Save rte_errno before cleanup. */
1693         mlx5_flow_stop(dev, list);
1694         rte_errno = ret; /* Restore rte_errno. */
1695         return -rte_errno;
1696 }
1697
1698 /**
1699  * Verify the flow list is empty.
1700  *
1701  * @param dev
1702  *   Pointer to Ethernet device.
1703  *
1704  * @return the number of flows not released.
1705  */
1706 int
1707 mlx5_flow_verify(struct rte_eth_dev *dev)
1708 {
1709         struct priv *priv = dev->data->dev_private;
1710         struct rte_flow *flow;
1711         int ret = 0;
1712
1713         TAILQ_FOREACH(flow, &priv->flows, next) {
1714                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
1715                         dev->data->port_id, (void *)flow);
1716                 ++ret;
1717         }
1718         return ret;
1719 }
1720
1721 /**
1722  * Enable a control flow configured from the control plane.
1723  *
1724  * @param dev
1725  *   Pointer to Ethernet device.
1726  * @param eth_spec
1727  *   An Ethernet flow spec to apply.
1728  * @param eth_mask
1729  *   An Ethernet flow mask to apply.
1730  * @param vlan_spec
1731  *   A VLAN flow spec to apply.
1732  * @param vlan_mask
1733  *   A VLAN flow mask to apply.
1734  *
1735  * @return
1736  *   0 on success, a negative errno value otherwise and rte_errno is set.
1737  */
1738 int
1739 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
1740                     struct rte_flow_item_eth *eth_spec,
1741                     struct rte_flow_item_eth *eth_mask,
1742                     struct rte_flow_item_vlan *vlan_spec,
1743                     struct rte_flow_item_vlan *vlan_mask)
1744 {
1745         struct priv *priv = dev->data->dev_private;
1746         const struct rte_flow_attr attr = {
1747                 .ingress = 1,
1748                 .priority = priv->config.flow_prio - 1,
1749         };
1750         struct rte_flow_item items[] = {
1751                 {
1752                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1753                         .spec = eth_spec,
1754                         .last = NULL,
1755                         .mask = eth_mask,
1756                 },
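                /*
                 * Terminate the pattern right here when no VLAN specification
                 * is provided.
                 */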
1757                 {
1758                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
1759                                 RTE_FLOW_ITEM_TYPE_END,
1760                         .spec = vlan_spec,
1761                         .last = NULL,
1762                         .mask = vlan_mask,
1763                 },
1764                 {
1765                         .type = RTE_FLOW_ITEM_TYPE_END,
1766                 },
1767         };
1768         uint16_t queue[priv->reta_idx_n];
1769         struct rte_flow_action_rss action_rss = {
1770                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1771                 .level = 0,
1772                 .types = priv->rss_conf.rss_hf,
1773                 .key_len = priv->rss_conf.rss_key_len,
1774                 .queue_num = priv->reta_idx_n,
1775                 .key = priv->rss_conf.rss_key,
1776                 .queue = queue,
1777         };
1778         struct rte_flow_action actions[] = {
1779                 {
1780                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1781                         .conf = &action_rss,
1782                 },
1783                 {
1784                         .type = RTE_FLOW_ACTION_TYPE_END,
1785                 },
1786         };
1787         struct rte_flow *flow;
1788         struct rte_flow_error error;
1789         unsigned int i;
1790
1791         if (!priv->reta_idx_n) {
1792                 rte_errno = EINVAL;
1793                 return -rte_errno;
1794         }
1795         for (i = 0; i != priv->reta_idx_n; ++i)
1796                 queue[i] = (*priv->reta_idx)[i];
1797         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
1798                                      actions, &error);
1799         if (!flow)
1800                 return -rte_errno;
1801         return 0;
1802 }
1803
1804 /**
1805  * Enable a control flow configured from the control plane.
1806  *
1807  * @param dev
1808  *   Pointer to Ethernet device.
1809  * @param eth_spec
1810  *   An Ethernet flow spec to apply.
1811  * @param eth_mask
1812  *   An Ethernet flow mask to apply.
1813  *
1814  * @return
1815  *   0 on success, a negative errno value otherwise and rte_errno is set.
1816  */
1817 int
1818 mlx5_ctrl_flow(struct rte_eth_dev *dev,
1819                struct rte_flow_item_eth *eth_spec,
1820                struct rte_flow_item_eth *eth_mask)
1821 {
1822         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
1823 }
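
A hedged usage sketch: callers such as the device start-up path typically build
the Ethernet spec/mask on the stack and request one control flow per destination
MAC address; the values below are purely illustrative:

        struct rte_flow_item_eth unicast = {
                .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55", /* example MAC */
        };
        struct rte_flow_item_eth unicast_mask = {
                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
        };
        int ret = mlx5_ctrl_flow(dev, &unicast, &unicast_mask);

        if (ret)
                /* rte_errno has been set by mlx5_ctrl_flow_vlan(). */
                return -rte_errno;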
1824
1825 /**
1826  * Destroy a flow.
1827  *
1828  * @see rte_flow_destroy()
1829  * @see rte_flow_ops
1830  */
1831 int
1832 mlx5_flow_destroy(struct rte_eth_dev *dev,
1833                   struct rte_flow *flow,
1834                   struct rte_flow_error *error __rte_unused)
1835 {
1836         struct priv *priv = dev->data->dev_private;
1837
1838         mlx5_flow_list_destroy(dev, &priv->flows, flow);
1839         return 0;
1840 }
1841
1842 /**
1843  * Destroy all flows.
1844  *
1845  * @see rte_flow_flush()
1846  * @see rte_flow_ops
1847  */
1848 int
1849 mlx5_flow_flush(struct rte_eth_dev *dev,
1850                 struct rte_flow_error *error __rte_unused)
1851 {
1852         struct priv *priv = dev->data->dev_private;
1853
1854         mlx5_flow_list_flush(dev, &priv->flows);
1855         return 0;
1856 }
1857
1858 /**
1859  * Isolated mode.
1860  *
1861  * @see rte_flow_isolate()
1862  * @see rte_flow_ops
1863  */
1864 int
1865 mlx5_flow_isolate(struct rte_eth_dev *dev,
1866                   int enable,
1867                   struct rte_flow_error *error)
1868 {
1869         struct priv *priv = dev->data->dev_private;
1870
1871         if (dev->data->dev_started) {
1872                 rte_flow_error_set(error, EBUSY,
1873                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1874                                    NULL,
1875                                    "port must be stopped first");
1876                 return -rte_errno;
1877         }
1878         priv->isolated = !!enable;
1879         if (enable)
1880                 dev->dev_ops = &mlx5_dev_ops_isolate;
1881         else
1882                 dev->dev_ops = &mlx5_dev_ops;
1883         return 0;
1884 }
1885
1886 /**
1887  * Convert a flow director filter to a generic flow.
1888  *
1889  * @param dev
1890  *   Pointer to Ethernet device.
1891  * @param fdir_filter
1892  *   Flow director filter to add.
1893  * @param attributes
1894  *   Generic flow parameters structure.
1895  *
1896  * @return
1897  *   0 on success, a negative errno value otherwise and rte_errno is set.
1898  */
1899 static int
1900 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
1901                          const struct rte_eth_fdir_filter *fdir_filter,
1902                          struct mlx5_fdir *attributes)
1903 {
1904         struct priv *priv = dev->data->dev_private;
1905         const struct rte_eth_fdir_input *input = &fdir_filter->input;
1906         const struct rte_eth_fdir_masks *mask =
1907                 &dev->data->dev_conf.fdir_conf.mask;
1908
1909         /* Validate queue number. */
1910         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
1911                 DRV_LOG(ERR, "port %u invalid queue number %d",
1912                         dev->data->port_id, fdir_filter->action.rx_queue);
1913                 rte_errno = EINVAL;
1914                 return -rte_errno;
1915         }
1916         attributes->attr.ingress = 1;
1917         attributes->items[0] = (struct rte_flow_item) {
1918                 .type = RTE_FLOW_ITEM_TYPE_ETH,
1919                 .spec = &attributes->l2,
1920                 .mask = &attributes->l2_mask,
1921         };
1922         switch (fdir_filter->action.behavior) {
1923         case RTE_ETH_FDIR_ACCEPT:
1924                 attributes->actions[0] = (struct rte_flow_action){
1925                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1926                         .conf = &attributes->queue,
1927                 };
1928                 break;
1929         case RTE_ETH_FDIR_REJECT:
1930                 attributes->actions[0] = (struct rte_flow_action){
1931                         .type = RTE_FLOW_ACTION_TYPE_DROP,
1932                 };
1933                 break;
1934         default:
1935                 DRV_LOG(ERR, "port %u invalid behavior %d",
1936                         dev->data->port_id,
1937                         fdir_filter->action.behavior);
1938                 rte_errno = ENOTSUP;
1939                 return -rte_errno;
1940         }
1941         attributes->queue.index = fdir_filter->action.rx_queue;
1942         /* Handle L3. */
1943         switch (fdir_filter->input.flow_type) {
1944         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1945         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1946         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1947                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
1948                         .src_addr = input->flow.ip4_flow.src_ip,
1949                         .dst_addr = input->flow.ip4_flow.dst_ip,
1950                         .time_to_live = input->flow.ip4_flow.ttl,
1951                         .type_of_service = input->flow.ip4_flow.tos,
1952                         .next_proto_id = input->flow.ip4_flow.proto,
1953                 };
1954                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
1955                         .src_addr = mask->ipv4_mask.src_ip,
1956                         .dst_addr = mask->ipv4_mask.dst_ip,
1957                         .time_to_live = mask->ipv4_mask.ttl,
1958                         .type_of_service = mask->ipv4_mask.tos,
1959                         .next_proto_id = mask->ipv4_mask.proto,
1960                 };
1961                 attributes->items[1] = (struct rte_flow_item){
1962                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
1963                         .spec = &attributes->l3,
1964                         .mask = &attributes->l3_mask,
1965                 };
1966                 break;
1967         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1968         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1969         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1970                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
1971                         .hop_limits = input->flow.ipv6_flow.hop_limits,
1972                         .proto = input->flow.ipv6_flow.proto,
1973                 };
1974
1975                 memcpy(attributes->l3.ipv6.hdr.src_addr,
1976                        input->flow.ipv6_flow.src_ip,
1977                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1978                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
1979                        input->flow.ipv6_flow.dst_ip,
1980                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
1981                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
1982                        mask->ipv6_mask.src_ip,
1983                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1984                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
1985                        mask->ipv6_mask.dst_ip,
1986                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
1987                 attributes->items[1] = (struct rte_flow_item){
1988                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
1989                         .spec = &attributes->l3,
1990                         .mask = &attributes->l3_mask,
1991                 };
1992                 break;
1993         default:
1994                 DRV_LOG(ERR, "port %u invalid flow type %d",
1995                         dev->data->port_id, fdir_filter->input.flow_type);
1996                 rte_errno = ENOTSUP;
1997                 return -rte_errno;
1998         }
1999         /* Handle L4. */
2000         switch (fdir_filter->input.flow_type) {
2001         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2002                 attributes->l4.udp.hdr = (struct udp_hdr){
2003                         .src_port = input->flow.udp4_flow.src_port,
2004                         .dst_port = input->flow.udp4_flow.dst_port,
2005                 };
2006                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2007                         .src_port = mask->src_port_mask,
2008                         .dst_port = mask->dst_port_mask,
2009                 };
2010                 attributes->items[2] = (struct rte_flow_item){
2011                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2012                         .spec = &attributes->l4,
2013                         .mask = &attributes->l4_mask,
2014                 };
2015                 break;
2016         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2017                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2018                         .src_port = input->flow.tcp4_flow.src_port,
2019                         .dst_port = input->flow.tcp4_flow.dst_port,
2020                 };
2021                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2022                         .src_port = mask->src_port_mask,
2023                         .dst_port = mask->dst_port_mask,
2024                 };
2025                 attributes->items[2] = (struct rte_flow_item){
2026                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2027                         .spec = &attributes->l4,
2028                         .mask = &attributes->l4_mask,
2029                 };
2030                 break;
2031         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2032                 attributes->l4.udp.hdr = (struct udp_hdr){
2033                         .src_port = input->flow.udp6_flow.src_port,
2034                         .dst_port = input->flow.udp6_flow.dst_port,
2035                 };
2036                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
2037                         .src_port = mask->src_port_mask,
2038                         .dst_port = mask->dst_port_mask,
2039                 };
2040                 attributes->items[2] = (struct rte_flow_item){
2041                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2042                         .spec = &attributes->l4,
2043                         .mask = &attributes->l4_mask,
2044                 };
2045                 break;
2046         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2047                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2048                         .src_port = input->flow.tcp6_flow.src_port,
2049                         .dst_port = input->flow.tcp6_flow.dst_port,
2050                 };
2051                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
2052                         .src_port = mask->src_port_mask,
2053                         .dst_port = mask->dst_port_mask,
2054                 };
2055                 attributes->items[2] = (struct rte_flow_item){
2056                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2057                         .spec = &attributes->l4,
2058                         .mask = &attributes->l4_mask,
2059                 };
2060                 break;
2061         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2062         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2063                 break;
2064         default:
2065                 DRV_LOG(ERR, "port %u invalid flow type %d",
2066                         dev->data->port_id, fdir_filter->input.flow_type);
2067                 rte_errno = ENOTSUP;
2068                 return -rte_errno;
2069         }
2070         return 0;
2071 }
2072
2073 /**
2074  * Add new flow director filter and store it in list.
2075  *
2076  * @param dev
2077  *   Pointer to Ethernet device.
2078  * @param fdir_filter
2079  *   Flow director filter to add.
2080  *
2081  * @return
2082  *   0 on success, a negative errno value otherwise and rte_errno is set.
2083  */
2084 static int
2085 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
2086                      const struct rte_eth_fdir_filter *fdir_filter)
2087 {
2088         struct priv *priv = dev->data->dev_private;
2089         struct mlx5_fdir attributes = {
2090                 .attr.group = 0,
2091                 .l2_mask = {
2092                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2093                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2094                         .type = 0,
2095                 },
2096         };
2097         struct rte_flow_error error;
2098         struct rte_flow *flow;
2099         int ret;
2100
2101         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
2102         if (ret)
2103                 return ret;
2104         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
2105                                      attributes.items, attributes.actions,
2106                                      &error);
2107         if (flow) {
2108                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
2109                         (void *)flow);
2110                 return 0;
2111         }
2112         return -rte_errno;
2113 }
2114
2115 /**
2116  * Delete a specific filter.
2117  *
2118  * @param dev
2119  *   Pointer to Ethernet device.
2120  * @param fdir_filter
2121  *   Filter to be deleted.
2122  *
2123  * @return
2124  *   0 on success, a negative errno value otherwise and rte_errno is set.
2125  */
2126 static int
2127 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
2128                         const struct rte_eth_fdir_filter *fdir_filter
2129                         __rte_unused)
2130 {
2131         rte_errno = ENOTSUP;
2132         return -rte_errno;
2133 }
2134
2135 /**
2136  * Update the queue of a specific filter.
2137  *
2138  * @param dev
2139  *   Pointer to Ethernet device.
2140  * @param fdir_filter
2141  *   Filter to be updated.
2142  *
2143  * @return
2144  *   0 on success, a negative errno value otherwise and rte_errno is set.
2145  */
2146 static int
2147 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
2148                         const struct rte_eth_fdir_filter *fdir_filter)
2149 {
2150         int ret;
2151
2152         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
2153         if (ret)
2154                 return ret;
2155         return mlx5_fdir_filter_add(dev, fdir_filter);
2156 }
2157
2158 /**
2159  * Flush all filters.
2160  *
2161  * @param dev
2162  *   Pointer to Ethernet device.
2163  */
2164 static void
2165 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
2166 {
2167         struct priv *priv = dev->data->dev_private;
2168
2169         mlx5_flow_list_flush(dev, &priv->flows);
2170 }
2171
2172 /**
2173  * Get flow director information.
2174  *
2175  * @param dev
2176  *   Pointer to Ethernet device.
2177  * @param[out] fdir_info
2178  *   Resulting flow director information.
2179  */
2180 static void
2181 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
2182 {
2183         struct rte_eth_fdir_masks *mask =
2184                 &dev->data->dev_conf.fdir_conf.mask;
2185
2186         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
2187         fdir_info->guarant_spc = 0;
2188         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2189         fdir_info->max_flexpayload = 0;
2190         fdir_info->flow_types_mask[0] = 0;
2191         fdir_info->flex_payload_unit = 0;
2192         fdir_info->max_flex_payload_segment_num = 0;
2193         fdir_info->flex_payload_limit = 0;
2194         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2195 }
2196
2197 /**
2198  * Deal with flow director operations.
2199  *
2200  * @param dev
2201  *   Pointer to Ethernet device.
2202  * @param filter_op
2203  *   Operation to perform.
2204  * @param arg
2205  *   Pointer to operation-specific structure.
2206  *
2207  * @return
2208  *   0 on success, a negative errno value otherwise and rte_errno is set.
2209  */
2210 static int
2211 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
2212                     void *arg)
2213 {
2214         enum rte_fdir_mode fdir_mode =
2215                 dev->data->dev_conf.fdir_conf.mode;
2216
2217         if (filter_op == RTE_ETH_FILTER_NOP)
2218                 return 0;
2219         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2220             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2221                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
2222                         dev->data->port_id, fdir_mode);
2223                 rte_errno = EINVAL;
2224                 return -rte_errno;
2225         }
2226         switch (filter_op) {
2227         case RTE_ETH_FILTER_ADD:
2228                 return mlx5_fdir_filter_add(dev, arg);
2229         case RTE_ETH_FILTER_UPDATE:
2230                 return mlx5_fdir_filter_update(dev, arg);
2231         case RTE_ETH_FILTER_DELETE:
2232                 return mlx5_fdir_filter_delete(dev, arg);
2233         case RTE_ETH_FILTER_FLUSH:
2234                 mlx5_fdir_filter_flush(dev);
2235                 break;
2236         case RTE_ETH_FILTER_INFO:
2237                 mlx5_fdir_info_get(dev, arg);
2238                 break;
2239         default:
2240                 DRV_LOG(DEBUG, "port %u unknown operation %u",
2241                         dev->data->port_id, filter_op);
2242                 rte_errno = EINVAL;
2243                 return -rte_errno;
2244         }
2245         return 0;
2246 }
2247
2248 /**
2249  * Manage filter operations.
2250  *
2251  * @param dev
2252  *   Pointer to Ethernet device structure.
2253  * @param filter_type
2254  *   Filter type.
2255  * @param filter_op
2256  *   Operation to perform.
2257  * @param arg
2258  *   Pointer to operation-specific structure.
2259  *
2260  * @return
2261  *   0 on success, a negative errno value otherwise and rte_errno is set.
2262  */
2263 int
2264 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2265                      enum rte_filter_type filter_type,
2266                      enum rte_filter_op filter_op,
2267                      void *arg)
2268 {
2269         switch (filter_type) {
2270         case RTE_ETH_FILTER_GENERIC:
2271                 if (filter_op != RTE_ETH_FILTER_GET) {
2272                         rte_errno = EINVAL;
2273                         return -rte_errno;
2274                 }
2275                 *(const void **)arg = &mlx5_flow_ops;
2276                 return 0;
2277         case RTE_ETH_FILTER_FDIR:
2278                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
2279         default:
2280                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
2281                         dev->data->port_id, filter_type);
2282                 rte_errno = ENOTSUP;
2283                 return -rte_errno;
2284         }
2285         return 0;
2286 }
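
For reference, a hedged sketch of how the generic filter path above is normally
exercised: the rte_flow layer queries the driver for its rte_flow_ops through
the RTE_ETH_FILTER_GENERIC/RTE_ETH_FILTER_GET pair, roughly as follows
(variable names are illustrative):

        const struct rte_flow_ops *ops = NULL;
        int ret;

        ret = mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
                                   RTE_ETH_FILTER_GET, &ops);
        if (ret)
                return ret;
        /* ops now points to mlx5_flow_ops (validate/create/destroy/...). */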