net/mlx5: add flow TCP item
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
38 /* Pattern Layer bits. */
39 #define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0)
40 #define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1)
41 #define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2)
42 #define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3)
43 #define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4)
44 #define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5)
45 /* Masks. */
46 #define MLX5_FLOW_LAYER_OUTER_L3 \
47         (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
48 #define MLX5_FLOW_LAYER_OUTER_L4 \
49         (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50
51 /* Actions that modify the fate of matching traffic. */
52 #define MLX5_FLOW_FATE_DROP (1u << 0)
53 #define MLX5_FLOW_FATE_QUEUE (1u << 1)
54
55 /* Possible L3 layer protocols for filtering. */
56 #define MLX5_IP_PROTOCOL_TCP 6
57 #define MLX5_IP_PROTOCOL_UDP 17
58
59 /** Verbs flow specification and related resources. */
60 struct mlx5_flow_verbs {
61         unsigned int size; /**< Size of the attribute. */
62         struct {
63                 struct ibv_flow_attr *attr;
64                 /**< Pointer to the Verbs flow attributes. */
65                 uint8_t *specs; /**< Pointer to the specifications. */
66         };
67         struct ibv_flow *flow; /**< Verbs flow pointer. */
68         struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
69 };
70
71 /* Flow structure. */
72 struct rte_flow {
73         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
74         struct rte_flow_attr attributes; /**< User flow attribute. */
75         uint32_t l3_protocol_en:1; /**< Protocol filtering requested. */
76         uint32_t layers;
77         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
78         uint32_t fate;
79         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
80         uint8_t l3_protocol; /**< valid when l3_protocol_en is set. */
81         struct mlx5_flow_verbs verbs; /* Verbs flow. */
82         uint16_t queue; /**< Destination queue to redirect traffic to. */
83 };
84
85 static const struct rte_flow_ops mlx5_flow_ops = {
86         .validate = mlx5_flow_validate,
87         .create = mlx5_flow_create,
88         .destroy = mlx5_flow_destroy,
89         .flush = mlx5_flow_flush,
90         .isolate = mlx5_flow_isolate,
91 };
92
93 /* Convert FDIR request to Generic flow. */
94 struct mlx5_fdir {
95         struct rte_flow_attr attr;
96         struct rte_flow_action actions[2];
97         struct rte_flow_item items[4];
98         struct rte_flow_item_eth l2;
99         struct rte_flow_item_eth l2_mask;
100         union {
101                 struct rte_flow_item_ipv4 ipv4;
102                 struct rte_flow_item_ipv6 ipv6;
103         } l3;
104         union {
105                 struct rte_flow_item_ipv4 ipv4;
106                 struct rte_flow_item_ipv6 ipv6;
107         } l3_mask;
108         union {
109                 struct rte_flow_item_udp udp;
110                 struct rte_flow_item_tcp tcp;
111         } l4;
112         union {
113                 struct rte_flow_item_udp udp;
114                 struct rte_flow_item_tcp tcp;
115         } l4_mask;
116         struct rte_flow_action_queue queue;
117 };
118
119 /* Verbs specification header. */
120 struct ibv_spec_header {
121         enum ibv_flow_spec_type type;
122         uint16_t size;
123 };
124
125 /**
126  * Discover the maximum number of priorities available.
127  *
128  * @param[in] dev
129  *   Pointer to Ethernet device.
130  *
131  * @return
132  *   Number of supported flow priorities on success, a negative errno value
133  *   otherwise and rte_errno is set.
134  */
135 int
136 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
137 {
138         struct {
139                 struct ibv_flow_attr attr;
140                 struct ibv_flow_spec_eth eth;
141                 struct ibv_flow_spec_action_drop drop;
142         } flow_attr = {
143                 .attr = {
144                         .num_of_specs = 2,
145                 },
146                 .eth = {
147                         .type = IBV_FLOW_SPEC_ETH,
148                         .size = sizeof(struct ibv_flow_spec_eth),
149                 },
150                 .drop = {
151                         .size = sizeof(struct ibv_flow_spec_action_drop),
152                         .type = IBV_FLOW_SPEC_ACTION_DROP,
153                 },
154         };
155         struct ibv_flow *flow;
156         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
157         uint16_t vprio[] = { 8, 16 };
158         int i;
159
160         if (!drop) {
161                 rte_errno = ENOTSUP;
162                 return -rte_errno;
163         }
164         for (i = 0; i != RTE_DIM(vprio); i++) {
165                 flow_attr.attr.priority = vprio[i] - 1;
166                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
167                 if (!flow)
168                         break;
169                 claim_zero(mlx5_glue->destroy_flow(flow));
170         }
171         mlx5_hrxq_drop_release(dev);
172         DRV_LOG(INFO, "port %u flow maximum priority: %d",
173                 dev->data->port_id, vprio[i - 1]);
174         return vprio[i - 1];
175 }
176
177 /**
178  * Verify the @p attributes will be correctly understood by the NIC and store
179  * them in the @p flow if everything is correct.
180  *
181  * @param[in] dev
182  *   Pointer to Ethernet device.
183  * @param[in] attributes
184  *   Pointer to flow attributes
185  * @param[in, out] flow
186  *   Pointer to the rte_flow structure.
187  * @param[out] error
188  *   Pointer to error structure.
189  *
190  * @return
191  *   0 on success, a negative errno value otherwise and rte_errno is set.
192  */
193 static int
194 mlx5_flow_attributes(struct rte_eth_dev *dev,
195                      const struct rte_flow_attr *attributes,
196                      struct rte_flow *flow,
197                      struct rte_flow_error *error)
198 {
199         uint32_t priority_max =
200                 ((struct priv *)dev->data->dev_private)->config.flow_prio;
201
202         if (attributes->group)
203                 return rte_flow_error_set(error, ENOTSUP,
204                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
205                                           NULL,
206                                           "groups are not supported");
207         if (attributes->priority >= priority_max)
208                 return rte_flow_error_set(error, ENOTSUP,
209                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
210                                           NULL,
211                                           "priority out of range");
212         if (attributes->egress)
213                 return rte_flow_error_set(error, ENOTSUP,
214                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
215                                           NULL,
216                                           "egress is not supported");
217         if (attributes->transfer)
218                 return rte_flow_error_set(error, ENOTSUP,
219                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
220                                           NULL,
221                                           "transfer is not supported");
222         if (!attributes->ingress)
223                 return rte_flow_error_set(error, ENOTSUP,
224                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
225                                           NULL,
226                                           "ingress attribute is mandatory");
227         flow->attributes = *attributes;
228         return 0;
229 }
230
231 /**
232  * Verify the @p item specifications (spec, last, mask) are compatible with the
233  * NIC capabilities.
234  *
235  * @param[in] item
236  *   Item specification.
237  * @param[in] mask
238  *   @p item->mask or flow default bit-masks.
239  * @param[in] nic_mask
240  *   Bit-masks covering supported fields by the NIC to compare with user mask.
241  * @param[in] size
242  *   Bit-masks size in bytes.
243  * @param[out] error
244  *   Pointer to error structure.
245  *
246  * @return
247  *   0 on success, a negative errno value otherwise and rte_errno is set.
248  */
249 static int
250 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
251                           const uint8_t *mask,
252                           const uint8_t *nic_mask,
253                           unsigned int size,
254                           struct rte_flow_error *error)
255 {
256         unsigned int i;
257
258         assert(nic_mask);
259         for (i = 0; i < size; ++i)
260                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
261                         return rte_flow_error_set(error, ENOTSUP,
262                                                   RTE_FLOW_ERROR_TYPE_ITEM,
263                                                   item,
264                                                   "mask enables unsupported"
265                                                   " bits");
266         if (!item->spec && (item->mask || item->last))
267                 return rte_flow_error_set(error, EINVAL,
268                                           RTE_FLOW_ERROR_TYPE_ITEM,
269                                           item,
270                                           "mask/last without a spec is not"
271                                           " supported");
272         if (item->spec && item->last) {
273                 uint8_t spec[size];
274                 uint8_t last[size];
275                 unsigned int i;
276                 int ret;
277
278                 for (i = 0; i < size; ++i) {
279                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
280                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
281                 }
282                 ret = memcmp(spec, last, size);
283                 if (ret != 0)
284                         return rte_flow_error_set(error, ENOTSUP,
285                                                   RTE_FLOW_ERROR_TYPE_ITEM,
286                                                   item,
287                                                   "range is not supported");
288         }
289         return 0;
290 }
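
/*
 * Illustrative sketch, not part of the driver: how the acceptability rule
 * above behaves. A user mask may only enable bits that the NIC mask also
 * enables, i.e. (nic_mask | mask) == nic_mask for every byte. The function
 * name and the MLX5_FLOW_EXAMPLES guard are hypothetical and never defined
 * in the build.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_udp_mask_check(struct rte_flow_error *error)
{
        /* Accepted: matching on the destination port only, a subset of the
         * default rte_flow_item_udp_mask used here as the NIC mask. A mask
         * enabling e.g. hdr.dgram_len would be rejected with ENOTSUP.
         */
        const struct rte_flow_item_udp spec = {
                .hdr = { .dst_port = RTE_BE16(4789) },
        };
        const struct rte_flow_item_udp mask = {
                .hdr = { .dst_port = RTE_BE16(0xffff) },
        };
        const struct rte_flow_item item = {
                .type = RTE_FLOW_ITEM_TYPE_UDP,
                .spec = &spec,
                .mask = &mask,
        };

        return mlx5_flow_item_acceptable(&item, (const uint8_t *)&mask,
                                         (const uint8_t *)&rte_flow_item_udp_mask,
                                         sizeof(struct rte_flow_item_udp),
                                         error);
}
#endif /* MLX5_FLOW_EXAMPLES */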
291
292 /**
293  * Add a verbs specification into @p flow.
294  *
295  * @param[in, out] flow
296  *   Pointer to flow structure.
297  * @param[in] src
298  *   Specification to add.
299  * @param[in] size
300  *   Size in bytes of the specification to copy.
301  */
302 static void
303 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
304 {
305         if (flow->verbs.specs) {
306                 void *dst;
307
308                 dst = (void *)(flow->verbs.specs + flow->verbs.size);
309                 memcpy(dst, src, size);
310                 ++flow->verbs.attr->num_of_specs;
311         }
312         flow->verbs.size += size;
313 }
314
315 /**
316  * Convert the @p item into a Verbs specification after ensuring the NIC
317  * will understand and process it correctly.
318  * If the necessary size for the conversion is greater than the @p flow_size,
319  * nothing is written in @p flow; the validation is still performed.
320  *
321  * @param[in] item
322  *   Item specification.
323  * @param[in, out] flow
324  *   Pointer to flow structure.
325  * @param[in] flow_size
326  *   Size in bytes of the available space in @p flow, if too small, nothing is
327  *   written.
328  * @param[out] error
329  *   Pointer to error structure.
330  *
331  * @return
332  *   On success, the number of bytes consumed/necessary. If the returned value
333  *   is less than or equal to @p flow_size, the @p item has been fully converted;
334  *   otherwise another call with this returned memory size should be done.
335  *   On error, a negative errno value is returned and rte_errno is set.
336  */
337 static int
338 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
339                    const size_t flow_size, struct rte_flow_error *error)
340 {
341         const struct rte_flow_item_eth *spec = item->spec;
342         const struct rte_flow_item_eth *mask = item->mask;
343         const struct rte_flow_item_eth nic_mask = {
344                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
345                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
346                 .type = RTE_BE16(0xffff),
347         };
348         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
349         struct ibv_flow_spec_eth eth = {
350                 .type = IBV_FLOW_SPEC_ETH,
351                 .size = size,
352         };
353         int ret;
354
355         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
356                 return rte_flow_error_set(error, ENOTSUP,
357                                           RTE_FLOW_ERROR_TYPE_ITEM,
358                                           item,
359                                           "L2 layers already configured");
360         if (!mask)
361                 mask = &rte_flow_item_eth_mask;
362         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
363                                         (const uint8_t *)&nic_mask,
364                                         sizeof(struct rte_flow_item_eth),
365                                         error);
366         if (ret)
367                 return ret;
368         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
369         if (size > flow_size)
370                 return size;
371         if (spec) {
372                 unsigned int i;
373
374                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
375                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
376                 eth.val.ether_type = spec->type;
377                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
378                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
379                 eth.mask.ether_type = mask->type;
380                 /* Remove unwanted bits from values. */
381                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
382                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
383                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
384                 }
385                 eth.val.ether_type &= eth.mask.ether_type;
386         }
387         mlx5_flow_spec_verbs_add(flow, &eth, size);
388         return size;
389 }
390
391 /**
392  * Update the VLAN tag in the Verbs Ethernet specification.
393  *
394  * @param[in, out] attr
395  *   Pointer to Verbs attributes structure.
396  * @param[in] eth
397  *   Verbs structure containing the VLAN information to copy.
398  */
399 static void
400 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
401                            struct ibv_flow_spec_eth *eth)
402 {
403         unsigned int i;
404         enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
405         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
406                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
407
408         for (i = 0; i != attr->num_of_specs; ++i) {
409                 if (hdr->type == search) {
410                         struct ibv_flow_spec_eth *e =
411                                 (struct ibv_flow_spec_eth *)hdr;
412
413                         e->val.vlan_tag = eth->val.vlan_tag;
414                         e->mask.vlan_tag = eth->mask.vlan_tag;
415                         e->val.ether_type = eth->val.ether_type;
416                         e->mask.ether_type = eth->mask.ether_type;
417                         break;
418                 }
419                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
420         }
421 }
422
423 /**
424  * Convert the @p item into @p flow (or by updating the already present
425  * Ethernet Verbs) specification after ensuring the NIC will understand and
426  * process it correctly.
427  * If the necessary size for the conversion is greater than the @p flow_size,
428  * nothing is written in @p flow; the validation is still performed.
429  *
430  * @param[in] item
431  *   Item specification.
432  * @param[in, out] flow
433  *   Pointer to flow structure.
434  * @param[in] flow_size
435  *   Size in bytes of the available space in @p flow, if too small, nothing is
436  *   written.
437  * @param[out] error
438  *   Pointer to error structure.
439  *
440  * @return
441  *   On success, the number of bytes consumed/necessary. If the returned value
442  *   is less than or equal to @p flow_size, the @p item has been fully converted;
443  *   otherwise another call with this returned memory size should be done.
444  *   On error, a negative errno value is returned and rte_errno is set.
445  */
446 static int
447 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
448                     const size_t flow_size, struct rte_flow_error *error)
449 {
450         const struct rte_flow_item_vlan *spec = item->spec;
451         const struct rte_flow_item_vlan *mask = item->mask;
452         const struct rte_flow_item_vlan nic_mask = {
453                 .tci = RTE_BE16(0x0fff),
454                 .inner_type = RTE_BE16(0xffff),
455         };
456         unsigned int size = sizeof(struct ibv_flow_spec_eth);
457         struct ibv_flow_spec_eth eth = {
458                 .type = IBV_FLOW_SPEC_ETH,
459                 .size = size,
460         };
461         int ret;
462         const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
463                         MLX5_FLOW_LAYER_OUTER_L4;
464         const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
465         const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
466
467         if (flow->layers & vlanm)
468                 return rte_flow_error_set(error, ENOTSUP,
469                                           RTE_FLOW_ERROR_TYPE_ITEM,
470                                           item,
471                                           "VLAN layer already configured");
472         else if ((flow->layers & l34m) != 0)
473                 return rte_flow_error_set(error, ENOTSUP,
474                                           RTE_FLOW_ERROR_TYPE_ITEM,
475                                           item,
476                                           "L2 layer cannot follow L3/L4 layer");
477         if (!mask)
478                 mask = &rte_flow_item_vlan_mask;
479         ret = mlx5_flow_item_acceptable
480                 (item, (const uint8_t *)mask,
481                  (const uint8_t *)&nic_mask,
482                  sizeof(struct rte_flow_item_vlan), error);
483         if (ret)
484                 return ret;
485         if (spec) {
486                 eth.val.vlan_tag = spec->tci;
487                 eth.mask.vlan_tag = mask->tci;
488                 eth.val.vlan_tag &= eth.mask.vlan_tag;
489                 eth.val.ether_type = spec->inner_type;
490                 eth.mask.ether_type = mask->inner_type;
491                 eth.val.ether_type &= eth.mask.ether_type;
492         }
493         /*
494          * From verbs perspective an empty VLAN is equivalent
495          * to a packet without VLAN layer.
496          */
497         if (!eth.mask.vlan_tag)
498                 return rte_flow_error_set(error, EINVAL,
499                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
500                                           item->spec,
501                                           "VLAN cannot be empty");
502         if (!(flow->layers & l2m)) {
503                 if (size <= flow_size)
504                         mlx5_flow_spec_verbs_add(flow, &eth, size);
505         } else {
506                 if (flow->verbs.attr)
507                         mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
508                 size = 0; /* Only an update is done in eth specification. */
509         }
510         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
511                 MLX5_FLOW_LAYER_OUTER_VLAN;
512         return size;
513 }
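
/*
 * Illustrative sketch, not part of the driver: a pattern such as
 * "eth / vlan vid is 42" produces a single ibv_flow_spec_eth. When the ETH
 * item has already been converted, the VLAN item only patches vlan_tag and
 * ether_type into that existing spec through mlx5_flow_item_vlan_update()
 * and reports a size of 0. The names below and the MLX5_FLOW_EXAMPLES guard
 * are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item_vlan example_vlan_spec = {
        .tci = RTE_BE16(42),
};
static const struct rte_flow_item_vlan example_vlan_mask = {
        .tci = RTE_BE16(0x0fff),
};
static const struct rte_flow_item example_vlan_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        {
                .type = RTE_FLOW_ITEM_TYPE_VLAN,
                .spec = &example_vlan_spec,
                .mask = &example_vlan_mask,
        },
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */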
514
515 /**
516  * Convert the @p item into a Verbs specification after ensuring the NIC
517  * will understand and process it correctly.
518  * If the necessary size for the conversion is greater than the @p flow_size,
519  * nothing is written in @p flow; the validation is still performed.
520  *
521  * @param[in] item
522  *   Item specification.
523  * @param[in, out] flow
524  *   Pointer to flow structure.
525  * @param[in] flow_size
526  *   Size in bytes of the available space in @p flow, if too small, nothing is
527  *   written.
528  * @param[out] error
529  *   Pointer to error structure.
530  *
531  * @return
532  *   On success, the number of bytes consumed/necessary. If the returned value
533  *   is less than or equal to @p flow_size, the @p item has been fully converted;
534  *   otherwise another call with this returned memory size should be done.
535  *   On error, a negative errno value is returned and rte_errno is set.
536  */
537 static int
538 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
539                     const size_t flow_size, struct rte_flow_error *error)
540 {
541         const struct rte_flow_item_ipv4 *spec = item->spec;
542         const struct rte_flow_item_ipv4 *mask = item->mask;
543         const struct rte_flow_item_ipv4 nic_mask = {
544                 .hdr = {
545                         .src_addr = RTE_BE32(0xffffffff),
546                         .dst_addr = RTE_BE32(0xffffffff),
547                         .type_of_service = 0xff,
548                         .next_proto_id = 0xff,
549                 },
550         };
551         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
552         struct ibv_flow_spec_ipv4_ext ipv4 = {
553                 .type = IBV_FLOW_SPEC_IPV4_EXT,
554                 .size = size,
555         };
556         int ret;
557
558         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
559                 return rte_flow_error_set(error, ENOTSUP,
560                                           RTE_FLOW_ERROR_TYPE_ITEM,
561                                           item,
562                                           "multiple L3 layers not supported");
563         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
564                 return rte_flow_error_set(error, ENOTSUP,
565                                           RTE_FLOW_ERROR_TYPE_ITEM,
566                                           item,
567                                           "L3 cannot follow an L4 layer.");
568         if (!mask)
569                 mask = &rte_flow_item_ipv4_mask;
570         ret = mlx5_flow_item_acceptable
571                 (item, (const uint8_t *)mask,
572                  (const uint8_t *)&nic_mask,
573                  sizeof(struct rte_flow_item_ipv4), error);
574         if (ret < 0)
575                 return ret;
576         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
577         if (spec) {
578                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
579                         .src_ip = spec->hdr.src_addr,
580                         .dst_ip = spec->hdr.dst_addr,
581                         .proto = spec->hdr.next_proto_id,
582                         .tos = spec->hdr.type_of_service,
583                 };
584                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
585                         .src_ip = mask->hdr.src_addr,
586                         .dst_ip = mask->hdr.dst_addr,
587                         .proto = mask->hdr.next_proto_id,
588                         .tos = mask->hdr.type_of_service,
589                 };
590                 /* Remove unwanted bits from values. */
591                 ipv4.val.src_ip &= ipv4.mask.src_ip;
592                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
593                 ipv4.val.proto &= ipv4.mask.proto;
594                 ipv4.val.tos &= ipv4.mask.tos;
595         }
596         flow->l3_protocol_en = !!ipv4.mask.proto;
597         flow->l3_protocol = ipv4.val.proto;
598         if (size <= flow_size)
599                 mlx5_flow_spec_verbs_add(flow, &ipv4, size);
600         return size;
601 }
602
603 /**
604  * Convert the @p item into a Verbs specification after ensuring the NIC
605  * will understand and process it correctly.
606  * If the necessary size for the conversion is greater than the @p flow_size,
607  * nothing is written in @p flow; the validation is still performed.
608  *
609  * @param[in] item
610  *   Item specification.
611  * @param[in, out] flow
612  *   Pointer to flow structure.
613  * @param[in] flow_size
614  *   Size in bytes of the available space in @p flow, if too small, nothing is
615  *   written.
616  * @param[out] error
617  *   Pointer to error structure.
618  *
619  * @return
620  *   On success, the number of bytes consumed/necessary. If the returned value
621  *   is less than or equal to @p flow_size, the @p item has been fully converted;
622  *   otherwise another call with this returned memory size should be done.
623  *   On error, a negative errno value is returned and rte_errno is set.
624  */
625 static int
626 mlx5_flow_item_ipv6(const struct rte_flow_item *item, struct rte_flow *flow,
627                     const size_t flow_size, struct rte_flow_error *error)
628 {
629         const struct rte_flow_item_ipv6 *spec = item->spec;
630         const struct rte_flow_item_ipv6 *mask = item->mask;
631         const struct rte_flow_item_ipv6 nic_mask = {
632                 .hdr = {
633                         .src_addr =
634                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
635                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
636                         .dst_addr =
637                                 "\xff\xff\xff\xff\xff\xff\xff\xff"
638                                 "\xff\xff\xff\xff\xff\xff\xff\xff",
639                         .vtc_flow = RTE_BE32(0xffffffff),
640                         .proto = 0xff,
641                         .hop_limits = 0xff,
642                 },
643         };
644         unsigned int size = sizeof(struct ibv_flow_spec_ipv6);
645         struct ibv_flow_spec_ipv6 ipv6 = {
646                 .type = IBV_FLOW_SPEC_IPV6,
647                 .size = size,
648         };
649         int ret;
650
651         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
652                 return rte_flow_error_set(error, ENOTSUP,
653                                           RTE_FLOW_ERROR_TYPE_ITEM,
654                                           item,
655                                           "multiple L3 layers not supported");
656         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
657                 return rte_flow_error_set(error, ENOTSUP,
658                                           RTE_FLOW_ERROR_TYPE_ITEM,
659                                           item,
660                                           "L3 cannot follow an L4 layer.");
661         if (!mask)
662                 mask = &rte_flow_item_ipv6_mask;
663         ret = mlx5_flow_item_acceptable
664                 (item, (const uint8_t *)mask,
665                  (const uint8_t *)&nic_mask,
666                  sizeof(struct rte_flow_item_ipv6), error);
667         if (ret < 0)
668                 return ret;
669         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV6;
670         if (spec) {
671                 unsigned int i;
672                 uint32_t vtc_flow_val;
673                 uint32_t vtc_flow_mask;
674
675                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
676                        RTE_DIM(ipv6.val.src_ip));
677                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
678                        RTE_DIM(ipv6.val.dst_ip));
679                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
680                        RTE_DIM(ipv6.mask.src_ip));
681                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
682                        RTE_DIM(ipv6.mask.dst_ip));
683                 vtc_flow_val = rte_be_to_cpu_32(spec->hdr.vtc_flow);
684                 vtc_flow_mask = rte_be_to_cpu_32(mask->hdr.vtc_flow);
685                 ipv6.val.flow_label =
686                         rte_cpu_to_be_32((vtc_flow_val & IPV6_HDR_FL_MASK) >>
687                                          IPV6_HDR_FL_SHIFT);
688                 ipv6.val.traffic_class = (vtc_flow_val & IPV6_HDR_TC_MASK) >>
689                                          IPV6_HDR_TC_SHIFT;
690                 ipv6.val.next_hdr = spec->hdr.proto;
691                 ipv6.val.hop_limit = spec->hdr.hop_limits;
692                 ipv6.mask.flow_label =
693                         rte_cpu_to_be_32((vtc_flow_mask & IPV6_HDR_FL_MASK) >>
694                                          IPV6_HDR_FL_SHIFT);
695                 ipv6.mask.traffic_class = (vtc_flow_mask & IPV6_HDR_TC_MASK) >>
696                                           IPV6_HDR_TC_SHIFT;
697                 ipv6.mask.next_hdr = mask->hdr.proto;
698                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
699                 /* Remove unwanted bits from values. */
700                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
701                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
702                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
703                 }
704                 ipv6.val.flow_label &= ipv6.mask.flow_label;
705                 ipv6.val.traffic_class &= ipv6.mask.traffic_class;
706                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
707                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
708         }
709         flow->l3_protocol_en = !!ipv6.mask.next_hdr;
710         flow->l3_protocol = ipv6.val.next_hdr;
711         if (size <= flow_size)
712                 mlx5_flow_spec_verbs_add(flow, &ipv6, size);
713         return size;
714 }
715
716 /**
717  * Convert the @p item into a Verbs specification after ensuring the NIC
718  * will understand and process it correctly.
719  * If the necessary size for the conversion is greater than the @p flow_size,
720  * nothing is written in @p flow; the validation is still performed.
721  *
722  * @param[in] item
723  *   Item specification.
724  * @param[in, out] flow
725  *   Pointer to flow structure.
726  * @param[in] flow_size
727  *   Size in bytes of the available space in @p flow, if too small, nothing is
728  *   written.
729  * @param[out] error
730  *   Pointer to error structure.
731  *
732  * @return
733  *   On success, the number of bytes consumed/necessary. If the returned value
734  *   is less than or equal to @p flow_size, the @p item has been fully converted;
735  *   otherwise another call with this returned memory size should be done.
736  *   On error, a negative errno value is returned and rte_errno is set.
737  */
738 static int
739 mlx5_flow_item_udp(const struct rte_flow_item *item, struct rte_flow *flow,
740                    const size_t flow_size, struct rte_flow_error *error)
741 {
742         const struct rte_flow_item_udp *spec = item->spec;
743         const struct rte_flow_item_udp *mask = item->mask;
744         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
745         struct ibv_flow_spec_tcp_udp udp = {
746                 .type = IBV_FLOW_SPEC_UDP,
747                 .size = size,
748         };
749         int ret;
750
751         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
752                 return rte_flow_error_set(error, ENOTSUP,
753                                           RTE_FLOW_ERROR_TYPE_ITEM,
754                                           item,
755                                           "L3 is mandatory to filter on L4");
756         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
757                 return rte_flow_error_set(error, ENOTSUP,
758                                           RTE_FLOW_ERROR_TYPE_ITEM,
759                                           item,
760                                           "L4 layer is already present");
761         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_UDP)
762                 return rte_flow_error_set(error, ENOTSUP,
763                                           RTE_FLOW_ERROR_TYPE_ITEM,
764                                           item,
765                                           "protocol filtering not compatible"
766                                           " with UDP layer");
767         if (!mask)
768                 mask = &rte_flow_item_udp_mask;
769         ret = mlx5_flow_item_acceptable
770                 (item, (const uint8_t *)mask,
771                  (const uint8_t *)&rte_flow_item_udp_mask,
772                  sizeof(struct rte_flow_item_udp), error);
773         if (ret < 0)
774                 return ret;
775         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_UDP;
776         if (size > flow_size)
777                 return size;
778         if (spec) {
779                 udp.val.dst_port = spec->hdr.dst_port;
780                 udp.val.src_port = spec->hdr.src_port;
781                 udp.mask.dst_port = mask->hdr.dst_port;
782                 udp.mask.src_port = mask->hdr.src_port;
783                 /* Remove unwanted bits from values. */
784                 udp.val.src_port &= udp.mask.src_port;
785                 udp.val.dst_port &= udp.mask.dst_port;
786         }
787         mlx5_flow_spec_verbs_add(flow, &udp, size);
788         return size;
789 }
790
791 /**
792  * Convert the @p item into a Verbs specification after ensuring the NIC
793  * will understand and process it correctly.
794  * If the necessary size for the conversion is greater than the @p flow_size,
795  * nothing is written in @p flow; the validation is still performed.
796  *
797  * @param[in] item
798  *   Item specification.
799  * @param[in, out] flow
800  *   Pointer to flow structure.
801  * @param[in] flow_size
802  *   Size in bytes of the available space in @p flow, if too small, nothing is
803  *   written.
804  * @param[out] error
805  *   Pointer to error structure.
806  *
807  * @return
808  *   On success, the number of bytes consumed/necessary. If the returned value
809  *   is less than or equal to @p flow_size, the @p item has been fully converted;
810  *   otherwise another call with this returned memory size should be done.
811  *   On error, a negative errno value is returned and rte_errno is set.
812  */
813 static int
814 mlx5_flow_item_tcp(const struct rte_flow_item *item, struct rte_flow *flow,
815                    const size_t flow_size, struct rte_flow_error *error)
816 {
817         const struct rte_flow_item_tcp *spec = item->spec;
818         const struct rte_flow_item_tcp *mask = item->mask;
819         unsigned int size = sizeof(struct ibv_flow_spec_tcp_udp);
820         struct ibv_flow_spec_tcp_udp tcp = {
821                 .type = IBV_FLOW_SPEC_TCP,
822                 .size = size,
823         };
824         int ret;
825
826         if (flow->l3_protocol_en && flow->l3_protocol != MLX5_IP_PROTOCOL_TCP)
827                 return rte_flow_error_set(error, ENOTSUP,
828                                           RTE_FLOW_ERROR_TYPE_ITEM,
829                                           item,
830                                           "protocol filtering not compatible"
831                                           " with TCP layer");
832         if (!(flow->layers & MLX5_FLOW_LAYER_OUTER_L3))
833                 return rte_flow_error_set(error, ENOTSUP,
834                                           RTE_FLOW_ERROR_TYPE_ITEM,
835                                           item,
836                                           "L3 is mandatory to filter on L4");
837         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
838                 return rte_flow_error_set(error, ENOTSUP,
839                                           RTE_FLOW_ERROR_TYPE_ITEM,
840                                           item,
841                                           "L4 layer is already present");
842         if (!mask)
843                 mask = &rte_flow_item_tcp_mask;
844         ret = mlx5_flow_item_acceptable
845                 (item, (const uint8_t *)mask,
846                  (const uint8_t *)&rte_flow_item_tcp_mask,
847                  sizeof(struct rte_flow_item_tcp), error);
848         if (ret < 0)
849                 return ret;
850         flow->layers |= MLX5_FLOW_LAYER_OUTER_L4_TCP;
851         if (size > flow_size)
852                 return size;
853         if (spec) {
854                 tcp.val.dst_port = spec->hdr.dst_port;
855                 tcp.val.src_port = spec->hdr.src_port;
856                 tcp.mask.dst_port = mask->hdr.dst_port;
857                 tcp.mask.src_port = mask->hdr.src_port;
858                 /* Remove unwanted bits from values. */
859                 tcp.val.src_port &= tcp.mask.src_port;
860                 tcp.val.dst_port &= tcp.mask.dst_port;
861         }
862         mlx5_flow_spec_verbs_add(flow, &tcp, size);
863         return size;
864 }
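
/*
 * Illustrative sketch, not part of the driver: interplay between the L3
 * protocol filter and the L4 items above. An IPv4 item matching
 * next_proto_id = 6 (TCP) sets flow->l3_protocol, so a subsequent UDP item
 * would be rejected with ENOTSUP while a TCP item is converted into an
 * IBV_FLOW_SPEC_TCP specification. The names below and the
 * MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_item_ipv4 example_ipv4_tcp_spec = {
        .hdr = { .next_proto_id = MLX5_IP_PROTOCOL_TCP },
};
static const struct rte_flow_item_ipv4 example_ipv4_tcp_mask = {
        .hdr = { .next_proto_id = 0xff },
};
static const struct rte_flow_item example_tcp_pattern[] = {
        { .type = RTE_FLOW_ITEM_TYPE_ETH },
        {
                .type = RTE_FLOW_ITEM_TYPE_IPV4,
                .spec = &example_ipv4_tcp_spec,
                .mask = &example_ipv4_tcp_mask,
        },
        { .type = RTE_FLOW_ITEM_TYPE_TCP }, /* Default TCP mask: any ports. */
        { .type = RTE_FLOW_ITEM_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */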
865
866 /**
866  * Convert the @p pattern into Verbs specifications after ensuring the NIC
867  * will understand and process it correctly.
868  * The conversion is performed item per item; each of them is written into
869  * the @p flow if its size is less than or equal to @p flow_size.
871  * Validation and memory consumption computation are still performed until the
872  * end of @p pattern, unless an error is encountered.
873  *
874  * @param[in] pattern
875  *   Flow pattern.
876  * @param[in, out] flow
877  *   Pointer to the rte_flow structure.
878  * @param[in] flow_size
879  *   Size in bytes of the available space in @p flow, if too small some
880  *   garbage may be present.
881  * @param[out] error
882  *   Pointer to error structure.
883  *
884  * @return
885  *   On success, the number of bytes consumed/necessary. If the returned value
886  *   is less than or equal to @p flow_size, the @p pattern has been fully
887  *   converted; otherwise another call with this returned memory size should
888  *   be done.
889  *   On error, a negative errno value is returned and rte_errno is set.
890  */
891 static int
892 mlx5_flow_items(const struct rte_flow_item pattern[],
893                 struct rte_flow *flow, const size_t flow_size,
894                 struct rte_flow_error *error)
895 {
896         int remain = flow_size;
897         size_t size = 0;
898
899         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
900                 int ret = 0;
901
902                 switch (pattern->type) {
903                 case RTE_FLOW_ITEM_TYPE_VOID:
904                         break;
905                 case RTE_FLOW_ITEM_TYPE_ETH:
906                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
907                         break;
908                 case RTE_FLOW_ITEM_TYPE_VLAN:
909                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
910                         break;
911                 case RTE_FLOW_ITEM_TYPE_IPV4:
912                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
913                         break;
914                 case RTE_FLOW_ITEM_TYPE_IPV6:
915                         ret = mlx5_flow_item_ipv6(pattern, flow, remain, error);
916                         break;
917                 case RTE_FLOW_ITEM_TYPE_UDP:
918                         ret = mlx5_flow_item_udp(pattern, flow, remain, error);
919                         break;
920                 case RTE_FLOW_ITEM_TYPE_TCP:
921                         ret = mlx5_flow_item_tcp(pattern, flow, remain, error);
922                         break;
923                 default:
924                         return rte_flow_error_set(error, ENOTSUP,
925                                                   RTE_FLOW_ERROR_TYPE_ITEM,
926                                                   pattern,
927                                                   "item not supported");
928                 }
929                 if (ret < 0)
930                         return ret;
931                 if (remain > ret)
932                         remain -= ret;
933                 else
934                         remain = 0;
935                 size += ret;
936         }
937         if (!flow->layers) {
938                 const struct rte_flow_item item = {
939                         .type = RTE_FLOW_ITEM_TYPE_ETH,
940                 };
941
942                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
943         }
944         return size;
945 }
946
947 /**
948  * Convert the @p action into a Verbs specification after ensuring the NIC
949  * will understand and process it correctly.
950  * If the necessary size for the conversion is greater than the @p flow_size,
951  * nothing is written in @p flow; the validation is still performed.
952  *
953  * @param[in] action
954  *   Action configuration.
955  * @param[in, out] flow
956  *   Pointer to flow structure.
957  * @param[in] flow_size
958  *   Size in bytes of the available space in @p flow, if too small, nothing is
959  *   written.
960  * @param[out] error
961  *   Pointer to error structure.
962  *
963  * @return
964  *   On success, the number of bytes consumed/necessary. If the returned value
965  *   is less than or equal to @p flow_size, the @p action has been fully
966  *   converted; otherwise another call with this returned memory size should
967  *   be done.
968  *   On error, a negative errno value is returned and rte_errno is set.
969  */
970 static int
971 mlx5_flow_action_drop(const struct rte_flow_action *action,
972                       struct rte_flow *flow, const size_t flow_size,
973                       struct rte_flow_error *error)
974 {
975         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
976         struct ibv_flow_spec_action_drop drop = {
977                         .type = IBV_FLOW_SPEC_ACTION_DROP,
978                         .size = size,
979         };
980
981         if (flow->fate)
982                 return rte_flow_error_set(error, ENOTSUP,
983                                           RTE_FLOW_ERROR_TYPE_ACTION,
984                                           action,
985                                           "multiple fate actions are not"
986                                           " supported");
987         if (size <= flow_size)
988                 mlx5_flow_spec_verbs_add(flow, &drop, size);
989         flow->fate |= MLX5_FLOW_FATE_DROP;
990         return size;
991 }
992
993 /**
994  * Convert the @p action into @p flow after ensuring the NIC will understand
995  * and process it correctly.
996  *
997  * @param[in] dev
998  *   Pointer to Ethernet device structure.
999  * @param[in] action
1000  *   Action configuration.
1001  * @param[in, out] flow
1002  *   Pointer to flow structure.
1003  * @param[out] error
1004  *   Pointer to error structure.
1005  *
1006  * @return
1007  *   0 on success, a negative errno value otherwise and rte_errno is set.
1008  */
1009 static int
1010 mlx5_flow_action_queue(struct rte_eth_dev *dev,
1011                        const struct rte_flow_action *action,
1012                        struct rte_flow *flow,
1013                        struct rte_flow_error *error)
1014 {
1015         struct priv *priv = dev->data->dev_private;
1016         const struct rte_flow_action_queue *queue = action->conf;
1017
1018         if (flow->fate)
1019                 return rte_flow_error_set(error, ENOTSUP,
1020                                           RTE_FLOW_ERROR_TYPE_ACTION,
1021                                           action,
1022                                           "multiple fate actions are not"
1023                                           " supported");
1024         if (queue->index >= priv->rxqs_n)
1025                 return rte_flow_error_set(error, EINVAL,
1026                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1027                                           &queue->index,
1028                                           "queue index out of range");
1029         if (!(*priv->rxqs)[queue->index])
1030                 return rte_flow_error_set(error, EINVAL,
1031                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
1032                                           &queue->index,
1033                                           "queue is not configured");
1034         flow->queue = queue->index;
1035         flow->fate |= MLX5_FLOW_FATE_QUEUE;
1036         return 0;
1037 }
1038
1039 /**
1040  * Convert the @p actions into @p flow after ensuring the NIC will understand
1041  * and process it correctly.
1042  * The conversion is performed action per action; each of them is written into
1043  * the @p flow if its size is less than or equal to @p flow_size.
1044  * Validation and memory consumption computation are still performed until the
1045  * end of @p actions, unless an error is encountered.
1046  *
1047  * @param[in] dev
1048  *   Pointer to Ethernet device structure.
1049  * @param[in] actions
1050  *   Pointer to flow actions array.
1051  * @param[in, out] flow
1052  *   Pointer to the rte_flow structure.
1053  * @param[in] flow_size
1054  *   Size in bytes of the available space in @p flow, if too small some
1055  *   garbage may be present.
1056  * @param[out] error
1057  *   Pointer to error structure.
1058  *
1059  * @return
1060  *   On success, the number of bytes consumed/necessary. If the returned value
1061  *   is less than or equal to @p flow_size, the @p actions have been fully
1062  *   converted; otherwise another call with this returned memory size should
1063  *   be done.
1064  *   On error, a negative errno value is returned and rte_errno is set.
1065  */
1066 static int
1067 mlx5_flow_actions(struct rte_eth_dev *dev,
1068                   const struct rte_flow_action actions[],
1069                   struct rte_flow *flow, const size_t flow_size,
1070                   struct rte_flow_error *error)
1071 {
1072         size_t size = 0;
1073         int remain = flow_size;
1074         int ret = 0;
1075
1076         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
1077                 switch (actions->type) {
1078                 case RTE_FLOW_ACTION_TYPE_VOID:
1079                         break;
1080                 case RTE_FLOW_ACTION_TYPE_DROP:
1081                         ret = mlx5_flow_action_drop(actions, flow, remain,
1082                                                     error);
1083                         break;
1084                 case RTE_FLOW_ACTION_TYPE_QUEUE:
1085                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
1086                         break;
1087                 default:
1088                         return rte_flow_error_set(error, ENOTSUP,
1089                                                   RTE_FLOW_ERROR_TYPE_ACTION,
1090                                                   actions,
1091                                                   "action not supported");
1092                 }
1093                 if (ret < 0)
1094                         return ret;
1095                 if (remain > ret)
1096                         remain -= ret;
1097                 else
1098                         remain = 0;
1099                 size += ret;
1100         }
1101         if (!flow->fate)
1102                 return rte_flow_error_set(error, ENOTSUP,
1103                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1104                                           NULL,
1105                                           "no fate action found");
1106         return size;
1107 }
1108
1109 /**
1110  * Convert the @p attributes, @p pattern and @p actions into a flow for the
1111  * NIC after ensuring the NIC will understand and process it correctly.
1112  * The conversion is performed item/action per item/action; each of
1113  * them is written into the @p flow if its size is less than or equal to @p
1114  * flow_size.
1115  * Validation and memory consumption computation are still performed until the
1116  * end, unless an error is encountered.
1117  *
1118  * @param[in] dev
1119  *   Pointer to Ethernet device.
1120  * @param[in, out] flow
1121  *   Pointer to flow structure.
1122  * @param[in] flow_size
1123  *   Size in bytes of the available space in @p flow, if too small some
1124  *   garbage may be present.
1125  * @param[in] attributes
1126  *   Flow rule attributes.
1127  * @param[in] pattern
1128  *   Pattern specification (list terminated by the END pattern item).
1129  * @param[in] actions
1130  *   Associated actions (list terminated by the END action).
1131  * @param[out] error
1132  *   Perform verbose error reporting if not NULL.
1133  *
1134  * @return
1135  *   On success, the number of bytes consumed/necessary. If the returned value
1136  *   is less than or equal to @p flow_size, the flow has been fully converted
1137  *   and can be applied; otherwise another call with this returned memory size
1138  *   should be done.
1139  *   On error, a negative errno value is returned and rte_errno is set.
1140  */
1141 static int
1142 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
1143                 const size_t flow_size,
1144                 const struct rte_flow_attr *attributes,
1145                 const struct rte_flow_item pattern[],
1146                 const struct rte_flow_action actions[],
1147                 struct rte_flow_error *error)
1148 {
1149         struct rte_flow local_flow = { .layers = 0, };
1150         size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
1151         int remain = (flow_size > size) ? flow_size - size : 0;
1152         int ret;
1153
1154         if (!remain)
1155                 flow = &local_flow;
1156         ret = mlx5_flow_attributes(dev, attributes, flow, error);
1157         if (ret < 0)
1158                 return ret;
1159         ret = mlx5_flow_items(pattern, flow, remain, error);
1160         if (ret < 0)
1161                 return ret;
1162         size += ret;
1163         remain = (flow_size > size) ? flow_size - size : 0;
1164         ret = mlx5_flow_actions(dev, actions, flow, remain, error);
1165         if (ret < 0)
1166                 return ret;
1167         size += ret;
1168         if (size <= flow_size)
1169                 flow->verbs.attr->priority = flow->attributes.priority;
1170         return size;
1171 }
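
/*
 * Illustrative sketch, not part of the driver: the intended two-pass use of
 * mlx5_flow_merge(), mirroring what mlx5_flow_list_create() does below. The
 * first call with no buffer validates the flow and returns the required
 * size; the second call fills the freshly allocated flow. The function name
 * and the MLX5_FLOW_EXAMPLES guard are hypothetical.
 */
#ifdef MLX5_FLOW_EXAMPLES
static struct rte_flow *
example_two_pass_merge(struct rte_eth_dev *dev,
                       const struct rte_flow_attr *attr,
                       const struct rte_flow_item pattern[],
                       const struct rte_flow_action actions[],
                       struct rte_flow_error *error)
{
        struct rte_flow *flow;
        int size = mlx5_flow_merge(dev, NULL, 0, attr, pattern, actions, error);

        if (size < 0)
                return NULL; /* Validation failed, rte_errno is set. */
        flow = rte_zmalloc(__func__, size, 0);
        if (!flow) {
                rte_errno = ENOMEM;
                return NULL;
        }
        /* Verbs attributes and specifications live right after the flow. */
        flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
        flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
        if (mlx5_flow_merge(dev, flow, size, attr, pattern, actions, error) < 0) {
                rte_free(flow);
                return NULL;
        }
        return flow;
}
#endif /* MLX5_FLOW_EXAMPLES */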
1172
1173 /**
1174  * Validate a flow supported by the NIC.
1175  *
1176  * @see rte_flow_validate()
1177  * @see rte_flow_ops
1178  */
1179 int
1180 mlx5_flow_validate(struct rte_eth_dev *dev,
1181                    const struct rte_flow_attr *attr,
1182                    const struct rte_flow_item items[],
1183                    const struct rte_flow_action actions[],
1184                    struct rte_flow_error *error)
1185 {
1186         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1187
1188         if (ret < 0)
1189                 return ret;
1190         return 0;
1191 }
1192
1193 /**
1194  * Remove the flow.
1195  *
1196  * @param[in] dev
1197  *   Pointer to Ethernet device.
1198  * @param[in, out] flow
1199  *   Pointer to flow structure.
1200  */
1201 static void
1202 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
1203 {
1204         if (flow->fate & MLX5_FLOW_FATE_DROP) {
1205                 if (flow->verbs.flow) {
1206                         claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
1207                         flow->verbs.flow = NULL;
1208                 }
1209         }
1210         if (flow->verbs.hrxq) {
1211                 if (flow->fate & MLX5_FLOW_FATE_DROP)
1212                         mlx5_hrxq_drop_release(dev);
1213                 else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
1214                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
1215                 flow->verbs.hrxq = NULL;
1216         }
1217 }
1218
1219 /**
1220  * Apply the flow.
1221  *
1222  * @param[in] dev
1223  *   Pointer to Ethernet device structure.
1224  * @param[in, out] flow
1225  *   Pointer to flow structure.
1226  * @param[out] error
1227  *   Pointer to error structure.
1228  *
1229  * @return
1230  *   0 on success, a negative errno value otherwise and rte_errno is set.
1231  */
1232 static int
1233 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
1234                 struct rte_flow_error *error)
1235 {
1236         if (flow->fate & MLX5_FLOW_FATE_DROP) {
1237                 flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
1238                 if (!flow->verbs.hrxq)
1239                         return rte_flow_error_set
1240                                 (error, rte_errno,
1241                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1242                                  NULL,
1243                                  "cannot allocate Drop queue");
1244         } else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
1245                 struct mlx5_hrxq *hrxq;
1246
1247                 hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
1248                                      rss_hash_default_key_len, 0,
1249                                      &flow->queue, 1, 0, 0);
1250                 if (!hrxq)
1251                         hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
1252                                              rss_hash_default_key_len, 0,
1253                                              &flow->queue, 1, 0, 0);
1254                 if (!hrxq)
1255                         return rte_flow_error_set(error, rte_errno,
1256                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1257                                         NULL,
1258                                         "cannot get hash queue");
1259                 flow->verbs.hrxq = hrxq;
1260         }
1261         flow->verbs.flow =
1262                 mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
1263         if (!flow->verbs.flow) {
1264                 if (flow->fate & MLX5_FLOW_FATE_DROP)
1265                         mlx5_hrxq_drop_release(dev);
1266                 else
1267                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
1268                 flow->verbs.hrxq = NULL;
1269                 return rte_flow_error_set(error, errno,
1270                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1271                                           NULL,
1272                                           "kernel module refuses to create"
1273                                           " flow");
1274         }
1275         return 0;
1276 }
1277
1278 /**
1279  * Create a flow and add it to @p list.
1280  *
1281  * @param dev
1282  *   Pointer to Ethernet device.
1283  * @param list
1284  *   Pointer to a TAILQ flow list.
1285  * @param[in] attr
1286  *   Flow rule attributes.
1287  * @param[in] items
1288  *   Pattern specification (list terminated by the END pattern item).
1289  * @param[in] actions
1290  *   Associated actions (list terminated by the END action).
1291  * @param[out] error
1292  *   Perform verbose error reporting if not NULL.
1293  *
1294  * @return
1295  *   A flow on success, NULL otherwise and rte_errno is set.
1296  */
1297 static struct rte_flow *
1298 mlx5_flow_list_create(struct rte_eth_dev *dev,
1299                       struct mlx5_flows *list,
1300                       const struct rte_flow_attr *attr,
1301                       const struct rte_flow_item items[],
1302                       const struct rte_flow_action actions[],
1303                       struct rte_flow_error *error)
1304 {
1305         struct rte_flow *flow;
1306         size_t size;
1307         int ret;
1308
1309         ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1310         if (ret < 0)
1311                 return NULL;
1312         size = ret;
1313         flow = rte_zmalloc(__func__, size, 0);
1314         if (!flow) {
1315                 rte_flow_error_set(error, ENOMEM,
1316                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1317                                    NULL,
1318                                    "cannot allocate memory");
1319                 return NULL;
1320         }
1321         flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
1322         flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
1323         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1324         if (ret < 0)
1325                 goto error;
1326         assert((size_t)ret == size);
1327         if (dev->data->dev_started) {
1328                 ret = mlx5_flow_apply(dev, flow, error);
1329                 if (ret < 0)
1330                         goto error;
1331         }
1332         TAILQ_INSERT_TAIL(list, flow, next);
1333         return flow;
1334 error:
1335         ret = rte_errno; /* Save rte_errno before cleanup. */
1336         mlx5_flow_remove(dev, flow);
1337         rte_free(flow);
1338         rte_errno = ret; /* Restore rte_errno. */
1339         return NULL;
1340 }
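/*
 * Note: mlx5_flow_list_create() uses mlx5_flow_merge() in two passes: the
 * first call (NULL flow, zero size) only validates the rule and returns the
 * number of bytes needed for the rte_flow, its ibv_flow_attr and the
 * specification buffer; the second call fills the buffer just allocated.
 * A minimal sketch of that convention (illustrative only):
 *
 *     ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
 *     flow = rte_zmalloc(__func__, ret, 0);
 *     ret = mlx5_flow_merge(dev, flow, ret, attr, items, actions, error);
 */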
1341
1342 /**
1343  * Create a flow.
1344  *
1345  * @see rte_flow_create()
1346  * @see rte_flow_ops
1347  */
1348 struct rte_flow *
1349 mlx5_flow_create(struct rte_eth_dev *dev,
1350                  const struct rte_flow_attr *attr,
1351                  const struct rte_flow_item items[],
1352                  const struct rte_flow_action actions[],
1353                  struct rte_flow_error *error)
1354 {
1355         return mlx5_flow_list_create
1356                 (dev, &((struct priv *)dev->data->dev_private)->flows,
1357                  attr, items, actions, error);
1358 }
1359
1360 /**
1361  * Destroy a flow in a list.
1362  *
1363  * @param dev
1364  *   Pointer to Ethernet device.
1365  * @param list
1366  *   Pointer to a TAILQ flow list.
1367  * @param[in] flow
1368  *   Flow to destroy.
1369  */
1370 static void
1371 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1372                        struct rte_flow *flow)
1373 {
1374         mlx5_flow_remove(dev, flow);
1375         TAILQ_REMOVE(list, flow, next);
1376         rte_free(flow);
1377 }
1378
1379 /**
1380  * Destroy all flows.
1381  *
1382  * @param dev
1383  *   Pointer to Ethernet device.
1384  * @param list
1385  *   Pointer to a TAILQ flow list.
1386  */
1387 void
1388 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
1389 {
1390         while (!TAILQ_EMPTY(list)) {
1391                 struct rte_flow *flow;
1392
1393                 flow = TAILQ_FIRST(list);
1394                 mlx5_flow_list_destroy(dev, list, flow);
1395         }
1396 }
1397
1398 /**
1399  * Remove all flows.
1400  *
1401  * @param dev
1402  *   Pointer to Ethernet device.
1403  * @param list
1404  *   Pointer to a TAILQ flow list.
1405  */
1406 void
1407 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
1408 {
1409         struct rte_flow *flow;
1410
1411         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
1412                 mlx5_flow_remove(dev, flow);
1413 }
1414
1415 /**
1416  * Add all flows.
1417  *
1418  * @param dev
1419  *   Pointer to Ethernet device.
1420  * @param list
1421  *   Pointer to a TAILQ flow list.
1422  *
1423  * @return
1424  *   0 on success, a negative errno value otherwise and rte_errno is set.
1425  */
1426 int
1427 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
1428 {
1429         struct rte_flow *flow;
1430         struct rte_flow_error error;
1431         int ret = 0;
1432
1433         TAILQ_FOREACH(flow, list, next) {
1434                 ret = mlx5_flow_apply(dev, flow, &error);
1435                 if (ret < 0)
1436                         goto error;
1437         }
1438         return 0;
1439 error:
1440         ret = rte_errno; /* Save rte_errno before cleanup. */
1441         mlx5_flow_stop(dev, list);
1442         rte_errno = ret; /* Restore rte_errno. */
1443         return -rte_errno;
1444 }
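/*
 * Note: mlx5_flow_start() is all-or-nothing; if applying any flow fails it
 * rolls back by calling mlx5_flow_stop() on the whole list and preserves
 * the original rte_errno across the cleanup.
 */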
1445
1446 /**
1447  * Verify the flow list is empty.
1448  *
1449  * @param dev
1450  *   Pointer to Ethernet device.
1451  *
1452  * @return The number of flows not released.
1453  */
1454 int
1455 mlx5_flow_verify(struct rte_eth_dev *dev)
1456 {
1457         struct priv *priv = dev->data->dev_private;
1458         struct rte_flow *flow;
1459         int ret = 0;
1460
1461         TAILQ_FOREACH(flow, &priv->flows, next) {
1462                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
1463                         dev->data->port_id, (void *)flow);
1464                 ++ret;
1465         }
1466         return ret;
1467 }
1468
1469 /**
1470  * Enable a control flow configured from the control plane.
1471  *
1472  * @param dev
1473  *   Pointer to Ethernet device.
1474  * @param eth_spec
1475  *   An Ethernet flow spec to apply.
1476  * @param eth_mask
1477  *   An Ethernet flow mask to apply.
1478  * @param vlan_spec
1479  *   A VLAN flow spec to apply.
1480  * @param vlan_mask
1481  *   A VLAN flow mask to apply.
1482  *
1483  * @return
1484  *   0 on success, a negative errno value otherwise and rte_errno is set.
1485  */
1486 int
1487 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
1488                     struct rte_flow_item_eth *eth_spec,
1489                     struct rte_flow_item_eth *eth_mask,
1490                     struct rte_flow_item_vlan *vlan_spec,
1491                     struct rte_flow_item_vlan *vlan_mask)
1492 {
1493         struct priv *priv = dev->data->dev_private;
1494         const struct rte_flow_attr attr = {
1495                 .ingress = 1,
1496                 .priority = priv->config.flow_prio - 1,
1497         };
1498         struct rte_flow_item items[] = {
1499                 {
1500                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1501                         .spec = eth_spec,
1502                         .last = NULL,
1503                         .mask = eth_mask,
1504                 },
1505                 {
1506                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
1507                                 RTE_FLOW_ITEM_TYPE_END,
1508                         .spec = vlan_spec,
1509                         .last = NULL,
1510                         .mask = vlan_mask,
1511                 },
1512                 {
1513                         .type = RTE_FLOW_ITEM_TYPE_END,
1514                 },
1515         };
1516         uint16_t queue[priv->reta_idx_n];
1517         struct rte_flow_action_rss action_rss = {
1518                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1519                 .level = 0,
1520                 .types = priv->rss_conf.rss_hf,
1521                 .key_len = priv->rss_conf.rss_key_len,
1522                 .queue_num = priv->reta_idx_n,
1523                 .key = priv->rss_conf.rss_key,
1524                 .queue = queue,
1525         };
1526         struct rte_flow_action actions[] = {
1527                 {
1528                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1529                         .conf = &action_rss,
1530                 },
1531                 {
1532                         .type = RTE_FLOW_ACTION_TYPE_END,
1533                 },
1534         };
1535         struct rte_flow *flow;
1536         struct rte_flow_error error;
1537         unsigned int i;
1538
1539         if (!priv->reta_idx_n) {
1540                 rte_errno = EINVAL;
1541                 return -rte_errno;
1542         }
1543         for (i = 0; i != priv->reta_idx_n; ++i)
1544                 queue[i] = (*priv->reta_idx)[i];
1545         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
1546                                      actions, &error);
1547         if (!flow)
1548                 return -rte_errno;
1549         return 0;
1550 }
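/*
 * Note: when vlan_spec is NULL the second pattern item above degenerates to
 * RTE_FLOW_ITEM_TYPE_END, so the same item array serves both the plain
 * Ethernet and the VLAN-tagged control flows; the RSS action always spreads
 * the matched traffic over every queue of the current indirection table.
 */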
1551
1552 /**
1553  * Enable a control flow configured from the control plane.
1554  *
1555  * @param dev
1556  *   Pointer to Ethernet device.
1557  * @param eth_spec
1558  *   An Ethernet flow spec to apply.
1559  * @param eth_mask
1560  *   An Ethernet flow mask to apply.
1561  *
1562  * @return
1563  *   0 on success, a negative errno value otherwise and rte_errno is set.
1564  */
1565 int
1566 mlx5_ctrl_flow(struct rte_eth_dev *dev,
1567                struct rte_flow_item_eth *eth_spec,
1568                struct rte_flow_item_eth *eth_mask)
1569 {
1570         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
1571 }
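/*
 * A minimal usage sketch for mlx5_ctrl_flow() (illustrative only, "bcast"
 * is a hypothetical local variable): match broadcast destination MACs and
 * let the default RSS action built by mlx5_ctrl_flow_vlan() spread them.
 *
 *     struct rte_flow_item_eth bcast = {
 *             .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *     };
 *
 *     if (mlx5_ctrl_flow(dev, &bcast, &bcast))
 *             return -rte_errno;
 */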
1572
1573 /**
1574  * Destroy a flow.
1575  *
1576  * @see rte_flow_destroy()
1577  * @see rte_flow_ops
1578  */
1579 int
1580 mlx5_flow_destroy(struct rte_eth_dev *dev,
1581                   struct rte_flow *flow,
1582                   struct rte_flow_error *error __rte_unused)
1583 {
1584         struct priv *priv = dev->data->dev_private;
1585
1586         mlx5_flow_list_destroy(dev, &priv->flows, flow);
1587         return 0;
1588 }
1589
1590 /**
1591  * Destroy all flows.
1592  *
1593  * @see rte_flow_flush()
1594  * @see rte_flow_ops
1595  */
1596 int
1597 mlx5_flow_flush(struct rte_eth_dev *dev,
1598                 struct rte_flow_error *error __rte_unused)
1599 {
1600         struct priv *priv = dev->data->dev_private;
1601
1602         mlx5_flow_list_flush(dev, &priv->flows);
1603         return 0;
1604 }
1605
1606 /**
1607  * Enable or disable isolated mode.
1608  *
1609  * @see rte_flow_isolate()
1610  * @see rte_flow_ops
1611  */
1612 int
1613 mlx5_flow_isolate(struct rte_eth_dev *dev,
1614                   int enable,
1615                   struct rte_flow_error *error)
1616 {
1617         struct priv *priv = dev->data->dev_private;
1618
1619         if (dev->data->dev_started) {
1620                 rte_flow_error_set(error, EBUSY,
1621                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1622                                    NULL,
1623                                    "port must be stopped first");
1624                 return -rte_errno;
1625         }
1626         priv->isolated = !!enable;
1627         if (enable)
1628                 dev->dev_ops = &mlx5_dev_ops_isolate;
1629         else
1630                 dev->dev_ops = &mlx5_dev_ops;
1631         return 0;
1632 }
1633
1634 /**
1635  * Convert a flow director filter to a generic flow.
1636  *
1637  * @param dev
1638  *   Pointer to Ethernet device.
1639  * @param fdir_filter
1640  *   Flow director filter to add.
1641  * @param attributes
1642  *   Generic flow parameters structure.
1643  *
1644  * @return
1645  *   0 on success, a negative errno value otherwise and rte_errno is set.
1646  */
1647 static int
1648 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
1649                          const struct rte_eth_fdir_filter *fdir_filter,
1650                          struct mlx5_fdir *attributes)
1651 {
1652         struct priv *priv = dev->data->dev_private;
1653         const struct rte_eth_fdir_input *input = &fdir_filter->input;
1654         const struct rte_eth_fdir_masks *mask =
1655                 &dev->data->dev_conf.fdir_conf.mask;
1656
1657         /* Validate queue number. */
1658         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
1659                 DRV_LOG(ERR, "port %u invalid queue number %d",
1660                         dev->data->port_id, fdir_filter->action.rx_queue);
1661                 rte_errno = EINVAL;
1662                 return -rte_errno;
1663         }
1664         attributes->attr.ingress = 1;
1665         attributes->items[0] = (struct rte_flow_item) {
1666                 .type = RTE_FLOW_ITEM_TYPE_ETH,
1667                 .spec = &attributes->l2,
1668                 .mask = &attributes->l2_mask,
1669         };
1670         switch (fdir_filter->action.behavior) {
1671         case RTE_ETH_FDIR_ACCEPT:
1672                 attributes->actions[0] = (struct rte_flow_action){
1673                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1674                         .conf = &attributes->queue,
1675                 };
1676                 break;
1677         case RTE_ETH_FDIR_REJECT:
1678                 attributes->actions[0] = (struct rte_flow_action){
1679                         .type = RTE_FLOW_ACTION_TYPE_DROP,
1680                 };
1681                 break;
1682         default:
1683                 DRV_LOG(ERR, "port %u invalid behavior %d",
1684                         dev->data->port_id,
1685                         fdir_filter->action.behavior);
1686                 rte_errno = ENOTSUP;
1687                 return -rte_errno;
1688         }
1689         attributes->queue.index = fdir_filter->action.rx_queue;
1690         /* Handle L3. */
1691         switch (fdir_filter->input.flow_type) {
1692         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1693         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1694         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1695                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
1696                         .src_addr = input->flow.ip4_flow.src_ip,
1697                         .dst_addr = input->flow.ip4_flow.dst_ip,
1698                         .time_to_live = input->flow.ip4_flow.ttl,
1699                         .type_of_service = input->flow.ip4_flow.tos,
1700                         .next_proto_id = input->flow.ip4_flow.proto,
1701                 };
1702                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
1703                         .src_addr = mask->ipv4_mask.src_ip,
1704                         .dst_addr = mask->ipv4_mask.dst_ip,
1705                         .time_to_live = mask->ipv4_mask.ttl,
1706                         .type_of_service = mask->ipv4_mask.tos,
1707                         .next_proto_id = mask->ipv4_mask.proto,
1708                 };
1709                 attributes->items[1] = (struct rte_flow_item){
1710                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
1711                         .spec = &attributes->l3,
1712                         .mask = &attributes->l3_mask,
1713                 };
1714                 break;
1715         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1716         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1717         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1718                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
1719                         .hop_limits = input->flow.ipv6_flow.hop_limits,
1720                         .proto = input->flow.ipv6_flow.proto,
1721                 };
1722
1723                 memcpy(attributes->l3.ipv6.hdr.src_addr,
1724                        input->flow.ipv6_flow.src_ip,
1725                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1726                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
1727                        input->flow.ipv6_flow.dst_ip,
1728                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
1729                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
1730                        mask->ipv6_mask.src_ip,
1731                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1732                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
1733                        mask->ipv6_mask.dst_ip,
1734                        RTE_DIM(attributes->l3_mask.ipv6.hdr.dst_addr));
1735                 attributes->items[1] = (struct rte_flow_item){
1736                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
1737                         .spec = &attributes->l3,
1738                         .mask = &attributes->l3_mask,
1739                 };
1740                 break;
1741         default:
1742                 DRV_LOG(ERR, "port %u invalid flow type %d",
1743                         dev->data->port_id, fdir_filter->input.flow_type);
1744                 rte_errno = ENOTSUP;
1745                 return -rte_errno;
1746         }
1747         /* Handle L4. */
1748         switch (fdir_filter->input.flow_type) {
1749         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1750                 attributes->l4.udp.hdr = (struct udp_hdr){
1751                         .src_port = input->flow.udp4_flow.src_port,
1752                         .dst_port = input->flow.udp4_flow.dst_port,
1753                 };
1754                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
1755                         .src_port = mask->src_port_mask,
1756                         .dst_port = mask->dst_port_mask,
1757                 };
1758                 attributes->items[2] = (struct rte_flow_item){
1759                         .type = RTE_FLOW_ITEM_TYPE_UDP,
1760                         .spec = &attributes->l4,
1761                         .mask = &attributes->l4_mask,
1762                 };
1763                 break;
1764         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1765                 attributes->l4.tcp.hdr = (struct tcp_hdr){
1766                         .src_port = input->flow.tcp4_flow.src_port,
1767                         .dst_port = input->flow.tcp4_flow.dst_port,
1768                 };
1769                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1770                         .src_port = mask->src_port_mask,
1771                         .dst_port = mask->dst_port_mask,
1772                 };
1773                 attributes->items[2] = (struct rte_flow_item){
1774                         .type = RTE_FLOW_ITEM_TYPE_TCP,
1775                         .spec = &attributes->l4,
1776                         .mask = &attributes->l4_mask,
1777                 };
1778                 break;
1779         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1780                 attributes->l4.udp.hdr = (struct udp_hdr){
1781                         .src_port = input->flow.udp6_flow.src_port,
1782                         .dst_port = input->flow.udp6_flow.dst_port,
1783                 };
1784                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
1785                         .src_port = mask->src_port_mask,
1786                         .dst_port = mask->dst_port_mask,
1787                 };
1788                 attributes->items[2] = (struct rte_flow_item){
1789                         .type = RTE_FLOW_ITEM_TYPE_UDP,
1790                         .spec = &attributes->l4,
1791                         .mask = &attributes->l4_mask,
1792                 };
1793                 break;
1794         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1795                 attributes->l4.tcp.hdr = (struct tcp_hdr){
1796                         .src_port = input->flow.tcp6_flow.src_port,
1797                         .dst_port = input->flow.tcp6_flow.dst_port,
1798                 };
1799                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1800                         .src_port = mask->src_port_mask,
1801                         .dst_port = mask->dst_port_mask,
1802                 };
1803                 attributes->items[2] = (struct rte_flow_item){
1804                         .type = RTE_FLOW_ITEM_TYPE_TCP,
1805                         .spec = &attributes->l4,
1806                         .mask = &attributes->l4_mask,
1807                 };
1808                 break;
1809         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1810         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1811                 break;
1812         default:
1813                 DRV_LOG(ERR, "port %u invalid flow type %d",
1814                         dev->data->port_id, fdir_filter->input.flow_type);
1815                 rte_errno = ENOTSUP;
1816                 return -rte_errno;
1817         }
1818         return 0;
1819 }
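/*
 * Note: mlx5_fdir_filter_convert() fills attributes->items[] positionally:
 * [0] ETH, [1] IPv4/IPv6, [2] UDP/TCP when the flow type carries an L4, and
 * any remaining slot stays zero-initialized, which rte_flow reads as
 * RTE_FLOW_ITEM_TYPE_END.
 */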
1820
1821 /**
1822  * Add new flow director filter and store it in list.
1823  *
1824  * @param dev
1825  *   Pointer to Ethernet device.
1826  * @param fdir_filter
1827  *   Flow director filter to add.
1828  *
1829  * @return
1830  *   0 on success, a negative errno value otherwise and rte_errno is set.
1831  */
1832 static int
1833 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
1834                      const struct rte_eth_fdir_filter *fdir_filter)
1835 {
1836         struct priv *priv = dev->data->dev_private;
1837         struct mlx5_fdir attributes = {
1838                 .attr.group = 0,
1839                 .l2_mask = {
1840                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1841                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1842                         .type = 0,
1843                 },
1844         };
1845         struct rte_flow_error error;
1846         struct rte_flow *flow;
1847         int ret;
1848
1849         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
1850         if (ret)
1851                 return ret;
1852         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
1853                                      attributes.items, attributes.actions,
1854                                      &error);
1855         if (flow) {
1856                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
1857                         (void *)flow);
1858                 return 0;
1859         }
1860         return -rte_errno;
1861 }
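/*
 * Note: the all-zero l2_mask above makes the mandatory ETH item a pure
 * wildcard, so flow director rules only constrain the L3/L4 fields filled
 * in by mlx5_fdir_filter_convert().
 */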
1862
1863 /**
1864  * Delete specific filter.
1865  *
1866  * @param dev
1867  *   Pointer to Ethernet device.
1868  * @param fdir_filter
1869  *   Filter to be deleted.
1870  *
1871  * @return
1872  *   0 on success, a negative errno value otherwise and rte_errno is set.
1873  */
1874 static int
1875 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
1876                         const struct rte_eth_fdir_filter *fdir_filter
1877                         __rte_unused)
1878 {
1879         rte_errno = ENOTSUP;
1880         return -rte_errno;
1881 }
1882
1883 /**
1884  * Update queue for specific filter.
1885  *
1886  * @param dev
1887  *   Pointer to Ethernet device.
1888  * @param fdir_filter
1889  *   Filter to be updated.
1890  *
1891  * @return
1892  *   0 on success, a negative errno value otherwise and rte_errno is set.
1893  */
1894 static int
1895 mlx5_fdir_filter_update(struct rte_eth_dev *dev,
1896                         const struct rte_eth_fdir_filter *fdir_filter)
1897 {
1898         int ret;
1899
1900         ret = mlx5_fdir_filter_delete(dev, fdir_filter);
1901         if (ret)
1902                 return ret;
1903         return mlx5_fdir_filter_add(dev, fdir_filter);
1904 }
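/*
 * Note: updating is implemented as delete-then-add; since
 * mlx5_fdir_filter_delete() currently always returns ENOTSUP, updates fail
 * the same way until per-filter deletion is implemented.
 */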
1905
1906 /**
1907  * Flush all filters.
1908  *
1909  * @param dev
1910  *   Pointer to Ethernet device.
1911  */
1912 static void
1913 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
1914 {
1915         struct priv *priv = dev->data->dev_private;
1916
1917         mlx5_flow_list_flush(dev, &priv->flows);
1918 }
1919
1920 /**
1921  * Get flow director information.
1922  *
1923  * @param dev
1924  *   Pointer to Ethernet device.
1925  * @param[out] fdir_info
1926  *   Resulting flow director information.
1927  */
1928 static void
1929 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
1930 {
1931         struct rte_eth_fdir_masks *mask =
1932                 &dev->data->dev_conf.fdir_conf.mask;
1933
1934         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
1935         fdir_info->guarant_spc = 0;
1936         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
1937         fdir_info->max_flexpayload = 0;
1938         fdir_info->flow_types_mask[0] = 0;
1939         fdir_info->flex_payload_unit = 0;
1940         fdir_info->max_flex_payload_segment_num = 0;
1941         fdir_info->flex_payload_limit = 0;
1942         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
1943 }
1944
1945 /**
1946  * Deal with flow director operations.
1947  *
1948  * @param dev
1949  *   Pointer to Ethernet device.
1950  * @param filter_op
1951  *   Operation to perform.
1952  * @param arg
1953  *   Pointer to operation-specific structure.
1954  *
1955  * @return
1956  *   0 on success, a negative errno value otherwise and rte_errno is set.
1957  */
1958 static int
1959 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
1960                     void *arg)
1961 {
1962         enum rte_fdir_mode fdir_mode =
1963                 dev->data->dev_conf.fdir_conf.mode;
1964
1965         if (filter_op == RTE_ETH_FILTER_NOP)
1966                 return 0;
1967         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
1968             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
1969                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
1970                         dev->data->port_id, fdir_mode);
1971                 rte_errno = EINVAL;
1972                 return -rte_errno;
1973         }
1974         switch (filter_op) {
1975         case RTE_ETH_FILTER_ADD:
1976                 return mlx5_fdir_filter_add(dev, arg);
1977         case RTE_ETH_FILTER_UPDATE:
1978                 return mlx5_fdir_filter_update(dev, arg);
1979         case RTE_ETH_FILTER_DELETE:
1980                 return mlx5_fdir_filter_delete(dev, arg);
1981         case RTE_ETH_FILTER_FLUSH:
1982                 mlx5_fdir_filter_flush(dev);
1983                 break;
1984         case RTE_ETH_FILTER_INFO:
1985                 mlx5_fdir_info_get(dev, arg);
1986                 break;
1987         default:
1988                 DRV_LOG(DEBUG, "port %u unknown operation %u",
1989                         dev->data->port_id, filter_op);
1990                 rte_errno = EINVAL;
1991                 return -rte_errno;
1992         }
1993         return 0;
1994 }
1995
1996 /**
1997  * Manage filter operations.
1998  *
1999  * @param dev
2000  *   Pointer to Ethernet device structure.
2001  * @param filter_type
2002  *   Filter type.
2003  * @param filter_op
2004  *   Operation to perform.
2005  * @param arg
2006  *   Pointer to operation-specific structure.
2007  *
2008  * @return
2009  *   0 on success, a negative errno value otherwise and rte_errno is set.
2010  */
2011 int
2012 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2013                      enum rte_filter_type filter_type,
2014                      enum rte_filter_op filter_op,
2015                      void *arg)
2016 {
2017         switch (filter_type) {
2018         case RTE_ETH_FILTER_GENERIC:
2019                 if (filter_op != RTE_ETH_FILTER_GET) {
2020                         rte_errno = EINVAL;
2021                         return -rte_errno;
2022                 }
2023                 *(const void **)arg = &mlx5_flow_ops;
2024                 return 0;
2025         case RTE_ETH_FILTER_FDIR:
2026                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
2027         default:
2028                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
2029                         dev->data->port_id, filter_type);
2030                 rte_errno = ENOTSUP;
2031                 return -rte_errno;
2032         }
2033         return 0;
2034 }
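/*
 * A minimal sketch of how the generic filter type is consumed (illustrative
 * only; this is roughly what the rte_flow layer does to retrieve the driver
 * callbacks before invoking them):
 *
 *     const struct rte_flow_ops *ops = NULL;
 *
 *     if (mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *                              RTE_ETH_FILTER_GET, &ops) == 0 && ops)
 *             ops->validate(dev, attr, items, actions, error);
 */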