net/mlx5: add flow IPv4 item
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright 2016 6WIND S.A.
3  * Copyright 2016 Mellanox Technologies, Ltd
4  */
5
6 #include <sys/queue.h>
7 #include <stdint.h>
8 #include <string.h>
9
10 /* Verbs header. */
11 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
12 #ifdef PEDANTIC
13 #pragma GCC diagnostic ignored "-Wpedantic"
14 #endif
15 #include <infiniband/verbs.h>
16 #ifdef PEDANTIC
17 #pragma GCC diagnostic error "-Wpedantic"
18 #endif
19
20 #include <rte_common.h>
21 #include <rte_ether.h>
22 #include <rte_eth_ctrl.h>
23 #include <rte_ethdev_driver.h>
24 #include <rte_flow.h>
25 #include <rte_flow_driver.h>
26 #include <rte_malloc.h>
27 #include <rte_ip.h>
28
29 #include "mlx5.h"
30 #include "mlx5_defs.h"
31 #include "mlx5_prm.h"
32 #include "mlx5_glue.h"
33
34 /* Dev ops structure defined in mlx5.c */
35 extern const struct eth_dev_ops mlx5_dev_ops;
36 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
37
/*
 * Pattern layer bits: each bit records that the matching protocol layer has
 * already been seen in a flow pattern.
 */
#define MLX5_FLOW_LAYER_OUTER_L2 (1u << 0) /* Ethernet. */
#define MLX5_FLOW_LAYER_OUTER_L3_IPV4 (1u << 1) /* IPv4. */
#define MLX5_FLOW_LAYER_OUTER_L3_IPV6 (1u << 2) /* IPv6. */
#define MLX5_FLOW_LAYER_OUTER_L4_UDP (1u << 3) /* UDP. */
#define MLX5_FLOW_LAYER_OUTER_L4_TCP (1u << 4) /* TCP. */
#define MLX5_FLOW_LAYER_OUTER_VLAN (1u << 5) /* VLAN tag. */

/* Masks: any L3 protocol bit, any L4 protocol bit. */
#define MLX5_FLOW_LAYER_OUTER_L3 \
        (MLX5_FLOW_LAYER_OUTER_L3_IPV4 | MLX5_FLOW_LAYER_OUTER_L3_IPV6)
#define MLX5_FLOW_LAYER_OUTER_L4 \
        (MLX5_FLOW_LAYER_OUTER_L4_UDP | MLX5_FLOW_LAYER_OUTER_L4_TCP)
50
/*
 * Fate bits: actions deciding what finally happens to matching traffic.
 * The action converters refuse a second fate once one of these is set.
 */
#define MLX5_FLOW_FATE_DROP (1u << 0) /* Matching traffic is dropped. */
#define MLX5_FLOW_FATE_QUEUE (1u << 1) /* Matching traffic goes to a queue. */
54
/** Verbs side of a flow: attribute, specifications and resulting objects. */
struct mlx5_flow_verbs {
        unsigned int size;
        /**< Bytes of specifications accounted so far (grows on each add). */
        struct {
                struct ibv_flow_attr *attr;
                /**< Verbs flow attribute; num_of_specs counts copied specs. */
                uint8_t *specs;
                /**<
                 * Start of the specifications area. While NULL,
                 * specifications are only accounted in size, not copied.
                 */
        };
        struct ibv_flow *flow; /**< Verbs flow pointer. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queue object. */
};
66
67 /* Flow structure. */
68 struct rte_flow {
69         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
70         struct rte_flow_attr attributes; /**< User flow attribute. */
71         uint32_t layers;
72         /**< Bit-fields of present layers see MLX5_FLOW_LAYER_*. */
73         uint32_t fate;
74         /**< Bit-fields of present fate see MLX5_FLOW_FATE_*. */
75         struct mlx5_flow_verbs verbs; /* Verbs flow. */
76         uint16_t queue; /**< Destination queue to redirect traffic to. */
77 };
78
79 static const struct rte_flow_ops mlx5_flow_ops = {
80         .validate = mlx5_flow_validate,
81         .create = mlx5_flow_create,
82         .destroy = mlx5_flow_destroy,
83         .flush = mlx5_flow_flush,
84         .isolate = mlx5_flow_isolate,
85 };
86
87 /* Convert FDIR request to Generic flow. */
88 struct mlx5_fdir {
89         struct rte_flow_attr attr;
90         struct rte_flow_action actions[2];
91         struct rte_flow_item items[4];
92         struct rte_flow_item_eth l2;
93         struct rte_flow_item_eth l2_mask;
94         union {
95                 struct rte_flow_item_ipv4 ipv4;
96                 struct rte_flow_item_ipv6 ipv6;
97         } l3;
98         union {
99                 struct rte_flow_item_ipv4 ipv4;
100                 struct rte_flow_item_ipv6 ipv6;
101         } l3_mask;
102         union {
103                 struct rte_flow_item_udp udp;
104                 struct rte_flow_item_tcp tcp;
105         } l4;
106         union {
107                 struct rte_flow_item_udp udp;
108                 struct rte_flow_item_tcp tcp;
109         } l4_mask;
110         struct rte_flow_action_queue queue;
111 };
112
113 /* Verbs specification header. */
114 struct ibv_spec_header {
115         enum ibv_flow_spec_type type;
116         uint16_t size;
117 };
118
119  /**
120   * Discover the maximum number of priority available.
121   *
122   * @param[in] dev
123   *   Pointer to Ethernet device.
124   *
125   * @return
126   *   number of supported flow priority on success, a negative errno value
127   *   otherwise and rte_errno is set.
128   */
129 int
130 mlx5_flow_discover_priorities(struct rte_eth_dev *dev)
131 {
132         struct {
133                 struct ibv_flow_attr attr;
134                 struct ibv_flow_spec_eth eth;
135                 struct ibv_flow_spec_action_drop drop;
136         } flow_attr = {
137                 .attr = {
138                         .num_of_specs = 2,
139                 },
140                 .eth = {
141                         .type = IBV_FLOW_SPEC_ETH,
142                         .size = sizeof(struct ibv_flow_spec_eth),
143                 },
144                 .drop = {
145                         .size = sizeof(struct ibv_flow_spec_action_drop),
146                         .type = IBV_FLOW_SPEC_ACTION_DROP,
147                 },
148         };
149         struct ibv_flow *flow;
150         struct mlx5_hrxq *drop = mlx5_hrxq_drop_new(dev);
151         uint16_t vprio[] = { 8, 16 };
152         int i;
153
154         if (!drop) {
155                 rte_errno = ENOTSUP;
156                 return -rte_errno;
157         }
158         for (i = 0; i != RTE_DIM(vprio); i++) {
159                 flow_attr.attr.priority = vprio[i] - 1;
160                 flow = mlx5_glue->create_flow(drop->qp, &flow_attr.attr);
161                 if (!flow)
162                         break;
163                 claim_zero(mlx5_glue->destroy_flow(flow));
164         }
165         mlx5_hrxq_drop_release(dev);
166         DRV_LOG(INFO, "port %u flow maximum priority: %d",
167                 dev->data->port_id, vprio[i - 1]);
168         return vprio[i - 1];
169 }
170
171 /**
172  * Verify the @p attributes will be correctly understood by the NIC and store
173  * them in the @p flow if everything is correct.
174  *
175  * @param[in] dev
176  *   Pointer to Ethernet device.
177  * @param[in] attributes
178  *   Pointer to flow attributes
179  * @param[in, out] flow
180  *   Pointer to the rte_flow structure.
181  * @param[out] error
182  *   Pointer to error structure.
183  *
184  * @return
185  *   0 on success, a negative errno value otherwise and rte_errno is set.
186  */
187 static int
188 mlx5_flow_attributes(struct rte_eth_dev *dev,
189                      const struct rte_flow_attr *attributes,
190                      struct rte_flow *flow,
191                      struct rte_flow_error *error)
192 {
193         uint32_t priority_max =
194                 ((struct priv *)dev->data->dev_private)->config.flow_prio;
195
196         if (attributes->group)
197                 return rte_flow_error_set(error, ENOTSUP,
198                                           RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
199                                           NULL,
200                                           "groups is not supported");
201         if (attributes->priority >= priority_max)
202                 return rte_flow_error_set(error, ENOTSUP,
203                                           RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
204                                           NULL,
205                                           "priority out of range");
206         if (attributes->egress)
207                 return rte_flow_error_set(error, ENOTSUP,
208                                           RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
209                                           NULL,
210                                           "egress is not supported");
211         if (attributes->transfer)
212                 return rte_flow_error_set(error, ENOTSUP,
213                                           RTE_FLOW_ERROR_TYPE_ATTR_TRANSFER,
214                                           NULL,
215                                           "transfer is not supported");
216         if (!attributes->ingress)
217                 return rte_flow_error_set(error, ENOTSUP,
218                                           RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
219                                           NULL,
220                                           "ingress attribute is mandatory");
221         flow->attributes = *attributes;
222         return 0;
223 }
224
225 /**
226  * Verify the @p item specifications (spec, last, mask) are compatible with the
227  * NIC capabilities.
228  *
229  * @param[in] item
230  *   Item specification.
231  * @param[in] mask
232  *   @p item->mask or flow default bit-masks.
233  * @param[in] nic_mask
234  *   Bit-masks covering supported fields by the NIC to compare with user mask.
235  * @param[in] size
236  *   Bit-masks size in bytes.
237  * @param[out] error
238  *   Pointer to error structure.
239  *
240  * @return
241  *   0 on success, a negative errno value otherwise and rte_errno is set.
242  */
243 static int
244 mlx5_flow_item_acceptable(const struct rte_flow_item *item,
245                           const uint8_t *mask,
246                           const uint8_t *nic_mask,
247                           unsigned int size,
248                           struct rte_flow_error *error)
249 {
250         unsigned int i;
251
252         assert(nic_mask);
253         for (i = 0; i < size; ++i)
254                 if ((nic_mask[i] | mask[i]) != nic_mask[i])
255                         return rte_flow_error_set(error, ENOTSUP,
256                                                   RTE_FLOW_ERROR_TYPE_ITEM,
257                                                   item,
258                                                   "mask enables non supported"
259                                                   " bits");
260         if (!item->spec && (item->mask || item->last))
261                 return rte_flow_error_set(error, EINVAL,
262                                           RTE_FLOW_ERROR_TYPE_ITEM,
263                                           item,
264                                           "mask/last without a spec is not"
265                                           " supported");
266         if (item->spec && item->last) {
267                 uint8_t spec[size];
268                 uint8_t last[size];
269                 unsigned int i;
270                 int ret;
271
272                 for (i = 0; i < size; ++i) {
273                         spec[i] = ((const uint8_t *)item->spec)[i] & mask[i];
274                         last[i] = ((const uint8_t *)item->last)[i] & mask[i];
275                 }
276                 ret = memcmp(spec, last, size);
277                 if (ret != 0)
278                         return rte_flow_error_set(error, ENOTSUP,
279                                                   RTE_FLOW_ERROR_TYPE_ITEM,
280                                                   item,
281                                                   "range is not supported");
282         }
283         return 0;
284 }
285
286 /**
287  * Add a verbs specification into @p flow.
288  *
289  * @param[in, out] flow
290  *   Pointer to flow structure.
291  * @param[in] src
292  *   Create specification.
293  * @param[in] size
294  *   Size in bytes of the specification to copy.
295  */
296 static void
297 mlx5_flow_spec_verbs_add(struct rte_flow *flow, void *src, unsigned int size)
298 {
299         if (flow->verbs.specs) {
300                 void *dst;
301
302                 dst = (void *)(flow->verbs.specs + flow->verbs.size);
303                 memcpy(dst, src, size);
304                 ++flow->verbs.attr->num_of_specs;
305         }
306         flow->verbs.size += size;
307 }
308
309 /**
310  * Convert the @p item into a Verbs specification after ensuring the NIC
311  * will understand and process it correctly.
312  * If the necessary size for the conversion is greater than the @p flow_size,
313  * nothing is written in @p flow, the validation is still performed.
314  *
315  * @param[in] item
316  *   Item specification.
317  * @param[in, out] flow
318  *   Pointer to flow structure.
319  * @param[in] flow_size
320  *   Size in bytes of the available space in @p flow, if too small, nothing is
321  *   written.
322  * @param[out] error
323  *   Pointer to error structure.
324  *
325  * @return
326  *   On success the number of bytes consumed/necessary, if the returned value
327  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
328  *   otherwise another call with this returned memory size should be done.
329  *   On error, a negative errno value is returned and rte_errno is set.
330  */
331 static int
332 mlx5_flow_item_eth(const struct rte_flow_item *item, struct rte_flow *flow,
333                    const size_t flow_size, struct rte_flow_error *error)
334 {
335         const struct rte_flow_item_eth *spec = item->spec;
336         const struct rte_flow_item_eth *mask = item->mask;
337         const struct rte_flow_item_eth nic_mask = {
338                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
339                 .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
340                 .type = RTE_BE16(0xffff),
341         };
342         const unsigned int size = sizeof(struct ibv_flow_spec_eth);
343         struct ibv_flow_spec_eth eth = {
344                 .type = IBV_FLOW_SPEC_ETH,
345                 .size = size,
346         };
347         int ret;
348
349         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L2)
350                 return rte_flow_error_set(error, ENOTSUP,
351                                           RTE_FLOW_ERROR_TYPE_ITEM,
352                                           item,
353                                           "L2 layers already configured");
354         if (!mask)
355                 mask = &rte_flow_item_eth_mask;
356         ret = mlx5_flow_item_acceptable(item, (const uint8_t *)mask,
357                                         (const uint8_t *)&nic_mask,
358                                         sizeof(struct rte_flow_item_eth),
359                                         error);
360         if (ret)
361                 return ret;
362         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2;
363         if (size > flow_size)
364                 return size;
365         if (spec) {
366                 unsigned int i;
367
368                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
369                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
370                 eth.val.ether_type = spec->type;
371                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
372                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
373                 eth.mask.ether_type = mask->type;
374                 /* Remove unwanted bits from values. */
375                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
376                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
377                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
378                 }
379                 eth.val.ether_type &= eth.mask.ether_type;
380         }
381         mlx5_flow_spec_verbs_add(flow, &eth, size);
382         return size;
383 }
384
385 /**
386  * Update the VLAN tag in the Verbs Ethernet specification.
387  *
388  * @param[in, out] attr
389  *   Pointer to Verbs attributes structure.
390  * @param[in] eth
391  *   Verbs structure containing the VLAN information to copy.
392  */
393 static void
394 mlx5_flow_item_vlan_update(struct ibv_flow_attr *attr,
395                            struct ibv_flow_spec_eth *eth)
396 {
397         unsigned int i;
398         enum ibv_flow_spec_type search = IBV_FLOW_SPEC_ETH;
399         struct ibv_spec_header *hdr = (struct ibv_spec_header *)
400                 ((uint8_t *)attr + sizeof(struct ibv_flow_attr));
401
402         for (i = 0; i != attr->num_of_specs; ++i) {
403                 if (hdr->type == search) {
404                         struct ibv_flow_spec_eth *e =
405                                 (struct ibv_flow_spec_eth *)hdr;
406
407                         e->val.vlan_tag = eth->val.vlan_tag;
408                         e->mask.vlan_tag = eth->mask.vlan_tag;
409                         e->val.ether_type = eth->val.ether_type;
410                         e->mask.ether_type = eth->mask.ether_type;
411                         break;
412                 }
413                 hdr = (struct ibv_spec_header *)((uint8_t *)hdr + hdr->size);
414         }
415 }
416
417 /**
418  * Convert the @p item into @p flow (or by updating the already present
419  * Ethernet Verbs) specification after ensuring the NIC will understand and
420  * process it correctly.
421  * If the necessary size for the conversion is greater than the @p flow_size,
422  * nothing is written in @p flow, the validation is still performed.
423  *
424  * @param[in] item
425  *   Item specification.
426  * @param[in, out] flow
427  *   Pointer to flow structure.
428  * @param[in] flow_size
429  *   Size in bytes of the available space in @p flow, if too small, nothing is
430  *   written.
431  * @param[out] error
432  *   Pointer to error structure.
433  *
434  * @return
435  *   On success the number of bytes consumed/necessary, if the returned value
436  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
437  *   otherwise another call with this returned memory size should be done.
438  *   On error, a negative errno value is returned and rte_errno is set.
439  */
440 static int
441 mlx5_flow_item_vlan(const struct rte_flow_item *item, struct rte_flow *flow,
442                     const size_t flow_size, struct rte_flow_error *error)
443 {
444         const struct rte_flow_item_vlan *spec = item->spec;
445         const struct rte_flow_item_vlan *mask = item->mask;
446         const struct rte_flow_item_vlan nic_mask = {
447                 .tci = RTE_BE16(0x0fff),
448                 .inner_type = RTE_BE16(0xffff),
449         };
450         unsigned int size = sizeof(struct ibv_flow_spec_eth);
451         struct ibv_flow_spec_eth eth = {
452                 .type = IBV_FLOW_SPEC_ETH,
453                 .size = size,
454         };
455         int ret;
456         const uint32_t l34m = MLX5_FLOW_LAYER_OUTER_L3 |
457                         MLX5_FLOW_LAYER_OUTER_L4;
458         const uint32_t vlanm = MLX5_FLOW_LAYER_OUTER_VLAN;
459         const uint32_t l2m = MLX5_FLOW_LAYER_OUTER_L2;
460
461         if (flow->layers & vlanm)
462                 return rte_flow_error_set(error, ENOTSUP,
463                                           RTE_FLOW_ERROR_TYPE_ITEM,
464                                           item,
465                                           "VLAN layer already configured");
466         else if ((flow->layers & l34m) != 0)
467                 return rte_flow_error_set(error, ENOTSUP,
468                                           RTE_FLOW_ERROR_TYPE_ITEM,
469                                           item,
470                                           "L2 layer cannot follow L3/L4 layer");
471         if (!mask)
472                 mask = &rte_flow_item_vlan_mask;
473         ret = mlx5_flow_item_acceptable
474                 (item, (const uint8_t *)mask,
475                  (const uint8_t *)&nic_mask,
476                  sizeof(struct rte_flow_item_vlan), error);
477         if (ret)
478                 return ret;
479         if (spec) {
480                 eth.val.vlan_tag = spec->tci;
481                 eth.mask.vlan_tag = mask->tci;
482                 eth.val.vlan_tag &= eth.mask.vlan_tag;
483                 eth.val.ether_type = spec->inner_type;
484                 eth.mask.ether_type = mask->inner_type;
485                 eth.val.ether_type &= eth.mask.ether_type;
486         }
487         /*
488          * From verbs perspective an empty VLAN is equivalent
489          * to a packet without VLAN layer.
490          */
491         if (!eth.mask.vlan_tag)
492                 return rte_flow_error_set(error, EINVAL,
493                                           RTE_FLOW_ERROR_TYPE_ITEM_SPEC,
494                                           item->spec,
495                                           "VLAN cannot be empty");
496         if (!(flow->layers & l2m)) {
497                 if (size <= flow_size)
498                         mlx5_flow_spec_verbs_add(flow, &eth, size);
499         } else {
500                 if (flow->verbs.attr)
501                         mlx5_flow_item_vlan_update(flow->verbs.attr, &eth);
502                 size = 0; /* Only an update is done in eth specification. */
503         }
504         flow->layers |= MLX5_FLOW_LAYER_OUTER_L2 |
505                 MLX5_FLOW_LAYER_OUTER_VLAN;
506         return size;
507 }
508
509 /**
510  * Convert the @p item into a Verbs specification after ensuring the NIC
511  * will understand and process it correctly.
512  * If the necessary size for the conversion is greater than the @p flow_size,
513  * nothing is written in @p flow, the validation is still performed.
514  *
515  * @param[in] item
516  *   Item specification.
517  * @param[in, out] flow
518  *   Pointer to flow structure.
519  * @param[in] flow_size
520  *   Size in bytes of the available space in @p flow, if too small, nothing is
521  *   written.
522  * @param[out] error
523  *   Pointer to error structure.
524  *
525  * @return
526  *   On success the number of bytes consumed/necessary, if the returned value
527  *   is lesser or equal to @p flow_size, the @p item has fully been converted,
528  *   otherwise another call with this returned memory size should be done.
529  *   On error, a negative errno value is returned and rte_errno is set.
530  */
531 static int
532 mlx5_flow_item_ipv4(const struct rte_flow_item *item, struct rte_flow *flow,
533                     const size_t flow_size, struct rte_flow_error *error)
534 {
535         const struct rte_flow_item_ipv4 *spec = item->spec;
536         const struct rte_flow_item_ipv4 *mask = item->mask;
537         const struct rte_flow_item_ipv4 nic_mask = {
538                 .hdr = {
539                         .src_addr = RTE_BE32(0xffffffff),
540                         .dst_addr = RTE_BE32(0xffffffff),
541                         .type_of_service = 0xff,
542                         .next_proto_id = 0xff,
543                 },
544         };
545         unsigned int size = sizeof(struct ibv_flow_spec_ipv4_ext);
546         struct ibv_flow_spec_ipv4_ext ipv4 = {
547                 .type = IBV_FLOW_SPEC_IPV4_EXT,
548                 .size = size,
549         };
550         int ret;
551
552         if (flow->layers & MLX5_FLOW_LAYER_OUTER_L3)
553                 return rte_flow_error_set(error, ENOTSUP,
554                                           RTE_FLOW_ERROR_TYPE_ITEM,
555                                           item,
556                                           "multiple L3 layers not supported");
557         else if (flow->layers & MLX5_FLOW_LAYER_OUTER_L4)
558                 return rte_flow_error_set(error, ENOTSUP,
559                                           RTE_FLOW_ERROR_TYPE_ITEM,
560                                           item,
561                                           "L3 cannot follow an L4 layer.");
562         if (!mask)
563                 mask = &rte_flow_item_ipv4_mask;
564         ret = mlx5_flow_item_acceptable
565                 (item, (const uint8_t *)mask,
566                  (const uint8_t *)&nic_mask,
567                  sizeof(struct rte_flow_item_ipv4), error);
568         if (ret < 0)
569                 return ret;
570         flow->layers |= MLX5_FLOW_LAYER_OUTER_L3_IPV4;
571         if (size > flow_size)
572                 return size;
573         if (spec) {
574                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
575                         .src_ip = spec->hdr.src_addr,
576                         .dst_ip = spec->hdr.dst_addr,
577                         .proto = spec->hdr.next_proto_id,
578                         .tos = spec->hdr.type_of_service,
579                 };
580                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
581                         .src_ip = mask->hdr.src_addr,
582                         .dst_ip = mask->hdr.dst_addr,
583                         .proto = mask->hdr.next_proto_id,
584                         .tos = mask->hdr.type_of_service,
585                 };
586                 /* Remove unwanted bits from values. */
587                 ipv4.val.src_ip &= ipv4.mask.src_ip;
588                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
589                 ipv4.val.proto &= ipv4.mask.proto;
590                 ipv4.val.tos &= ipv4.mask.tos;
591         }
592         mlx5_flow_spec_verbs_add(flow, &ipv4, size);
593         return size;
594 }
595
596 /**
597  * Convert the @p pattern into a Verbs specifications after ensuring the NIC
598  * will understand and process it correctly.
599  * The conversion is performed item per item, each of them is written into
600  * the @p flow if its size is lesser or equal to @p flow_size.
601  * Validation and memory consumption computation are still performed until the
602  * end of @p pattern, unless an error is encountered.
603  *
604  * @param[in] pattern
605  *   Flow pattern.
606  * @param[in, out] flow
607  *   Pointer to the rte_flow structure.
608  * @param[in] flow_size
609  *   Size in bytes of the available space in @p flow, if too small some
610  *   garbage may be present.
611  * @param[out] error
612  *   Pointer to error structure.
613  *
614  * @return
615  *   On success the number of bytes consumed/necessary, if the returned value
616  *   is lesser or equal to @p flow_size, the @pattern  has fully been
617  *   converted, otherwise another call with this returned memory size should
618  *   be done.
619  *   On error, a negative errno value is returned and rte_errno is set.
620  */
621 static int
622 mlx5_flow_items(const struct rte_flow_item pattern[],
623                 struct rte_flow *flow, const size_t flow_size,
624                 struct rte_flow_error *error)
625 {
626         int remain = flow_size;
627         size_t size = 0;
628
629         for (; pattern->type != RTE_FLOW_ITEM_TYPE_END; pattern++) {
630                 int ret = 0;
631
632                 switch (pattern->type) {
633                 case RTE_FLOW_ITEM_TYPE_VOID:
634                         break;
635                 case RTE_FLOW_ITEM_TYPE_ETH:
636                         ret = mlx5_flow_item_eth(pattern, flow, remain, error);
637                         break;
638                 case RTE_FLOW_ITEM_TYPE_VLAN:
639                         ret = mlx5_flow_item_vlan(pattern, flow, remain, error);
640                         break;
641                 case RTE_FLOW_ITEM_TYPE_IPV4:
642                         ret = mlx5_flow_item_ipv4(pattern, flow, remain, error);
643                         break;
644                 default:
645                         return rte_flow_error_set(error, ENOTSUP,
646                                                   RTE_FLOW_ERROR_TYPE_ITEM,
647                                                   pattern,
648                                                   "item not supported");
649                 }
650                 if (ret < 0)
651                         return ret;
652                 if (remain > ret)
653                         remain -= ret;
654                 else
655                         remain = 0;
656                 size += ret;
657         }
658         if (!flow->layers) {
659                 const struct rte_flow_item item = {
660                         .type = RTE_FLOW_ITEM_TYPE_ETH,
661                 };
662
663                 return mlx5_flow_item_eth(&item, flow, flow_size, error);
664         }
665         return size;
666 }
667
668 /**
669  * Convert the @p action into a Verbs specification after ensuring the NIC
670  * will understand and process it correctly.
671  * If the necessary size for the conversion is greater than the @p flow_size,
672  * nothing is written in @p flow, the validation is still performed.
673  *
674  * @param[in] action
675  *   Action configuration.
676  * @param[in, out] flow
677  *   Pointer to flow structure.
678  * @param[in] flow_size
679  *   Size in bytes of the available space in @p flow, if too small, nothing is
680  *   written.
681  * @param[out] error
682  *   Pointer to error structure.
683  *
684  * @return
685  *   On success the number of bytes consumed/necessary, if the returned value
686  *   is lesser or equal to @p flow_size, the @p action has fully been
687  *   converted, otherwise another call with this returned memory size should
688  *   be done.
689  *   On error, a negative errno value is returned and rte_errno is set.
690  */
691 static int
692 mlx5_flow_action_drop(const struct rte_flow_action *action,
693                       struct rte_flow *flow, const size_t flow_size,
694                       struct rte_flow_error *error)
695 {
696         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
697         struct ibv_flow_spec_action_drop drop = {
698                         .type = IBV_FLOW_SPEC_ACTION_DROP,
699                         .size = size,
700         };
701
702         if (flow->fate)
703                 return rte_flow_error_set(error, ENOTSUP,
704                                           RTE_FLOW_ERROR_TYPE_ACTION,
705                                           action,
706                                           "multiple fate actions are not"
707                                           " supported");
708         if (size < flow_size)
709                 mlx5_flow_spec_verbs_add(flow, &drop, size);
710         flow->fate |= MLX5_FLOW_FATE_DROP;
711         return size;
712 }
713
714 /**
715  * Convert the @p action into @p flow after ensuring the NIC will understand
716  * and process it correctly.
717  *
718  * @param[in] dev
719  *   Pointer to Ethernet device structure.
720  * @param[in] action
721  *   Action configuration.
722  * @param[in, out] flow
723  *   Pointer to flow structure.
724  * @param[out] error
725  *   Pointer to error structure.
726  *
727  * @return
728  *   0 on success, a negative errno value otherwise and rte_errno is set.
729  */
730 static int
731 mlx5_flow_action_queue(struct rte_eth_dev *dev,
732                        const struct rte_flow_action *action,
733                        struct rte_flow *flow,
734                        struct rte_flow_error *error)
735 {
736         struct priv *priv = dev->data->dev_private;
737         const struct rte_flow_action_queue *queue = action->conf;
738
739         if (flow->fate)
740                 return rte_flow_error_set(error, ENOTSUP,
741                                           RTE_FLOW_ERROR_TYPE_ACTION,
742                                           action,
743                                           "multiple fate actions are not"
744                                           " supported");
745         if (queue->index >= priv->rxqs_n)
746                 return rte_flow_error_set(error, EINVAL,
747                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
748                                           &queue->index,
749                                           "queue index out of range");
750         if (!(*priv->rxqs)[queue->index])
751                 return rte_flow_error_set(error, EINVAL,
752                                           RTE_FLOW_ERROR_TYPE_ACTION_CONF,
753                                           &queue->index,
754                                           "queue is not configured");
755         flow->queue = queue->index;
756         flow->fate |= MLX5_FLOW_FATE_QUEUE;
757         return 0;
758 }
759
760 /**
761  * Convert the @p action into @p flow after ensuring the NIC will understand
762  * and process it correctly.
763  * The conversion is performed action per action, each of them is written into
764  * the @p flow if its size is lesser or equal to @p flow_size.
765  * Validation and memory consumption computation are still performed until the
766  * end of @p action, unless an error is encountered.
767  *
768  * @param[in] dev
769  *   Pointer to Ethernet device structure.
770  * @param[in] actions
771  *   Pointer to flow actions array.
772  * @param[in, out] flow
773  *   Pointer to the rte_flow structure.
774  * @param[in] flow_size
775  *   Size in bytes of the available space in @p flow, if too small some
776  *   garbage may be present.
777  * @param[out] error
778  *   Pointer to error structure.
779  *
780  * @return
781  *   On success the number of bytes consumed/necessary, if the returned value
782  *   is lesser or equal to @p flow_size, the @p actions has fully been
783  *   converted, otherwise another call with this returned memory size should
784  *   be done.
785  *   On error, a negative errno value is returned and rte_errno is set.
786  */
787 static int
788 mlx5_flow_actions(struct rte_eth_dev *dev,
789                   const struct rte_flow_action actions[],
790                   struct rte_flow *flow, const size_t flow_size,
791                   struct rte_flow_error *error)
792 {
793         size_t size = 0;
794         int remain = flow_size;
795         int ret = 0;
796
797         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; actions++) {
798                 switch (actions->type) {
799                 case RTE_FLOW_ACTION_TYPE_VOID:
800                         break;
801                 case RTE_FLOW_ACTION_TYPE_DROP:
802                         ret = mlx5_flow_action_drop(actions, flow, remain,
803                                                     error);
804                         break;
805                 case RTE_FLOW_ACTION_TYPE_QUEUE:
806                         ret = mlx5_flow_action_queue(dev, actions, flow, error);
807                         break;
808                 default:
809                         return rte_flow_error_set(error, ENOTSUP,
810                                                   RTE_FLOW_ERROR_TYPE_ACTION,
811                                                   actions,
812                                                   "action not supported");
813                 }
814                 if (ret < 0)
815                         return ret;
816                 if (remain > ret)
817                         remain -= ret;
818                 else
819                         remain = 0;
820                 size += ret;
821         }
822         if (!flow->fate)
823                 return rte_flow_error_set(error, ENOTSUP,
824                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
825                                           NULL,
826                                           "no fate action found");
827         return size;
828 }
829
830 /**
831  * Convert the @p attributes, @p pattern, @p action, into an flow for the NIC
832  * after ensuring the NIC will understand and process it correctly.
833  * The conversion is only performed item/action per item/action, each of
834  * them is written into the @p flow if its size is lesser or equal to @p
835  * flow_size.
836  * Validation and memory consumption computation are still performed until the
837  * end, unless an error is encountered.
838  *
839  * @param[in] dev
840  *   Pointer to Ethernet device.
841  * @param[in, out] flow
842  *   Pointer to flow structure.
843  * @param[in] flow_size
844  *   Size in bytes of the available space in @p flow, if too small some
845  *   garbage may be present.
846  * @param[in] attributes
847  *   Flow rule attributes.
848  * @param[in] pattern
849  *   Pattern specification (list terminated by the END pattern item).
850  * @param[in] actions
851  *   Associated actions (list terminated by the END action).
852  * @param[out] error
853  *   Perform verbose error reporting if not NULL.
854  *
855  * @return
856  *   On success the number of bytes consumed/necessary, if the returned value
857  *   is lesser or equal to @p flow_size, the flow has fully been converted and
858  *   can be applied, otherwise another call with this returned memory size
859  *   should be done.
860  *   On error, a negative errno value is returned and rte_errno is set.
861  */
862 static int
863 mlx5_flow_merge(struct rte_eth_dev *dev, struct rte_flow *flow,
864                 const size_t flow_size,
865                 const struct rte_flow_attr *attributes,
866                 const struct rte_flow_item pattern[],
867                 const struct rte_flow_action actions[],
868                 struct rte_flow_error *error)
869 {
870         struct rte_flow local_flow = { .layers = 0, };
871         size_t size = sizeof(*flow) + sizeof(struct ibv_flow_attr);
872         int remain = (flow_size > size) ? flow_size - size : 0;
873         int ret;
874
875         if (!remain)
876                 flow = &local_flow;
877         ret = mlx5_flow_attributes(dev, attributes, flow, error);
878         if (ret < 0)
879                 return ret;
880         ret = mlx5_flow_items(pattern, flow, remain, error);
881         if (ret < 0)
882                 return ret;
883         size += ret;
884         remain = (flow_size > size) ? flow_size - size : 0;
885         ret = mlx5_flow_actions(dev, actions, flow, remain, error);
886         if (ret < 0)
887                 return ret;
888         size += ret;
889         if (size <= flow_size)
890                 flow->verbs.attr->priority = flow->attributes.priority;
891         return size;
892 }
893
894 /**
895  * Validate a flow supported by the NIC.
896  *
897  * @see rte_flow_validate()
898  * @see rte_flow_ops
899  */
900 int
901 mlx5_flow_validate(struct rte_eth_dev *dev,
902                    const struct rte_flow_attr *attr,
903                    const struct rte_flow_item items[],
904                    const struct rte_flow_action actions[],
905                    struct rte_flow_error *error)
906 {
907         int ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
908
909         if (ret < 0)
910                 return ret;
911         return 0;
912 }
913
914 /**
915  * Remove the flow.
916  *
917  * @param[in] dev
918  *   Pointer to Ethernet device.
919  * @param[in, out] flow
920  *   Pointer to flow structure.
921  */
922 static void
923 mlx5_flow_remove(struct rte_eth_dev *dev, struct rte_flow *flow)
924 {
925         if (flow->fate & MLX5_FLOW_FATE_DROP) {
926                 if (flow->verbs.flow) {
927                         claim_zero(mlx5_glue->destroy_flow(flow->verbs.flow));
928                         flow->verbs.flow = NULL;
929                 }
930         }
931         if (flow->verbs.hrxq) {
932                 if (flow->fate & MLX5_FLOW_FATE_DROP)
933                         mlx5_hrxq_drop_release(dev);
934                 else if (flow->fate & MLX5_FLOW_FATE_QUEUE)
935                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
936                 flow->verbs.hrxq = NULL;
937         }
938 }
939
940 /**
941  * Apply the flow.
942  *
943  * @param[in] dev
944  *   Pointer to Ethernet device structure.
945  * @param[in, out] flow
946  *   Pointer to flow structure.
947  * @param[out] error
948  *   Pointer to error structure.
949  *
950  * @return
951  *   0 on success, a negative errno value otherwise and rte_errno is set.
952  */
953 static int
954 mlx5_flow_apply(struct rte_eth_dev *dev, struct rte_flow *flow,
955                 struct rte_flow_error *error)
956 {
957         if (flow->fate & MLX5_FLOW_FATE_DROP) {
958                 flow->verbs.hrxq = mlx5_hrxq_drop_new(dev);
959                 if (!flow->verbs.hrxq)
960                         return rte_flow_error_set
961                                 (error, errno,
962                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
963                                  NULL,
964                                  "cannot allocate Drop queue");
965         } else if (flow->fate & MLX5_FLOW_FATE_QUEUE) {
966                 struct mlx5_hrxq *hrxq;
967
968                 hrxq = mlx5_hrxq_get(dev, rss_hash_default_key,
969                                      rss_hash_default_key_len, 0,
970                                      &flow->queue, 1, 0, 0);
971                 if (!hrxq)
972                         hrxq = mlx5_hrxq_new(dev, rss_hash_default_key,
973                                              rss_hash_default_key_len, 0,
974                                              &flow->queue, 1, 0, 0);
975                 if (!hrxq)
976                         return rte_flow_error_set(error, rte_errno,
977                                         RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
978                                         NULL,
979                                         "cannot create flow");
980                 flow->verbs.hrxq = hrxq;
981         }
982         flow->verbs.flow =
983                 mlx5_glue->create_flow(flow->verbs.hrxq->qp, flow->verbs.attr);
984         if (!flow->verbs.flow) {
985                 if (flow->fate & MLX5_FLOW_FATE_DROP)
986                         mlx5_hrxq_drop_release(dev);
987                 else
988                         mlx5_hrxq_release(dev, flow->verbs.hrxq);
989                 flow->verbs.hrxq = NULL;
990                 return rte_flow_error_set(error, errno,
991                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
992                                           NULL,
993                                           "kernel module refuses to create"
994                                           " flow");
995         }
996         return 0;
997 }
998
999 /**
1000  * Create a flow and add it to @p list.
1001  *
1002  * @param dev
1003  *   Pointer to Ethernet device.
1004  * @param list
1005  *   Pointer to a TAILQ flow list.
1006  * @param[in] attr
1007  *   Flow rule attributes.
1008  * @param[in] items
1009  *   Pattern specification (list terminated by the END pattern item).
1010  * @param[in] actions
1011  *   Associated actions (list terminated by the END action).
1012  * @param[out] error
1013  *   Perform verbose error reporting if not NULL.
1014  *
1015  * @return
1016  *   A flow on success, NULL otherwise and rte_errno is set.
1017  */
1018 static struct rte_flow *
1019 mlx5_flow_list_create(struct rte_eth_dev *dev,
1020                       struct mlx5_flows *list,
1021                       const struct rte_flow_attr *attr,
1022                       const struct rte_flow_item items[],
1023                       const struct rte_flow_action actions[],
1024                       struct rte_flow_error *error)
1025 {
1026         struct rte_flow *flow;
1027         size_t size;
1028         int ret;
1029
1030         ret = mlx5_flow_merge(dev, NULL, 0, attr, items, actions, error);
1031         if (ret < 0)
1032                 return NULL;
1033         size = ret;
1034         flow = rte_zmalloc(__func__, size, 0);
1035         if (!flow) {
1036                 rte_flow_error_set(error, ENOMEM,
1037                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1038                                    NULL,
1039                                    "cannot allocate memory");
1040                 return NULL;
1041         }
1042         flow->verbs.attr = (struct ibv_flow_attr *)(flow + 1);
1043         flow->verbs.specs = (uint8_t *)(flow->verbs.attr + 1);
1044         ret = mlx5_flow_merge(dev, flow, size, attr, items, actions, error);
1045         if (ret < 0)
1046                 goto error;
1047         assert((size_t)ret == size);
1048         if (dev->data->dev_started) {
1049                 ret = mlx5_flow_apply(dev, flow, error);
1050                 if (ret < 0)
1051                         goto error;
1052         }
1053         TAILQ_INSERT_TAIL(list, flow, next);
1054         return flow;
1055 error:
1056         ret = rte_errno; /* Save rte_errno before cleanup. */
1057         mlx5_flow_remove(dev, flow);
1058         rte_free(flow);
1059         rte_errno = ret; /* Restore rte_errno. */
1060         return NULL;
1061 }
1062
1063 /**
1064  * Create a flow.
1065  *
1066  * @see rte_flow_create()
1067  * @see rte_flow_ops
1068  */
1069 struct rte_flow *
1070 mlx5_flow_create(struct rte_eth_dev *dev,
1071                  const struct rte_flow_attr *attr,
1072                  const struct rte_flow_item items[],
1073                  const struct rte_flow_action actions[],
1074                  struct rte_flow_error *error)
1075 {
1076         return mlx5_flow_list_create
1077                 (dev, &((struct priv *)dev->data->dev_private)->flows,
1078                  attr, items, actions, error);
1079 }
1080
1081 /**
1082  * Destroy a flow in a list.
1083  *
1084  * @param dev
1085  *   Pointer to Ethernet device.
1086  * @param list
1087  *   Pointer to a TAILQ flow list.
1088  * @param[in] flow
1089  *   Flow to destroy.
1090  */
1091 static void
1092 mlx5_flow_list_destroy(struct rte_eth_dev *dev, struct mlx5_flows *list,
1093                        struct rte_flow *flow)
1094 {
1095         mlx5_flow_remove(dev, flow);
1096         TAILQ_REMOVE(list, flow, next);
1097         rte_free(flow);
1098 }
1099
1100 /**
1101  * Destroy all flows.
1102  *
1103  * @param dev
1104  *   Pointer to Ethernet device.
1105  * @param list
1106  *   Pointer to a TAILQ flow list.
1107  */
1108 void
1109 mlx5_flow_list_flush(struct rte_eth_dev *dev, struct mlx5_flows *list)
1110 {
1111         while (!TAILQ_EMPTY(list)) {
1112                 struct rte_flow *flow;
1113
1114                 flow = TAILQ_FIRST(list);
1115                 mlx5_flow_list_destroy(dev, list, flow);
1116         }
1117 }
1118
1119 /**
1120  * Remove all flows.
1121  *
1122  * @param dev
1123  *   Pointer to Ethernet device.
1124  * @param list
1125  *   Pointer to a TAILQ flow list.
1126  */
1127 void
1128 mlx5_flow_stop(struct rte_eth_dev *dev, struct mlx5_flows *list)
1129 {
1130         struct rte_flow *flow;
1131
1132         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next)
1133                 mlx5_flow_remove(dev, flow);
1134 }
1135
1136 /**
1137  * Add all flows.
1138  *
1139  * @param dev
1140  *   Pointer to Ethernet device.
1141  * @param list
1142  *   Pointer to a TAILQ flow list.
1143  *
1144  * @return
1145  *   0 on success, a negative errno value otherwise and rte_errno is set.
1146  */
1147 int
1148 mlx5_flow_start(struct rte_eth_dev *dev, struct mlx5_flows *list)
1149 {
1150         struct rte_flow *flow;
1151         struct rte_flow_error error;
1152         int ret = 0;
1153
1154         TAILQ_FOREACH(flow, list, next) {
1155                 ret = mlx5_flow_apply(dev, flow, &error);
1156                 if (ret < 0)
1157                         goto error;
1158         }
1159         return 0;
1160 error:
1161         ret = rte_errno; /* Save rte_errno before cleanup. */
1162         mlx5_flow_stop(dev, list);
1163         rte_errno = ret; /* Restore rte_errno. */
1164         return -rte_errno;
1165 }
1166
1167 /**
1168  * Verify the flow list is empty
1169  *
1170  * @param dev
1171  *  Pointer to Ethernet device.
1172  *
1173  * @return the number of flows not released.
1174  */
1175 int
1176 mlx5_flow_verify(struct rte_eth_dev *dev)
1177 {
1178         struct priv *priv = dev->data->dev_private;
1179         struct rte_flow *flow;
1180         int ret = 0;
1181
1182         TAILQ_FOREACH(flow, &priv->flows, next) {
1183                 DRV_LOG(DEBUG, "port %u flow %p still referenced",
1184                         dev->data->port_id, (void *)flow);
1185                 ++ret;
1186         }
1187         return ret;
1188 }
1189
1190 /**
1191  * Enable a control flow configured from the control plane.
1192  *
1193  * @param dev
1194  *   Pointer to Ethernet device.
1195  * @param eth_spec
1196  *   An Ethernet flow spec to apply.
1197  * @param eth_mask
1198  *   An Ethernet flow mask to apply.
1199  * @param vlan_spec
1200  *   A VLAN flow spec to apply.
1201  * @param vlan_mask
1202  *   A VLAN flow mask to apply.
1203  *
1204  * @return
1205  *   0 on success, a negative errno value otherwise and rte_errno is set.
1206  */
1207 int
1208 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
1209                     struct rte_flow_item_eth *eth_spec,
1210                     struct rte_flow_item_eth *eth_mask,
1211                     struct rte_flow_item_vlan *vlan_spec,
1212                     struct rte_flow_item_vlan *vlan_mask)
1213 {
1214         struct priv *priv = dev->data->dev_private;
1215         const struct rte_flow_attr attr = {
1216                 .ingress = 1,
1217                 .priority = priv->config.flow_prio - 1,
1218         };
1219         struct rte_flow_item items[] = {
1220                 {
1221                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1222                         .spec = eth_spec,
1223                         .last = NULL,
1224                         .mask = eth_mask,
1225                 },
1226                 {
1227                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
1228                                 RTE_FLOW_ITEM_TYPE_END,
1229                         .spec = vlan_spec,
1230                         .last = NULL,
1231                         .mask = vlan_mask,
1232                 },
1233                 {
1234                         .type = RTE_FLOW_ITEM_TYPE_END,
1235                 },
1236         };
1237         uint16_t queue[priv->reta_idx_n];
1238         struct rte_flow_action_rss action_rss = {
1239                 .func = RTE_ETH_HASH_FUNCTION_DEFAULT,
1240                 .level = 0,
1241                 .types = priv->rss_conf.rss_hf,
1242                 .key_len = priv->rss_conf.rss_key_len,
1243                 .queue_num = priv->reta_idx_n,
1244                 .key = priv->rss_conf.rss_key,
1245                 .queue = queue,
1246         };
1247         struct rte_flow_action actions[] = {
1248                 {
1249                         .type = RTE_FLOW_ACTION_TYPE_RSS,
1250                         .conf = &action_rss,
1251                 },
1252                 {
1253                         .type = RTE_FLOW_ACTION_TYPE_END,
1254                 },
1255         };
1256         struct rte_flow *flow;
1257         struct rte_flow_error error;
1258         unsigned int i;
1259
1260         if (!priv->reta_idx_n) {
1261                 rte_errno = EINVAL;
1262                 return -rte_errno;
1263         }
1264         for (i = 0; i != priv->reta_idx_n; ++i)
1265                 queue[i] = (*priv->reta_idx)[i];
1266         flow = mlx5_flow_list_create(dev, &priv->ctrl_flows, &attr, items,
1267                                      actions, &error);
1268         if (!flow)
1269                 return -rte_errno;
1270         return 0;
1271 }
1272
/**
 * Enable a control flow configured from the control plane, matching on
 * Ethernet only (no VLAN specification).
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
	       struct rte_flow_item_eth *eth_spec,
	       struct rte_flow_item_eth *eth_mask)
{
	struct rte_flow_item_vlan *const no_vlan = NULL;

	return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, no_vlan, no_vlan);
}
1293
1294 /**
1295  * Destroy a flow.
1296  *
1297  * @see rte_flow_destroy()
1298  * @see rte_flow_ops
1299  */
1300 int
1301 mlx5_flow_destroy(struct rte_eth_dev *dev,
1302                   struct rte_flow *flow,
1303                   struct rte_flow_error *error __rte_unused)
1304 {
1305         struct priv *priv = dev->data->dev_private;
1306
1307         mlx5_flow_list_destroy(dev, &priv->flows, flow);
1308         return 0;
1309 }
1310
1311 /**
1312  * Destroy all flows.
1313  *
1314  * @see rte_flow_flush()
1315  * @see rte_flow_ops
1316  */
1317 int
1318 mlx5_flow_flush(struct rte_eth_dev *dev,
1319                 struct rte_flow_error *error __rte_unused)
1320 {
1321         struct priv *priv = dev->data->dev_private;
1322
1323         mlx5_flow_list_flush(dev, &priv->flows);
1324         return 0;
1325 }
1326
1327 /**
1328  * Isolated mode.
1329  *
1330  * @see rte_flow_isolate()
1331  * @see rte_flow_ops
1332  */
1333 int
1334 mlx5_flow_isolate(struct rte_eth_dev *dev,
1335                   int enable,
1336                   struct rte_flow_error *error)
1337 {
1338         struct priv *priv = dev->data->dev_private;
1339
1340         if (dev->data->dev_started) {
1341                 rte_flow_error_set(error, EBUSY,
1342                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1343                                    NULL,
1344                                    "port must be stopped first");
1345                 return -rte_errno;
1346         }
1347         priv->isolated = !!enable;
1348         if (enable)
1349                 dev->dev_ops = &mlx5_dev_ops_isolate;
1350         else
1351                 dev->dev_ops = &mlx5_dev_ops;
1352         return 0;
1353 }
1354
1355 /**
1356  * Convert a flow director filter to a generic flow.
1357  *
1358  * @param dev
1359  *   Pointer to Ethernet device.
1360  * @param fdir_filter
1361  *   Flow director filter to add.
1362  * @param attributes
1363  *   Generic flow parameters structure.
1364  *
1365  * @return
1366  *   0 on success, a negative errno value otherwise and rte_errno is set.
1367  */
1368 static int
1369 mlx5_fdir_filter_convert(struct rte_eth_dev *dev,
1370                          const struct rte_eth_fdir_filter *fdir_filter,
1371                          struct mlx5_fdir *attributes)
1372 {
1373         struct priv *priv = dev->data->dev_private;
1374         const struct rte_eth_fdir_input *input = &fdir_filter->input;
1375         const struct rte_eth_fdir_masks *mask =
1376                 &dev->data->dev_conf.fdir_conf.mask;
1377
1378         /* Validate queue number. */
1379         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
1380                 DRV_LOG(ERR, "port %u invalid queue number %d",
1381                         dev->data->port_id, fdir_filter->action.rx_queue);
1382                 rte_errno = EINVAL;
1383                 return -rte_errno;
1384         }
1385         attributes->attr.ingress = 1;
1386         attributes->items[0] = (struct rte_flow_item) {
1387                 .type = RTE_FLOW_ITEM_TYPE_ETH,
1388                 .spec = &attributes->l2,
1389                 .mask = &attributes->l2_mask,
1390         };
1391         switch (fdir_filter->action.behavior) {
1392         case RTE_ETH_FDIR_ACCEPT:
1393                 attributes->actions[0] = (struct rte_flow_action){
1394                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1395                         .conf = &attributes->queue,
1396                 };
1397                 break;
1398         case RTE_ETH_FDIR_REJECT:
1399                 attributes->actions[0] = (struct rte_flow_action){
1400                         .type = RTE_FLOW_ACTION_TYPE_DROP,
1401                 };
1402                 break;
1403         default:
1404                 DRV_LOG(ERR, "port %u invalid behavior %d",
1405                         dev->data->port_id,
1406                         fdir_filter->action.behavior);
1407                 rte_errno = ENOTSUP;
1408                 return -rte_errno;
1409         }
1410         attributes->queue.index = fdir_filter->action.rx_queue;
1411         /* Handle L3. */
1412         switch (fdir_filter->input.flow_type) {
1413         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1414         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1415         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1416                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
1417                         .src_addr = input->flow.ip4_flow.src_ip,
1418                         .dst_addr = input->flow.ip4_flow.dst_ip,
1419                         .time_to_live = input->flow.ip4_flow.ttl,
1420                         .type_of_service = input->flow.ip4_flow.tos,
1421                         .next_proto_id = input->flow.ip4_flow.proto,
1422                 };
1423                 attributes->l3_mask.ipv4.hdr = (struct ipv4_hdr){
1424                         .src_addr = mask->ipv4_mask.src_ip,
1425                         .dst_addr = mask->ipv4_mask.dst_ip,
1426                         .time_to_live = mask->ipv4_mask.ttl,
1427                         .type_of_service = mask->ipv4_mask.tos,
1428                         .next_proto_id = mask->ipv4_mask.proto,
1429                 };
1430                 attributes->items[1] = (struct rte_flow_item){
1431                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
1432                         .spec = &attributes->l3,
1433                         .mask = &attributes->l3_mask,
1434                 };
1435                 break;
1436         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1437         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1438         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1439                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
1440                         .hop_limits = input->flow.ipv6_flow.hop_limits,
1441                         .proto = input->flow.ipv6_flow.proto,
1442                 };
1443
1444                 memcpy(attributes->l3.ipv6.hdr.src_addr,
1445                        input->flow.ipv6_flow.src_ip,
1446                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1447                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
1448                        input->flow.ipv6_flow.dst_ip,
1449                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
1450                 memcpy(attributes->l3_mask.ipv6.hdr.src_addr,
1451                        mask->ipv6_mask.src_ip,
1452                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1453                 memcpy(attributes->l3_mask.ipv6.hdr.dst_addr,
1454                        mask->ipv6_mask.dst_ip,
1455                        RTE_DIM(attributes->l3_mask.ipv6.hdr.src_addr));
1456                 attributes->items[1] = (struct rte_flow_item){
1457                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
1458                         .spec = &attributes->l3,
1459                         .mask = &attributes->l3_mask,
1460                 };
1461                 break;
1462         default:
1463                 DRV_LOG(ERR, "port %u invalid flow type%d",
1464                         dev->data->port_id, fdir_filter->input.flow_type);
1465                 rte_errno = ENOTSUP;
1466                 return -rte_errno;
1467         }
1468         /* Handle L4. */
1469         switch (fdir_filter->input.flow_type) {
1470         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
1471                 attributes->l4.udp.hdr = (struct udp_hdr){
1472                         .src_port = input->flow.udp4_flow.src_port,
1473                         .dst_port = input->flow.udp4_flow.dst_port,
1474                 };
1475                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
1476                         .src_port = mask->src_port_mask,
1477                         .dst_port = mask->dst_port_mask,
1478                 };
1479                 attributes->items[2] = (struct rte_flow_item){
1480                         .type = RTE_FLOW_ITEM_TYPE_UDP,
1481                         .spec = &attributes->l4,
1482                         .mask = &attributes->l4_mask,
1483                 };
1484                 break;
1485         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
1486                 attributes->l4.tcp.hdr = (struct tcp_hdr){
1487                         .src_port = input->flow.tcp4_flow.src_port,
1488                         .dst_port = input->flow.tcp4_flow.dst_port,
1489                 };
1490                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1491                         .src_port = mask->src_port_mask,
1492                         .dst_port = mask->dst_port_mask,
1493                 };
1494                 attributes->items[2] = (struct rte_flow_item){
1495                         .type = RTE_FLOW_ITEM_TYPE_TCP,
1496                         .spec = &attributes->l4,
1497                         .mask = &attributes->l4_mask,
1498                 };
1499                 break;
1500         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
1501                 attributes->l4.udp.hdr = (struct udp_hdr){
1502                         .src_port = input->flow.udp6_flow.src_port,
1503                         .dst_port = input->flow.udp6_flow.dst_port,
1504                 };
1505                 attributes->l4_mask.udp.hdr = (struct udp_hdr){
1506                         .src_port = mask->src_port_mask,
1507                         .dst_port = mask->dst_port_mask,
1508                 };
1509                 attributes->items[2] = (struct rte_flow_item){
1510                         .type = RTE_FLOW_ITEM_TYPE_UDP,
1511                         .spec = &attributes->l4,
1512                         .mask = &attributes->l4_mask,
1513                 };
1514                 break;
1515         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
1516                 attributes->l4.tcp.hdr = (struct tcp_hdr){
1517                         .src_port = input->flow.tcp6_flow.src_port,
1518                         .dst_port = input->flow.tcp6_flow.dst_port,
1519                 };
1520                 attributes->l4_mask.tcp.hdr = (struct tcp_hdr){
1521                         .src_port = mask->src_port_mask,
1522                         .dst_port = mask->dst_port_mask,
1523                 };
1524                 attributes->items[2] = (struct rte_flow_item){
1525                         .type = RTE_FLOW_ITEM_TYPE_TCP,
1526                         .spec = &attributes->l4,
1527                         .mask = &attributes->l4_mask,
1528                 };
1529                 break;
1530         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
1531         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
1532                 break;
1533         default:
1534                 DRV_LOG(ERR, "port %u invalid flow type%d",
1535                         dev->data->port_id, fdir_filter->input.flow_type);
1536                 rte_errno = ENOTSUP;
1537                 return -rte_errno;
1538         }
1539         return 0;
1540 }
1541
1542 /**
1543  * Add new flow director filter and store it in list.
1544  *
1545  * @param dev
1546  *   Pointer to Ethernet device.
1547  * @param fdir_filter
1548  *   Flow director filter to add.
1549  *
1550  * @return
1551  *   0 on success, a negative errno value otherwise and rte_errno is set.
1552  */
1553 static int
1554 mlx5_fdir_filter_add(struct rte_eth_dev *dev,
1555                      const struct rte_eth_fdir_filter *fdir_filter)
1556 {
1557         struct priv *priv = dev->data->dev_private;
1558         struct mlx5_fdir attributes = {
1559                 .attr.group = 0,
1560                 .l2_mask = {
1561                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1562                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
1563                         .type = 0,
1564                 },
1565         };
1566         struct rte_flow_error error;
1567         struct rte_flow *flow;
1568         int ret;
1569
1570         ret = mlx5_fdir_filter_convert(dev, fdir_filter, &attributes);
1571         if (ret)
1572                 return ret;
1573         flow = mlx5_flow_list_create(dev, &priv->flows, &attributes.attr,
1574                                      attributes.items, attributes.actions,
1575                                      &error);
1576         if (flow) {
1577                 DRV_LOG(DEBUG, "port %u FDIR created %p", dev->data->port_id,
1578                         (void *)flow);
1579                 return 0;
1580         }
1581         return -rte_errno;
1582 }
1583
1584 /**
1585  * Delete specific filter.
1586  *
1587  * @param dev
1588  *   Pointer to Ethernet device.
1589  * @param fdir_filter
1590  *   Filter to be deleted.
1591  *
1592  * @return
1593  *   0 on success, a negative errno value otherwise and rte_errno is set.
1594  */
1595 static int
1596 mlx5_fdir_filter_delete(struct rte_eth_dev *dev __rte_unused,
1597                         const struct rte_eth_fdir_filter *fdir_filter
1598                         __rte_unused)
1599 {
1600         rte_errno = ENOTSUP;
1601         return -rte_errno;
1602 }
1603
/**
 * Update a specific flow director filter.
 *
 * Implemented as a delete followed by an add of the same filter. When the
 * delete step fails, its error is returned and the add is not attempted.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param fdir_filter
 *   Filter to be updated.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx5_fdir_filter_update(struct rte_eth_dev *dev,
                        const struct rte_eth_fdir_filter *fdir_filter)
{
        int err = mlx5_fdir_filter_delete(dev, fdir_filter);

        return err ? err : mlx5_fdir_filter_add(dev, fdir_filter);
}
1626
1627 /**
1628  * Flush all filters.
1629  *
1630  * @param dev
1631  *   Pointer to Ethernet device.
1632  */
1633 static void
1634 mlx5_fdir_filter_flush(struct rte_eth_dev *dev)
1635 {
1636         struct priv *priv = dev->data->dev_private;
1637
1638         mlx5_flow_list_flush(dev, &priv->flows);
1639 }
1640
1641 /**
1642  * Get flow director information.
1643  *
1644  * @param dev
1645  *   Pointer to Ethernet device.
1646  * @param[out] fdir_info
1647  *   Resulting flow director information.
1648  */
1649 static void
1650 mlx5_fdir_info_get(struct rte_eth_dev *dev, struct rte_eth_fdir_info *fdir_info)
1651 {
1652         struct rte_eth_fdir_masks *mask =
1653                 &dev->data->dev_conf.fdir_conf.mask;
1654
1655         fdir_info->mode = dev->data->dev_conf.fdir_conf.mode;
1656         fdir_info->guarant_spc = 0;
1657         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
1658         fdir_info->max_flexpayload = 0;
1659         fdir_info->flow_types_mask[0] = 0;
1660         fdir_info->flex_payload_unit = 0;
1661         fdir_info->max_flex_payload_segment_num = 0;
1662         fdir_info->flex_payload_limit = 0;
1663         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
1664 }
1665
1666 /**
1667  * Deal with flow director operations.
1668  *
1669  * @param dev
1670  *   Pointer to Ethernet device.
1671  * @param filter_op
1672  *   Operation to perform.
1673  * @param arg
1674  *   Pointer to operation-specific structure.
1675  *
1676  * @return
1677  *   0 on success, a negative errno value otherwise and rte_errno is set.
1678  */
1679 static int
1680 mlx5_fdir_ctrl_func(struct rte_eth_dev *dev, enum rte_filter_op filter_op,
1681                     void *arg)
1682 {
1683         enum rte_fdir_mode fdir_mode =
1684                 dev->data->dev_conf.fdir_conf.mode;
1685
1686         if (filter_op == RTE_ETH_FILTER_NOP)
1687                 return 0;
1688         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
1689             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
1690                 DRV_LOG(ERR, "port %u flow director mode %d not supported",
1691                         dev->data->port_id, fdir_mode);
1692                 rte_errno = EINVAL;
1693                 return -rte_errno;
1694         }
1695         switch (filter_op) {
1696         case RTE_ETH_FILTER_ADD:
1697                 return mlx5_fdir_filter_add(dev, arg);
1698         case RTE_ETH_FILTER_UPDATE:
1699                 return mlx5_fdir_filter_update(dev, arg);
1700         case RTE_ETH_FILTER_DELETE:
1701                 return mlx5_fdir_filter_delete(dev, arg);
1702         case RTE_ETH_FILTER_FLUSH:
1703                 mlx5_fdir_filter_flush(dev);
1704                 break;
1705         case RTE_ETH_FILTER_INFO:
1706                 mlx5_fdir_info_get(dev, arg);
1707                 break;
1708         default:
1709                 DRV_LOG(DEBUG, "port %u unknown operation %u",
1710                         dev->data->port_id, filter_op);
1711                 rte_errno = EINVAL;
1712                 return -rte_errno;
1713         }
1714         return 0;
1715 }
1716
1717 /**
1718  * Manage filter operations.
1719  *
1720  * @param dev
1721  *   Pointer to Ethernet device structure.
1722  * @param filter_type
1723  *   Filter type.
1724  * @param filter_op
1725  *   Operation to perform.
1726  * @param arg
1727  *   Pointer to operation-specific structure.
1728  *
1729  * @return
1730  *   0 on success, a negative errno value otherwise and rte_errno is set.
1731  */
1732 int
1733 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
1734                      enum rte_filter_type filter_type,
1735                      enum rte_filter_op filter_op,
1736                      void *arg)
1737 {
1738         switch (filter_type) {
1739         case RTE_ETH_FILTER_GENERIC:
1740                 if (filter_op != RTE_ETH_FILTER_GET) {
1741                         rte_errno = EINVAL;
1742                         return -rte_errno;
1743                 }
1744                 *(const void **)arg = &mlx5_flow_ops;
1745                 return 0;
1746         case RTE_ETH_FILTER_FDIR:
1747                 return mlx5_fdir_ctrl_func(dev, filter_op, arg);
1748         default:
1749                 DRV_LOG(ERR, "port %u filter type (%d) not supported",
1750                         dev->data->port_id, filter_type);
1751                 rte_errno = ENOTSUP;
1752                 return -rte_errno;
1753         }
1754         return 0;
1755 }