drivers/net/mlx4/mlx4_flow.c
/*-
 *   BSD LICENSE
 *
 *   Copyright 2017 6WIND S.A.
 *   Copyright 2017 Mellanox
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * @file
 * Flow API operations for mlx4 driver.
 */

#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>

/* Verbs headers do not support -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_errno.h>
#include <rte_eth_ctrl.h>
#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

/* PMD headers. */
#include "mlx4.h"
#include "mlx4_flow.h"
#include "mlx4_rxtx.h"
#include "mlx4_utils.h"

/** Static initializer for a list of subsequent item types. */
#define NEXT_ITEM(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Processor structure associated with a flow item. */
struct mlx4_flow_proc_item {
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-mask size in bytes. */
        const unsigned int mask_sz;
        /**
         * Check support for a given item.
         *
         * @param[in] item
         *   Item specification.
         * @param[in] mask
         *   Bit-masks covering supported fields to compare with spec,
         *   last and mask in \item.
         * @param size
         *   Bit-mask size in bytes.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*validate)(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size);
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible subsequent items. */
        const enum rte_flow_item_type *const next_item;
};

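/** Destination resources for drop flow rules (a single instance is shared by all such rules). */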
struct rte_flow_drop {
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx4_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx4_flow *flow = (struct mlx4_flow *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_flow_spec_eth) {
                .type = IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec) {
                flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
                return 0;
        }
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx4_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx4_flow *flow = (struct mlx4_flow *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

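        /*
         * VLAN matching is folded into the Ethernet specification converted
         * just before this item (hence dst_sz is 0 for VLAN in the item
         * graph), so step back to the previous spec and fill its vlan_tag
         * fields.
         */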
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx4_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx4_flow *flow = (struct mlx4_flow *)data;
        struct ibv_flow_spec_ipv4 *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_flow_spec_ipv4) {
                .type = IBV_FLOW_SPEC_IPV4,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        ipv4->val = (struct ibv_flow_ipv4_filter) {
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
        };
        if (!mask)
                mask = default_mask;
        ipv4->mask = (struct ibv_flow_ipv4_filter) {
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx4_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx4_flow *flow = (struct mlx4_flow *)data;
        struct ibv_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_flow_spec_tcp_udp) {
                .type = IBV_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        if (!mask)
                mask = default_mask;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx4_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx4_flow *flow = (struct mlx4_flow *)data;
        struct ibv_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = IBV_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        if (!mask)
                mask = default_mask;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */
static int
mlx4_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
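        /*
         * When both spec and last are provided, all bits covered by the
         * applicable mask must be identical in spec and last; such ranges
         * are not supported.
         */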
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

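/**
 * Validate Ethernet item support (the destination MAC mask, if provided,
 * must cover all bits).
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */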
static int
mlx4_flow_validate_eth(const struct rte_flow_item *item,
                       const uint8_t *mask, unsigned int size)
{
        if (item->mask) {
                const struct rte_flow_item_eth *mask = item->mask;

                if (mask->dst.addr_bytes[0] != 0xff ||
                                mask->dst.addr_bytes[1] != 0xff ||
                                mask->dst.addr_bytes[2] != 0xff ||
                                mask->dst.addr_bytes[3] != 0xff ||
                                mask->dst.addr_bytes[4] != 0xff ||
                                mask->dst.addr_bytes[5] != 0xff)
                        return -1;
        }
        return mlx4_flow_item_validate(item, mask, size);
}

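/**
 * Validate VLAN item support (the TCI mask, if provided, must either be
 * empty or cover the entire VLAN ID).
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */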
static int
mlx4_flow_validate_vlan(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        if (item->mask) {
                const struct rte_flow_item_vlan *mask = item->mask;

                if (mask->tci != 0 &&
                    ntohs(mask->tci) != 0x0fff)
                        return -1;
        }
        return mlx4_flow_item_validate(item, mask, size);
}

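/**
 * Validate IPv4 item support (source and destination address masks, if
 * provided, must either be empty or cover all bits).
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */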
static int
mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        if (item->mask) {
                const struct rte_flow_item_ipv4 *mask = item->mask;

                if (mask->hdr.src_addr != 0 &&
                    mask->hdr.src_addr != 0xffffffff)
                        return -1;
                if (mask->hdr.dst_addr != 0 &&
                    mask->hdr.dst_addr != 0xffffffff)
                        return -1;
        }
        return mlx4_flow_item_validate(item, mask, size);
}

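/**
 * Validate UDP item support (source and destination port masks, if provided,
 * must either be empty or cover all bits).
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */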
static int
mlx4_flow_validate_udp(const struct rte_flow_item *item,
                       const uint8_t *mask, unsigned int size)
{
        if (item->mask) {
                const struct rte_flow_item_udp *mask = item->mask;

                if (mask->hdr.src_port != 0 &&
                    mask->hdr.src_port != 0xffff)
                        return -1;
                if (mask->hdr.dst_port != 0 &&
                    mask->hdr.dst_port != 0xffff)
                        return -1;
        }
        return mlx4_flow_item_validate(item, mask, size);
}

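/**
 * Validate TCP item support (source and destination port masks, if provided,
 * must either be empty or cover all bits).
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, negative value otherwise.
 */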
static int
mlx4_flow_validate_tcp(const struct rte_flow_item *item,
                       const uint8_t *mask, unsigned int size)
{
        if (item->mask) {
                const struct rte_flow_item_tcp *mask = item->mask;

                if (mask->hdr.src_port != 0 &&
                    mask->hdr.src_port != 0xffff)
                        return -1;
                if (mask->hdr.dst_port != 0 &&
                    mask->hdr.dst_port != 0xffff)
                        return -1;
        }
        return mlx4_flow_item_validate(item, mask, size);
}

/** Graph of supported items and associated actions. */
static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
                                       RTE_FLOW_ITEM_TYPE_IPV4),
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .validate = mlx4_flow_validate_eth,
                .convert = mlx4_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
                .mask = &(const struct rte_flow_item_vlan){
                /* rte_flow_item_vlan_mask is invalid for mlx4. */
#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN
                        .tci = 0x0fff,
#else
                        .tci = 0xff0f,
#endif
                },
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .validate = mlx4_flow_validate_vlan,
                .convert = mlx4_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
                                       RTE_FLOW_ITEM_TYPE_TCP),
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .validate = mlx4_flow_validate_ipv4,
                .convert = mlx4_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .validate = mlx4_flow_validate_udp,
                .convert = mlx4_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .validate = mlx4_flow_validate_tcp,
                .convert = mlx4_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
};

/**
 * Make sure a flow rule is supported and initialize associated structure.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
mlx4_flow_prepare(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item pattern[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx4_flow *flow)
{
        const struct rte_flow_item *item;
        const struct rte_flow_action *action;
        const struct mlx4_flow_proc_item *proc = mlx4_flow_proc_item_list;
        struct mlx4_flow_target target = {
                .queue = 0,
                .drop = 0,
        };
        uint32_t priority_override = 0;

        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (priv->isolated) {
                priority_override = attr->priority;
        } else if (attr->priority) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported outside"
                                   " isolated mode");
                return -rte_errno;
        }
        if (attr->priority > MLX4_FLOW_PRIORITY_LAST) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "maximum priority level is "
                                   MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        /* Go over pattern. */
        for (item = pattern; item->type != RTE_FLOW_ITEM_TYPE_END; ++item) {
                const struct mlx4_flow_proc_item *next = NULL;
                unsigned int i;
                int err;

                if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                /*
                 * The NIC can support patterns with a NULL Ethernet spec
                 * only when Ethernet is the single item in the rule.
                 */
                if (!item->spec && item->type == RTE_FLOW_ITEM_TYPE_ETH) {
                        const struct rte_flow_item *next = item + 1;

                        if (next->type != RTE_FLOW_ITEM_TYPE_END) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   item,
                                                   "the rule requires"
                                                   " an Ethernet spec");
                                return -rte_errno;
                        }
                }
                for (i = 0;
                     proc->next_item &&
                     proc->next_item[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (proc->next_item[i] == item->type) {
                                next = &mlx4_flow_proc_item_list[item->type];
                                break;
                        }
                }
                if (!next)
                        goto exit_item_not_supported;
                proc = next;
                err = proc->validate(item, proc->mask, proc->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && proc->convert) {
                        err = proc->convert(item,
                                            (proc->default_mask ?
                                             proc->default_mask :
                                             proc->mask),
                                            flow);
                        if (err)
                                goto exit_item_not_supported;
                }
                flow->offset += proc->dst_sz;
        }
        /* Use specified priority level when in isolated mode. */
        if (priv->isolated && flow->ibv_attr)
                flow->ibv_attr->priority = priority_override;
        /* Go over actions list. */
        for (action = actions;
             action->type != RTE_FLOW_ACTION_TYPE_END;
             ++action) {
                if (action->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (action->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        target.drop = 1;
                } else if (action->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                action->conf;

                        if (!queue || (queue->index >
                                       (priv->dev->data->nb_rx_queues - 1)))
                                goto exit_action_not_supported;
                        target.queue = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (!target.queue && !target.drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           item, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           action, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item pattern[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr) };

        return mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
}

/**
 * Destroy a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
mlx4_flow_destroy_drop_queue(struct priv *priv)
{
        if (priv->flow_drop_queue) {
                struct rte_flow_drop *fdq = priv->flow_drop_queue;

                priv->flow_drop_queue = NULL;
                claim_zero(ibv_destroy_qp(fdq->qp));
                claim_zero(ibv_destroy_cq(fdq->cq));
                rte_free(fdq);
        }
}

/**
 * Create a single drop queue for all drop flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, negative value otherwise.
 */
static int
mlx4_flow_create_drop_queue(struct priv *priv)
{
        struct ibv_qp *qp;
        struct ibv_cq *cq;
        struct rte_flow_drop *fdq;

        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
        if (!fdq) {
                ERROR("Cannot allocate memory for drop struct");
                goto err;
        }
        cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
        if (!cq) {
                ERROR("Cannot create drop CQ");
                goto err_create_cq;
        }
        qp = ibv_create_qp(priv->pd,
                           &(struct ibv_qp_init_attr){
                                .send_cq = cq,
                                .recv_cq = cq,
                                .cap = {
                                        .max_recv_wr = 1,
                                        .max_recv_sge = 1,
                                },
                                .qp_type = IBV_QPT_RAW_PACKET,
                           });
        if (!qp) {
                ERROR("Cannot create drop QP");
                goto err_create_qp;
        }
        *fdq = (struct rte_flow_drop){
                .qp = qp,
                .cq = cq,
        };
        priv->flow_drop_queue = fdq;
        return 0;
err_create_qp:
        claim_zero(ibv_destroy_cq(cq));
err_create_cq:
        rte_free(fdq);
err:
        return -1;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param ibv_attr
 *   Verbs flow attributes.
 * @param target
 *   Rule target descriptor.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow handle on success, NULL otherwise and error is set.
 */
static struct rte_flow *
mlx4_flow_create_target_queue(struct priv *priv,
                              struct ibv_flow_attr *ibv_attr,
                              struct mlx4_flow_target *target,
                              struct rte_flow_error *error)
{
        struct ibv_qp *qp;
        struct rte_flow *rte_flow;

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        if (target->drop) {
                qp = priv->flow_drop_queue ? priv->flow_drop_queue->qp : NULL;
        } else {
                struct rxq *rxq = priv->dev->data->rx_queues[target->queue_id];

                qp = rxq->qp;
                rte_flow->qp = qp;
        }
        rte_flow->ibv_attr = ibv_attr;
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_create_flow(qp, rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        rte_free(rte_flow);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
static struct rte_flow *
mlx4_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item pattern[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        const struct rte_flow_action *action;
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *rte_flow;
        struct mlx4_flow_target target;
        struct mlx4_flow flow = { .offset = sizeof(struct ibv_flow_attr), };
        int err;

        err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
        if (err)
                return NULL;
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                return NULL;
        }
        flow.offset = sizeof(struct ibv_flow_attr);
        *flow.ibv_attr = (struct ibv_flow_attr){
                .comp_mask = 0,
                .type = IBV_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = priv->port,
                .flags = 0,
        };
        claim_zero(mlx4_flow_prepare(priv, attr, pattern, actions,
                                     error, &flow));
        target = (struct mlx4_flow_target){
                .queue = 0,
                .drop = 0,
        };
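        /*
         * Scan the action list again to extract the target; the first pass
         * (through mlx4_flow_prepare()) only validated it.
         */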
        for (action = actions;
             action->type != RTE_FLOW_ACTION_TYPE_END;
             ++action) {
                if (action->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (action->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        target.queue = 1;
                        target.queue_id =
                                ((const struct rte_flow_action_queue *)
                                 action->conf)->index;
                } else if (action->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        target.drop = 1;
                } else {
                        rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           action, "unsupported action");
                        goto exit;
                }
        }
        rte_flow = mlx4_flow_create_target_queue(priv, flow.ibv_attr,
                                                 &target, error);
        if (rte_flow) {
                LIST_INSERT_HEAD(&priv->flows, rte_flow, next);
                DEBUG("Flow created %p", (void *)rte_flow);
                return rte_flow;
        }
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Configure isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
static int
mlx4_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        if (!!enable == !!priv->isolated)
                return 0;
        priv->isolated = !!enable;
        if (enable) {
                mlx4_mac_addr_del(priv);
        } else if (mlx4_mac_addr_add(priv) < 0) {
                priv->isolated = 1;
                return rte_flow_error_set(error, rte_errno,
                                          RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                          NULL, "cannot leave isolated mode");
        }
        return 0;
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
static int
mlx4_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        (void)dev;
        (void)error;
        LIST_REMOVE(flow, next);
        if (flow->ibv_flow)
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
        return 0;
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
static int
mlx4_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        while (!LIST_EMPTY(&priv->flows)) {
                struct rte_flow *flow;

                flow = LIST_FIRST(&priv->flows);
                mlx4_flow_destroy(dev, flow, error);
        }
        return 0;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
mlx4_flow_stop(struct priv *priv)
{
        struct rte_flow *flow;

        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                DEBUG("Flow %p removed", (void *)flow);
        }
        mlx4_flow_destroy_drop_queue(priv);
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
mlx4_flow_start(struct priv *priv)
{
        int ret;
        struct ibv_qp *qp;
        struct rte_flow *flow;

        ret = mlx4_flow_create_drop_queue(priv);
        if (ret)
                return -1;
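        /*
         * Re-apply every configured rule; rules without a target Rx queue
         * (drop rules) use the shared drop QP.
         */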
        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                qp = flow->qp ? flow->qp : priv->flow_drop_queue->qp;
                flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
        }
        return 0;
}

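/** Flow API operations exposed to applications through mlx4_filter_ctrl(). */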
static const struct rte_flow_ops mlx4_flow_ops = {
        .validate = mlx4_flow_validate,
        .create = mlx4_flow_create,
        .destroy = mlx4_flow_destroy,
        .flush = mlx4_flow_flush,
        .isolate = mlx4_flow_isolate,
};

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value otherwise and rte_errno is set.
 */
int
mlx4_filter_ctrl(struct rte_eth_dev *dev,
                 enum rte_filter_type filter_type,
                 enum rte_filter_op filter_op,
                 void *arg)
{
        switch (filter_type) {
        case RTE_ETH_FILTER_GENERIC:
                if (filter_op != RTE_ETH_FILTER_GET)
                        break;
                *(const void **)arg = &mlx4_flow_ops;
                return 0;
        default:
                ERROR("%p: filter type (%d) not supported",
                      (void *)dev, filter_type);
                break;
        }
        rte_errno = ENOTSUP;
        return -rte_errno;
}