net/mlx4: simplify trigger code for flow rules
dpdk.git: drivers/net/mlx4/mlx4_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2017 6WIND S.A.
5  *   Copyright 2017 Mellanox
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 /**
35  * @file
36  * Flow API operations for mlx4 driver.
37  */
38
39 #include <arpa/inet.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <stdalign.h>
43 #include <stddef.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <sys/queue.h>
47
48 /* Verbs headers do not support -pedantic. */
49 #ifdef PEDANTIC
50 #pragma GCC diagnostic ignored "-Wpedantic"
51 #endif
52 #include <infiniband/verbs.h>
53 #ifdef PEDANTIC
54 #pragma GCC diagnostic error "-Wpedantic"
55 #endif
56
57 #include <rte_byteorder.h>
58 #include <rte_errno.h>
59 #include <rte_eth_ctrl.h>
60 #include <rte_ethdev.h>
61 #include <rte_flow.h>
62 #include <rte_flow_driver.h>
63 #include <rte_malloc.h>
64
65 /* PMD headers. */
66 #include "mlx4.h"
67 #include "mlx4_flow.h"
68 #include "mlx4_rxtx.h"
69 #include "mlx4_utils.h"
70
71 /** Static initializer for a list of subsequent item types. */
72 #define NEXT_ITEM(...) \
73         (const enum rte_flow_item_type []){ \
74                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
75         }
76
77 /** Processor structure associated with a flow item. */
78 struct mlx4_flow_proc_item {
79         /** Bit-masks corresponding to the possibilities for the item. */
80         const void *mask;
81         /**
82          * Default bit-masks to use when item->mask is not provided. When
83          * \default_mask is also NULL, the full supported bit-mask (\mask) is
84          * used instead.
85          */
86         const void *default_mask;
87         /** Bit-mask size in bytes. */
88         const unsigned int mask_sz;
89         /**
90          * Check support for a given item.
91          *
92          * @param[in] item
93          *   Item specification.
94          * @param[in] mask
95          *   Bit-masks covering supported fields to compare with spec,
96          *   last and mask in
97          *   \item.
98          * @param size
99          *   Bit-mask size in bytes.
100          *
101          * @return
102          *   0 on success, negative value otherwise.
103          */
104         int (*validate)(const struct rte_flow_item *item,
105                         const uint8_t *mask, unsigned int size);
106         /**
107          * Conversion function from rte_flow to NIC specific flow.
108          *
109          * @param item
110          *   rte_flow item to convert.
111          * @param default_mask
112          *   Default bit-masks to use when item->mask is not provided.
113          * @param flow
114          *   Flow rule handle to update.
115          *
116          * @return
117          *   0 on success, negative value otherwise.
118          */
119         int (*convert)(const struct rte_flow_item *item,
120                        const void *default_mask,
121                        struct rte_flow *flow);
122         /** Size in bytes of the destination structure. */
123         const unsigned int dst_sz;
124         /** List of possible subsequent items. */
125         const enum rte_flow_item_type *const next_item;
126 };
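
/*
 * Note on how these callbacks are driven: mlx4_flow_prepare() further down
 * walks the pattern twice over the processing graph below. A rough sketch of
 * the per-item handling as implemented there:
 *
 *   proc = graph entry whose type is listed in the previous entry's next_item;
 *   if (first pass)                       // handle not allocated yet
 *           proc->validate(item, proc->mask, proc->mask_sz);
 *   else if (proc->convert)               // second pass, handle allocated
 *           proc->convert(item,
 *                         proc->default_mask ? proc->default_mask : proc->mask,
 *                         flow);
 *   flow->ibv_attr_size += proc->dst_sz;  // room consumed in ibv_attr
 */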
127
128 /** Shared resources for drop flow rules. */
129 struct mlx4_drop {
130         struct ibv_qp *qp; /**< QP target. */
131         struct ibv_cq *cq; /**< CQ associated with above QP. */
132         struct priv *priv; /**< Back pointer to private data. */
133         uint32_t refcnt; /**< Reference count. */
134 };
135
136 /**
137  * Convert Ethernet item to Verbs specification.
138  *
139  * @param[in] item
140  *   Item specification.
141  * @param[in] default_mask
142  *   Default bit-masks to use when item->mask is not provided.
143  * @param[in, out] flow
144  *   Flow rule handle to update.
145  */
146 static int
147 mlx4_flow_create_eth(const struct rte_flow_item *item,
148                      const void *default_mask,
149                      struct rte_flow *flow)
150 {
151         const struct rte_flow_item_eth *spec = item->spec;
152         const struct rte_flow_item_eth *mask = item->mask;
153         struct ibv_flow_spec_eth *eth;
154         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
155         unsigned int i;
156
157         ++flow->ibv_attr->num_of_specs;
158         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
159         *eth = (struct ibv_flow_spec_eth) {
160                 .type = IBV_FLOW_SPEC_ETH,
161                 .size = eth_size,
162         };
163         if (!spec) {
164                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
165                 return 0;
166         }
167         if (!mask)
168                 mask = default_mask;
169         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
170         memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
171         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
172         memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
173         /* Remove unwanted bits from values. */
174         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
175                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
176                 eth->val.src_mac[i] &= eth->mask.src_mac[i];
177         }
178         return 0;
179 }
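
/*
 * Illustration (assumed values, not part of the driver logic): an ETH item
 * whose spec carries dst 00:11:22:33:44:55 and no mask picks up default_mask
 * (rte_flow_item_eth_mask), so both MAC addresses end up matched exactly as
 * given in the spec; a NULL spec instead turns the whole rule into an
 * IBV_FLOW_ATTR_ALL_DEFAULT catch-all.
 */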
180
181 /**
182  * Convert VLAN item to Verbs specification.
183  *
184  * @param[in] item
185  *   Item specification.
186  * @param[in] default_mask
187  *   Default bit-masks to use when item->mask is not provided.
188  * @param[in, out] flow
189  *   Flow rule handle to update.
190  */
191 static int
192 mlx4_flow_create_vlan(const struct rte_flow_item *item,
193                       const void *default_mask,
194                       struct rte_flow *flow)
195 {
196         const struct rte_flow_item_vlan *spec = item->spec;
197         const struct rte_flow_item_vlan *mask = item->mask;
198         struct ibv_flow_spec_eth *eth;
199         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
200
201         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
202                        eth_size);
203         if (!spec)
204                 return 0;
205         if (!mask)
206                 mask = default_mask;
207         eth->val.vlan_tag = spec->tci;
208         eth->mask.vlan_tag = mask->tci;
209         eth->val.vlan_tag &= eth->mask.vlan_tag;
210         return 0;
211 }
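
/*
 * Unlike the other converters, this one does not append a new specification:
 * it only fills the vlan_tag fields of the ibv_flow_spec_eth written by the
 * preceding ETH item, which is why the VLAN entry in the processing graph
 * below uses .dst_sz = 0.
 */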
212
213 /**
214  * Convert IPv4 item to Verbs specification.
215  *
216  * @param[in] item
217  *   Item specification.
218  * @param[in] default_mask
219  *   Default bit-masks to use when item->mask is not provided.
220  * @param[in, out] flow
221  *   Flow rule handle to update.
222  */
223 static int
224 mlx4_flow_create_ipv4(const struct rte_flow_item *item,
225                       const void *default_mask,
226                       struct rte_flow *flow)
227 {
228         const struct rte_flow_item_ipv4 *spec = item->spec;
229         const struct rte_flow_item_ipv4 *mask = item->mask;
230         struct ibv_flow_spec_ipv4 *ipv4;
231         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
232
233         ++flow->ibv_attr->num_of_specs;
234         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
235         *ipv4 = (struct ibv_flow_spec_ipv4) {
236                 .type = IBV_FLOW_SPEC_IPV4,
237                 .size = ipv4_size,
238         };
239         if (!spec)
240                 return 0;
241         ipv4->val = (struct ibv_flow_ipv4_filter) {
242                 .src_ip = spec->hdr.src_addr,
243                 .dst_ip = spec->hdr.dst_addr,
244         };
245         if (!mask)
246                 mask = default_mask;
247         ipv4->mask = (struct ibv_flow_ipv4_filter) {
248                 .src_ip = mask->hdr.src_addr,
249                 .dst_ip = mask->hdr.dst_addr,
250         };
251         /* Remove unwanted bits from values. */
252         ipv4->val.src_ip &= ipv4->mask.src_ip;
253         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
254         return 0;
255 }
256
257 /**
258  * Convert UDP item to Verbs specification.
259  *
260  * @param[in] item
261  *   Item specification.
262  * @param[in] default_mask
263  *   Default bit-masks to use when item->mask is not provided.
264  * @param[in, out] flow
265  *   Flow rule handle to update.
266  */
267 static int
268 mlx4_flow_create_udp(const struct rte_flow_item *item,
269                      const void *default_mask,
270                      struct rte_flow *flow)
271 {
272         const struct rte_flow_item_udp *spec = item->spec;
273         const struct rte_flow_item_udp *mask = item->mask;
274         struct ibv_flow_spec_tcp_udp *udp;
275         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
276
277         ++flow->ibv_attr->num_of_specs;
278         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
279         *udp = (struct ibv_flow_spec_tcp_udp) {
280                 .type = IBV_FLOW_SPEC_UDP,
281                 .size = udp_size,
282         };
283         if (!spec)
284                 return 0;
285         udp->val.dst_port = spec->hdr.dst_port;
286         udp->val.src_port = spec->hdr.src_port;
287         if (!mask)
288                 mask = default_mask;
289         udp->mask.dst_port = mask->hdr.dst_port;
290         udp->mask.src_port = mask->hdr.src_port;
291         /* Remove unwanted bits from values. */
292         udp->val.src_port &= udp->mask.src_port;
293         udp->val.dst_port &= udp->mask.dst_port;
294         return 0;
295 }
296
297 /**
298  * Convert TCP item to Verbs specification.
299  *
300  * @param[in] item
301  *   Item specification.
302  * @param[in] default_mask
303  *   Default bit-masks to use when item->mask is not provided.
304  * @param[in, out] flow
305  *   Flow rule handle to update.
306  */
307 static int
308 mlx4_flow_create_tcp(const struct rte_flow_item *item,
309                      const void *default_mask,
310                      struct rte_flow *flow)
311 {
312         const struct rte_flow_item_tcp *spec = item->spec;
313         const struct rte_flow_item_tcp *mask = item->mask;
314         struct ibv_flow_spec_tcp_udp *tcp;
315         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
316
317         ++flow->ibv_attr->num_of_specs;
318         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
319         *tcp = (struct ibv_flow_spec_tcp_udp) {
320                 .type = IBV_FLOW_SPEC_TCP,
321                 .size = tcp_size,
322         };
323         if (!spec)
324                 return 0;
325         tcp->val.dst_port = spec->hdr.dst_port;
326         tcp->val.src_port = spec->hdr.src_port;
327         if (!mask)
328                 mask = default_mask;
329         tcp->mask.dst_port = mask->hdr.dst_port;
330         tcp->mask.src_port = mask->hdr.src_port;
331         /* Remove unwanted bits from values. */
332         tcp->val.src_port &= tcp->mask.src_port;
333         tcp->val.dst_port &= tcp->mask.dst_port;
334         return 0;
335 }
336
337 /**
338  * Check support for a given item.
339  *
340  * @param[in] item
341  *   Item specification.
342  * @param[in] mask
343  *   Bit-masks covering supported fields to compare with spec, last and mask in
344  *   \item.
345  * @param size
346  *   Bit-mask size in bytes.
347  *
348  * @return
349  *   0 on success, negative value otherwise.
350  */
351 static int
352 mlx4_flow_item_validate(const struct rte_flow_item *item,
353                         const uint8_t *mask, unsigned int size)
354 {
355         int ret = 0;
356
357         if (!item->spec && (item->mask || item->last))
358                 return -1;
359         if (item->spec && !item->mask) {
360                 unsigned int i;
361                 const uint8_t *spec = item->spec;
362
363                 for (i = 0; i < size; ++i)
364                         if ((spec[i] | mask[i]) != mask[i])
365                                 return -1;
366         }
367         if (item->last && !item->mask) {
368                 unsigned int i;
369                 const uint8_t *spec = item->last;
370
371                 for (i = 0; i < size; ++i)
372                         if ((spec[i] | mask[i]) != mask[i])
373                                 return -1;
374         }
375         if (item->spec && item->last) {
376                 uint8_t spec[size];
377                 uint8_t last[size];
378                 const uint8_t *apply = mask;
379                 unsigned int i;
380
381                 if (item->mask)
382                         apply = item->mask;
383                 for (i = 0; i < size; ++i) {
384                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
385                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
386                 }
387                 ret = memcmp(spec, last, size);
388         }
389         return ret;
390 }
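
/*
 * Worked example for the checks above, assuming a one-byte field whose
 * supported mask is 0x0f:
 *
 *   spec = 0x13, no mask/last  -> rejected, 0x13 has bits outside 0x0f.
 *   spec = 0x03, last = 0x03   -> accepted, identical once masked.
 *   spec = 0x03, last = 0x07   -> rejected, ranges are effectively
 *                                 unsupported since spec and last must be
 *                                 equal under the applied mask.
 */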
391
392 static int
393 mlx4_flow_validate_eth(const struct rte_flow_item *item,
394                        const uint8_t *mask, unsigned int size)
395 {
396         if (item->mask) {
397                 const struct rte_flow_item_eth *mask = item->mask;
398
399                 if (mask->dst.addr_bytes[0] != 0xff ||
400                                 mask->dst.addr_bytes[1] != 0xff ||
401                                 mask->dst.addr_bytes[2] != 0xff ||
402                                 mask->dst.addr_bytes[3] != 0xff ||
403                                 mask->dst.addr_bytes[4] != 0xff ||
404                                 mask->dst.addr_bytes[5] != 0xff)
405                         return -1;
406         }
407         return mlx4_flow_item_validate(item, mask, size);
408 }
409
410 static int
411 mlx4_flow_validate_vlan(const struct rte_flow_item *item,
412                         const uint8_t *mask, unsigned int size)
413 {
414         if (item->mask) {
415                 const struct rte_flow_item_vlan *mask = item->mask;
416
417                 if (mask->tci != 0 &&
418                     ntohs(mask->tci) != 0x0fff)
419                         return -1;
420         }
421         return mlx4_flow_item_validate(item, mask, size);
422 }
423
424 static int
425 mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
426                         const uint8_t *mask, unsigned int size)
427 {
428         if (item->mask) {
429                 const struct rte_flow_item_ipv4 *mask = item->mask;
430
431                 if (mask->hdr.src_addr != 0 &&
432                     mask->hdr.src_addr != 0xffffffff)
433                         return -1;
434                 if (mask->hdr.dst_addr != 0 &&
435                     mask->hdr.dst_addr != 0xffffffff)
436                         return -1;
437         }
438         return mlx4_flow_item_validate(item, mask, size);
439 }
440
441 static int
442 mlx4_flow_validate_udp(const struct rte_flow_item *item,
443                        const uint8_t *mask, unsigned int size)
444 {
445         if (item->mask) {
446                 const struct rte_flow_item_udp *mask = item->mask;
447
448                 if (mask->hdr.src_port != 0 &&
449                     mask->hdr.src_port != 0xffff)
450                         return -1;
451                 if (mask->hdr.dst_port != 0 &&
452                     mask->hdr.dst_port != 0xffff)
453                         return -1;
454         }
455         return mlx4_flow_item_validate(item, mask, size);
456 }
457
458 static int
459 mlx4_flow_validate_tcp(const struct rte_flow_item *item,
460                        const uint8_t *mask, unsigned int size)
461 {
462         if (item->mask) {
463                 const struct rte_flow_item_tcp *mask = item->mask;
464
465                 if (mask->hdr.src_port != 0 &&
466                     mask->hdr.src_port != 0xffff)
467                         return -1;
468                 if (mask->hdr.dst_port != 0 &&
469                     mask->hdr.dst_port != 0xffff)
470                         return -1;
471         }
472         return mlx4_flow_item_validate(item, mask, size);
473 }
474
475 /** Graph of supported items and associated actions. */
476 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
477         [RTE_FLOW_ITEM_TYPE_END] = {
478                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
479         },
480         [RTE_FLOW_ITEM_TYPE_ETH] = {
481                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
482                                        RTE_FLOW_ITEM_TYPE_IPV4),
483                 .mask = &(const struct rte_flow_item_eth){
484                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
485                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
486                 },
487                 .default_mask = &rte_flow_item_eth_mask,
488                 .mask_sz = sizeof(struct rte_flow_item_eth),
489                 .validate = mlx4_flow_validate_eth,
490                 .convert = mlx4_flow_create_eth,
491                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
492         },
493         [RTE_FLOW_ITEM_TYPE_VLAN] = {
494                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
495                 .mask = &(const struct rte_flow_item_vlan){
496                         /* Only TCI VID matching is supported. */
497                         .tci = RTE_BE16(0x0fff),
498                 },
499                 .mask_sz = sizeof(struct rte_flow_item_vlan),
500                 .validate = mlx4_flow_validate_vlan,
501                 .convert = mlx4_flow_create_vlan,
502                 .dst_sz = 0,
503         },
504         [RTE_FLOW_ITEM_TYPE_IPV4] = {
505                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
506                                        RTE_FLOW_ITEM_TYPE_TCP),
507                 .mask = &(const struct rte_flow_item_ipv4){
508                         .hdr = {
509                                 .src_addr = RTE_BE32(0xffffffff),
510                                 .dst_addr = RTE_BE32(0xffffffff),
511                         },
512                 },
513                 .default_mask = &rte_flow_item_ipv4_mask,
514                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
515                 .validate = mlx4_flow_validate_ipv4,
516                 .convert = mlx4_flow_create_ipv4,
517                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
518         },
519         [RTE_FLOW_ITEM_TYPE_UDP] = {
520                 .mask = &(const struct rte_flow_item_udp){
521                         .hdr = {
522                                 .src_port = RTE_BE16(0xffff),
523                                 .dst_port = RTE_BE16(0xffff),
524                         },
525                 },
526                 .default_mask = &rte_flow_item_udp_mask,
527                 .mask_sz = sizeof(struct rte_flow_item_udp),
528                 .validate = mlx4_flow_validate_udp,
529                 .convert = mlx4_flow_create_udp,
530                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
531         },
532         [RTE_FLOW_ITEM_TYPE_TCP] = {
533                 .mask = &(const struct rte_flow_item_tcp){
534                         .hdr = {
535                                 .src_port = RTE_BE16(0xffff),
536                                 .dst_port = RTE_BE16(0xffff),
537                         },
538                 },
539                 .default_mask = &rte_flow_item_tcp_mask,
540                 .mask_sz = sizeof(struct rte_flow_item_tcp),
541                 .validate = mlx4_flow_validate_tcp,
542                 .convert = mlx4_flow_create_tcp,
543                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
544         },
545 };
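
/*
 * Patterns this graph can express once VOID and internal marker items are
 * stripped: ETH, optionally followed by VLAN, optionally followed by IPV4,
 * itself optionally followed by either UDP or TCP.
 */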
546
547 /**
548  * Make sure a flow rule is supported and initialize associated structure.
549  *
550  * @param priv
551  *   Pointer to private structure.
552  * @param[in] attr
553  *   Flow rule attributes.
554  * @param[in] pattern
555  *   Pattern specification (list terminated by the END pattern item).
556  * @param[in] actions
557  *   Associated actions (list terminated by the END action).
558  * @param[out] error
559  *   Perform verbose error reporting if not NULL.
560  * @param[in, out] addr
561  *   Buffer where the resulting flow rule handle pointer must be stored.
562  *   If NULL, stop processing after validation stage.
563  *
564  * @return
565  *   0 on success, a negative errno value otherwise and rte_errno is set.
566  */
567 static int
568 mlx4_flow_prepare(struct priv *priv,
569                   const struct rte_flow_attr *attr,
570                   const struct rte_flow_item pattern[],
571                   const struct rte_flow_action actions[],
572                   struct rte_flow_error *error,
573                   struct rte_flow **addr)
574 {
575         const struct rte_flow_item *item;
576         const struct rte_flow_action *action;
577         const struct mlx4_flow_proc_item *proc;
578         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
579         struct rte_flow *flow = &temp;
580
581         if (attr->group)
582                 return rte_flow_error_set
583                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
584                          NULL, "groups are not supported");
585         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
586                 return rte_flow_error_set
587                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
588                          NULL, "maximum priority level is "
589                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
590         if (attr->egress)
591                 return rte_flow_error_set
592                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
593                          NULL, "egress is not supported");
594         if (!attr->ingress)
595                 return rte_flow_error_set
596                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
597                          NULL, "only ingress is supported");
598 fill:
599         proc = mlx4_flow_proc_item_list;
600         /* Go over pattern. */
601         for (item = pattern; item->type; ++item) {
602                 const struct mlx4_flow_proc_item *next = NULL;
603                 unsigned int i;
604                 int err;
605
606                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
607                         continue;
608                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
609                         flow->internal = 1;
610                         continue;
611                 }
612                 /*
613                  * The NIC only supports patterns with a NULL Ethernet
614                  * spec when Ethernet is the sole item in the rule.
615                  */
616                 if (!item->spec && item->type == RTE_FLOW_ITEM_TYPE_ETH) {
617                         const struct rte_flow_item *next = item + 1;
618
619                         if (next->type)
620                                 return rte_flow_error_set
621                                         (error, ENOTSUP,
622                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
623                                          "the rule requires an Ethernet spec");
624                 }
625                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
626                         if (proc->next_item[i] == item->type) {
627                                 next = &mlx4_flow_proc_item_list[item->type];
628                                 break;
629                         }
630                 }
631                 if (!next)
632                         goto exit_item_not_supported;
633                 proc = next;
634                 /* Perform validation once, while the handle is not yet allocated. */
635                 if (flow == &temp) {
636                         err = proc->validate(item, proc->mask, proc->mask_sz);
637                         if (err)
638                                 goto exit_item_not_supported;
639                 } else if (proc->convert) {
640                         err = proc->convert(item,
641                                             (proc->default_mask ?
642                                              proc->default_mask :
643                                              proc->mask),
644                                             flow);
645                         if (err)
646                                 goto exit_item_not_supported;
647                 }
648                 flow->ibv_attr_size += proc->dst_sz;
649         }
650         /* Go over actions list. */
651         for (action = actions; action->type; ++action) {
652                 switch (action->type) {
653                         const struct rte_flow_action_queue *queue;
654
655                 case RTE_FLOW_ACTION_TYPE_VOID:
656                         continue;
657                 case RTE_FLOW_ACTION_TYPE_DROP:
658                         flow->drop = 1;
659                         break;
660                 case RTE_FLOW_ACTION_TYPE_QUEUE:
661                         queue = action->conf;
662                         if (queue->index >= priv->dev->data->nb_rx_queues)
663                                 goto exit_action_not_supported;
664                         flow->queue = 1;
665                         flow->queue_id = queue->index;
666                         break;
667                 default:
668                         goto exit_action_not_supported;
669                 }
670         }
671         if (!flow->queue && !flow->drop)
672                 return rte_flow_error_set
673                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
674                          NULL, "no valid action");
675         /* Validation ends here. */
676         if (!addr)
677                 return 0;
678         if (flow == &temp) {
679                 /* Allocate proper handle based on collected data. */
680                 const struct mlx4_malloc_vec vec[] = {
681                         {
682                                 .align = alignof(struct rte_flow),
683                                 .size = sizeof(*flow),
684                                 .addr = (void **)&flow,
685                         },
686                         {
687                                 .align = alignof(struct ibv_flow_attr),
688                                 .size = temp.ibv_attr_size,
689                                 .addr = (void **)&temp.ibv_attr,
690                         },
691                 };
692
693                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
694                         return rte_flow_error_set
695                                 (error, -rte_errno,
696                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
697                                  "flow rule handle allocation failure");
698                 /* Most fields will be updated by the second pass. */
699                 *flow = (struct rte_flow){
700                         .ibv_attr = temp.ibv_attr,
701                         .ibv_attr_size = sizeof(*flow->ibv_attr),
702                 };
703                 *flow->ibv_attr = (struct ibv_flow_attr){
704                         .type = IBV_FLOW_ATTR_NORMAL,
705                         .size = sizeof(*flow->ibv_attr),
706                         .priority = attr->priority,
707                         .port = priv->port,
708                 };
709                 goto fill;
710         }
711         *addr = flow;
712         return 0;
713 exit_item_not_supported:
714         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
715                                   item, "item not supported");
716 exit_action_not_supported:
717         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
718                                   action, "action not supported");
719 }
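
/*
 * The function above is intentionally two-pass: the first pass runs on the
 * stack-allocated "temp" handle to validate each item and accumulate
 * ibv_attr_size, the allocation performed while flow == &temp then reserves
 * exactly that much room, and the second pass ("goto fill") converts every
 * item into the freshly allocated ibv_flow_attr.
 */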
720
721 /**
722  * Validate a flow supported by the NIC.
723  *
724  * @see rte_flow_validate()
725  * @see rte_flow_ops
726  */
727 static int
728 mlx4_flow_validate(struct rte_eth_dev *dev,
729                    const struct rte_flow_attr *attr,
730                    const struct rte_flow_item pattern[],
731                    const struct rte_flow_action actions[],
732                    struct rte_flow_error *error)
733 {
734         struct priv *priv = dev->data->dev_private;
735
736         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
737 }
738
739 /**
740  * Get a reference to the resources shared by drop flow rules.
741  *
742  * @param priv
743  *   Pointer to private structure.
744  *
745  * @return
746  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
747  *   is set.
748  */
749 static struct mlx4_drop *
750 mlx4_drop_get(struct priv *priv)
751 {
752         struct mlx4_drop *drop = priv->drop;
753
754         if (drop) {
755                 assert(drop->refcnt);
756                 assert(drop->priv == priv);
757                 ++drop->refcnt;
758                 return drop;
759         }
760         drop = rte_malloc(__func__, sizeof(*drop), 0);
761         if (!drop)
762                 goto error;
763         *drop = (struct mlx4_drop){
764                 .priv = priv,
765                 .refcnt = 1,
766         };
767         drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
768         if (!drop->cq)
769                 goto error;
770         drop->qp = ibv_create_qp(priv->pd,
771                                  &(struct ibv_qp_init_attr){
772                                         .send_cq = drop->cq,
773                                         .recv_cq = drop->cq,
774                                         .qp_type = IBV_QPT_RAW_PACKET,
775                                  });
776         if (!drop->qp)
777                 goto error;
778         priv->drop = drop;
779         return drop;
780 error:
781         if (drop && drop->qp)
782                 claim_zero(ibv_destroy_qp(drop->qp));
783         if (drop && drop->cq)
784                 claim_zero(ibv_destroy_cq(drop->cq));
785         if (drop)
786                 rte_free(drop);
787         rte_errno = ENOMEM;
788         return NULL;
789 }
790
791 /**
792  * Release a reference to the resources shared by drop flow rules.
793  *
794  * @param drop
795  *   Pointer to drop flow rule resources.
796  */
797 static void
798 mlx4_drop_put(struct mlx4_drop *drop)
799 {
800         assert(drop->refcnt);
801         if (--drop->refcnt)
802                 return;
803         drop->priv->drop = NULL;
804         claim_zero(ibv_destroy_qp(drop->qp));
805         claim_zero(ibv_destroy_cq(drop->cq));
806         rte_free(drop);
807 }
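
/*
 * mlx4_drop_get() and mlx4_drop_put() are reference-counted: the QP/CQ pair
 * is created on the first get, shared by every drop flow rule afterwards,
 * and only released once the last user calls mlx4_drop_put().
 */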
808
809 /**
810  * Toggle a configured flow rule.
811  *
812  * @param priv
813  *   Pointer to private structure.
814  * @param flow
815  *   Flow rule handle to toggle.
816  * @param enable
817  *   Whether associated Verbs flow must be created or removed.
818  * @param[out] error
819  *   Perform verbose error reporting if not NULL.
820  *
821  * @return
822  *   0 on success, a negative errno value otherwise and rte_errno is set.
823  */
824 static int
825 mlx4_flow_toggle(struct priv *priv,
826                  struct rte_flow *flow,
827                  int enable,
828                  struct rte_flow_error *error)
829 {
830         struct ibv_qp *qp = NULL;
831         const char *msg;
832         int err;
833
834         if (!enable) {
835                 if (!flow->ibv_flow)
836                         return 0;
837                 claim_zero(ibv_destroy_flow(flow->ibv_flow));
838                 flow->ibv_flow = NULL;
839                 if (flow->drop)
840                         mlx4_drop_put(priv->drop);
841                 return 0;
842         }
843         assert(flow->ibv_attr);
844         if (!flow->internal &&
845             !priv->isolated &&
846             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
847                 if (flow->ibv_flow) {
848                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
849                         flow->ibv_flow = NULL;
850                         if (flow->drop)
851                                 mlx4_drop_put(priv->drop);
852                 }
853                 err = EACCES;
854                 msg = ("priority level "
855                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
856                        " is reserved when not in isolated mode");
857                 goto error;
858         }
859         if (flow->queue) {
860                 struct rxq *rxq = NULL;
861
862                 if (flow->queue_id < priv->dev->data->nb_rx_queues)
863                         rxq = priv->dev->data->rx_queues[flow->queue_id];
864                 if (flow->ibv_flow) {
865                         if (!rxq ^ !flow->drop)
866                                 return 0;
867                         /* Verbs flow needs updating. */
868                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
869                         flow->ibv_flow = NULL;
870                         if (flow->drop)
871                                 mlx4_drop_put(priv->drop);
872                 }
873                 if (rxq)
874                         qp = rxq->qp;
875                 /* A missing target queue drops traffic implicitly. */
876                 flow->drop = !rxq;
877         }
878         if (flow->drop) {
879                 mlx4_drop_get(priv);
880                 if (!priv->drop) {
881                         err = rte_errno;
882                         msg = "resources for drop flow rule cannot be created";
883                         goto error;
884                 }
885                 qp = priv->drop->qp;
886         }
887         assert(qp);
888         if (flow->ibv_flow)
889                 return 0;
890         flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
891         if (flow->ibv_flow)
892                 return 0;
893         if (flow->drop)
894                 mlx4_drop_put(priv->drop);
895         err = errno;
896         msg = "flow rule rejected by device";
897 error:
898         return rte_flow_error_set
899                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
900 }
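
/*
 * Summary of the toggle logic above: disabling destroys the Verbs flow and,
 * for drop rules, releases the shared drop resources; enabling picks a
 * target QP, the Rx queue QP when the target queue exists or the shared
 * drop QP otherwise, and (re)creates the Verbs flow against it. Priority
 * level MLX4_FLOW_PRIORITY_LAST stays reserved for internal rules outside
 * isolated mode.
 */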
901
902 /**
903  * Create a flow.
904  *
905  * @see rte_flow_create()
906  * @see rte_flow_ops
907  */
908 static struct rte_flow *
909 mlx4_flow_create(struct rte_eth_dev *dev,
910                  const struct rte_flow_attr *attr,
911                  const struct rte_flow_item pattern[],
912                  const struct rte_flow_action actions[],
913                  struct rte_flow_error *error)
914 {
915         struct priv *priv = dev->data->dev_private;
916         struct rte_flow *flow;
917         int err;
918
919         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
920         if (err)
921                 return NULL;
922         err = mlx4_flow_toggle(priv, flow, priv->started, error);
923         if (!err) {
924                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
925
926                 /* New rules are inserted after internal ones. */
927                 if (!curr || !curr->internal) {
928                         LIST_INSERT_HEAD(&priv->flows, flow, next);
929                 } else {
930                         while (LIST_NEXT(curr, next) &&
931                                LIST_NEXT(curr, next)->internal)
932                                 curr = LIST_NEXT(curr, next);
933                         LIST_INSERT_AFTER(curr, flow, next);
934                 }
935                 return flow;
936         }
937         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
938                            error->message);
939         rte_free(flow);
940         return NULL;
941 }
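
/*
 * As a result the rule list always looks like
 *
 *   priv->flows: [internal rules] -> [user rules, newest first]
 *
 * an ordering mlx4_flow_sync() relies on below.
 */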
942
943 /**
944  * Configure isolated mode.
945  *
946  * @see rte_flow_isolate()
947  * @see rte_flow_ops
948  */
949 static int
950 mlx4_flow_isolate(struct rte_eth_dev *dev,
951                   int enable,
952                   struct rte_flow_error *error)
953 {
954         struct priv *priv = dev->data->dev_private;
955
956         if (!!enable == !!priv->isolated)
957                 return 0;
958         priv->isolated = !!enable;
959         if (mlx4_flow_sync(priv, error)) {
960                 priv->isolated = !enable;
961                 return -rte_errno;
962         }
963         return 0;
964 }
965
966 /**
967  * Destroy a flow rule.
968  *
969  * @see rte_flow_destroy()
970  * @see rte_flow_ops
971  */
972 static int
973 mlx4_flow_destroy(struct rte_eth_dev *dev,
974                   struct rte_flow *flow,
975                   struct rte_flow_error *error)
976 {
977         struct priv *priv = dev->data->dev_private;
978         int err = mlx4_flow_toggle(priv, flow, 0, error);
979
980         if (err)
981                 return err;
982         LIST_REMOVE(flow, next);
983         rte_free(flow);
984         return 0;
985 }
986
987 /**
988  * Destroy user-configured flow rules.
989  *
990  * This function skips internal flow rules.
991  *
992  * @see rte_flow_flush()
993  * @see rte_flow_ops
994  */
995 static int
996 mlx4_flow_flush(struct rte_eth_dev *dev,
997                 struct rte_flow_error *error)
998 {
999         struct priv *priv = dev->data->dev_private;
1000         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1001
1002         while (flow) {
1003                 struct rte_flow *next = LIST_NEXT(flow, next);
1004
1005                 if (!flow->internal)
1006                         mlx4_flow_destroy(dev, flow, error);
1007                 flow = next;
1008         }
1009         return 0;
1010 }
1011
1012 /**
1013  * Generate internal flow rules.
1014  *
1015  * @param priv
1016  *   Pointer to private structure.
1017  * @param[out] error
1018  *   Perform verbose error reporting if not NULL.
1019  *
1020  * @return
1021  *   0 on success, a negative errno value otherwise and rte_errno is set.
1022  */
1023 static int
1024 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1025 {
1026         struct rte_flow_attr attr = {
1027                 .priority = MLX4_FLOW_PRIORITY_LAST,
1028                 .ingress = 1,
1029         };
1030         struct rte_flow_item pattern[] = {
1031                 {
1032                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1033                 },
1034                 {
1035                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1036                         .spec = &(struct rte_flow_item_eth){
1037                                 .dst = priv->mac,
1038                         },
1039                         .mask = &(struct rte_flow_item_eth){
1040                                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1041                         },
1042                 },
1043                 {
1044                         .type = RTE_FLOW_ITEM_TYPE_END,
1045                 },
1046         };
1047         struct rte_flow_action actions[] = {
1048                 {
1049                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1050                         .conf = &(struct rte_flow_action_queue){
1051                                 .index = 0,
1052                         },
1053                 },
1054                 {
1055                         .type = RTE_FLOW_ACTION_TYPE_END,
1056                 },
1057         };
1058
1059         if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
1060                 return -rte_errno;
1061         return 0;
1062 }
1063
1064 /**
1065  * Synchronize flow rules.
1066  *
1067  * This function synchronizes flow rules with the state of the device by
1068  * taking into account isolated mode and whether target queues are
1069  * configured.
1070  *
1071  * @param priv
1072  *   Pointer to private structure.
1073  * @param[out] error
1074  *   Perform verbose error reporting if not NULL.
1075  *
1076  * @return
1077  *   0 on success, a negative errno value otherwise and rte_errno is set.
1078  */
1079 int
1080 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1081 {
1082         struct rte_flow *flow;
1083         int ret;
1084
1085         /* Internal flow rules are guaranteed to come first in the list. */
1086         if (priv->isolated) {
1087                 /*
1088                  * Get rid of internal rules in isolated mode, stopping
1089                  * at the first non-internal rule found.
1090                  */
1091                 for (flow = LIST_FIRST(&priv->flows);
1092                      flow && flow->internal;
1093                      flow = LIST_FIRST(&priv->flows))
1094                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1095         } else if (!LIST_FIRST(&priv->flows) ||
1096                    !LIST_FIRST(&priv->flows)->internal) {
1097                 /*
1098                  * Outside isolated mode, internal rules must be added
1099                  * back when the first rule in the list is not internal.
1100                  */
1101                 ret = mlx4_flow_internal(priv, error);
1102                 if (ret)
1103                         return ret;
1104         }
1105         /* Toggle the remaining flow rules. */
1106         for (flow = LIST_FIRST(&priv->flows);
1107              flow;
1108              flow = LIST_NEXT(flow, next)) {
1109                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1110                 if (ret)
1111                         return ret;
1112         }
1113         if (!priv->started)
1114                 assert(!priv->drop);
1115         return 0;
1116 }
1117
1118 /**
1119  * Clean up all flow rules.
1120  *
1121  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1122  * rules regardless of whether they are internal or user-configured.
1123  *
1124  * @param priv
1125  *   Pointer to private structure.
1126  */
1127 void
1128 mlx4_flow_clean(struct priv *priv)
1129 {
1130         struct rte_flow *flow;
1131
1132         while ((flow = LIST_FIRST(&priv->flows)))
1133                 mlx4_flow_destroy(priv->dev, flow, NULL);
1134 }
1135
1136 static const struct rte_flow_ops mlx4_flow_ops = {
1137         .validate = mlx4_flow_validate,
1138         .create = mlx4_flow_create,
1139         .destroy = mlx4_flow_destroy,
1140         .flush = mlx4_flow_flush,
1141         .isolate = mlx4_flow_isolate,
1142 };
1143
1144 /**
1145  * Manage filter operations.
1146  *
1147  * @param dev
1148  *   Pointer to Ethernet device structure.
1149  * @param filter_type
1150  *   Filter type.
1151  * @param filter_op
1152  *   Operation to perform.
1153  * @param arg
1154  *   Pointer to operation-specific structure.
1155  *
1156  * @return
1157  *   0 on success, negative errno value otherwise and rte_errno is set.
1158  */
1159 int
1160 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1161                  enum rte_filter_type filter_type,
1162                  enum rte_filter_op filter_op,
1163                  void *arg)
1164 {
1165         switch (filter_type) {
1166         case RTE_ETH_FILTER_GENERIC:
1167                 if (filter_op != RTE_ETH_FILTER_GET)
1168                         break;
1169                 *(const void **)arg = &mlx4_flow_ops;
1170                 return 0;
1171         default:
1172                 ERROR("%p: filter type (%d) not supported",
1173                       (void *)dev, filter_type);
1174                 break;
1175         }
1176         rte_errno = ENOTSUP;
1177         return -rte_errno;
1178 }
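
/*
 * Application-side sketch (assumptions: a started mlx4 port identified by a
 * placeholder "port_id" whose Rx queue 0 exists) showing how the operations
 * in this file are reached through the generic rte_flow API:
 *
 *      struct rte_flow_error error;
 *      struct rte_flow_attr attr = { .ingress = 1 };
 *      struct rte_flow_item_eth eth_spec = {
 *              .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *      };
 *      struct rte_flow_item_eth eth_mask = {
 *              .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *      };
 *      struct rte_flow_item pattern[] = {
 *              {
 *                      .type = RTE_FLOW_ITEM_TYPE_ETH,
 *                      .spec = &eth_spec,
 *                      .mask = &eth_mask,
 *              },
 *              { .type = RTE_FLOW_ITEM_TYPE_END },
 *      };
 *      struct rte_flow_action actions[] = {
 *              {
 *                      .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *                      .conf = &(const struct rte_flow_action_queue){
 *                              .index = 0,
 *                      },
 *              },
 *              { .type = RTE_FLOW_ACTION_TYPE_END },
 *      };
 *      struct rte_flow *flow = NULL;
 *
 *      if (!rte_flow_validate(port_id, &attr, pattern, actions, &error))
 *              flow = rte_flow_create(port_id, &attr, pattern, actions,
 *                                     &error);
 */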