[dpdk.git] drivers/net/mlx4/mlx4_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2017 6WIND S.A.
5  *   Copyright 2017 Mellanox
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 /**
35  * @file
36  * Flow API operations for mlx4 driver.
37  */
38
39 #include <arpa/inet.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <stdalign.h>
43 #include <stddef.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <sys/queue.h>
47
48 /* Verbs headers do not support -pedantic. */
49 #ifdef PEDANTIC
50 #pragma GCC diagnostic ignored "-Wpedantic"
51 #endif
52 #include <infiniband/verbs.h>
53 #ifdef PEDANTIC
54 #pragma GCC diagnostic error "-Wpedantic"
55 #endif
56
57 #include <rte_byteorder.h>
58 #include <rte_errno.h>
59 #include <rte_eth_ctrl.h>
60 #include <rte_ethdev.h>
61 #include <rte_flow.h>
62 #include <rte_flow_driver.h>
63 #include <rte_malloc.h>
64
65 /* PMD headers. */
66 #include "mlx4.h"
67 #include "mlx4_flow.h"
68 #include "mlx4_rxtx.h"
69 #include "mlx4_utils.h"
70
71 /** Static initializer for a list of subsequent item types. */
72 #define NEXT_ITEM(...) \
73         (const enum rte_flow_item_type []){ \
74                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
75         }
76
77 /** Processor structure associated with a flow item. */
78 struct mlx4_flow_proc_item {
79         /** Bit-masks corresponding to the possibilities for the item. */
80         const void *mask;
81         /**
82          * Default bit-masks to use when item->mask is not provided. When
83          * \default_mask is also NULL, the full supported bit-mask (\mask) is
84          * used instead.
85          */
86         const void *default_mask;
87         /** Bit-mask size in bytes. */
88         const unsigned int mask_sz;
89         /**
90          * Check support for a given item.
91          *
92          * @param item[in]
93          *   Item specification.
94          * @param mask[in]
95          *   Bit-masks covering supported fields to compare with spec,
96          *   last and mask in
97          *   \item.
98          * @param size
99          *   Bit-mask size in bytes.
100          *
101          * @return
102          *   0 on success, negative value otherwise.
103          */
104         int (*validate)(const struct rte_flow_item *item,
105                         const uint8_t *mask, unsigned int size);
106         /**
107          * Conversion function from rte_flow to NIC specific flow.
108          *
109          * @param item
110          *   rte_flow item to convert.
111          * @param default_mask
112          *   Default bit-masks to use when item->mask is not provided.
113          * @param flow
114          *   Flow rule handle to update.
115          *
116          * @return
117          *   0 on success, negative value otherwise.
118          */
119         int (*convert)(const struct rte_flow_item *item,
120                        const void *default_mask,
121                        struct rte_flow *flow);
122         /** Size in bytes of the destination structure. */
123         const unsigned int dst_sz;
124         /** List of possible subsequent items. */
125         const enum rte_flow_item_type *const next_item;
126 };
127
128 /** Shared resources for drop flow rules. */
129 struct mlx4_drop {
130         struct ibv_qp *qp; /**< QP target. */
131         struct ibv_cq *cq; /**< CQ associated with above QP. */
132         struct priv *priv; /**< Back pointer to private data. */
133         uint32_t refcnt; /**< Reference count. */
134 };
135
136 /**
137  * Convert Ethernet item to Verbs specification.
138  *
139  * @param item[in]
140  *   Item specification.
141  * @param default_mask[in]
142  *   Default bit-masks to use when item->mask is not provided.
143  * @param flow[in, out]
144  *   Flow rule handle to update.
145  */
146 static int
147 mlx4_flow_create_eth(const struct rte_flow_item *item,
148                      const void *default_mask,
149                      struct rte_flow *flow)
150 {
151         const struct rte_flow_item_eth *spec = item->spec;
152         const struct rte_flow_item_eth *mask = item->mask;
153         struct ibv_flow_spec_eth *eth;
154         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
155         unsigned int i;
156
157         ++flow->ibv_attr->num_of_specs;
158         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
159         *eth = (struct ibv_flow_spec_eth) {
160                 .type = IBV_FLOW_SPEC_ETH,
161                 .size = eth_size,
162         };
163         if (!spec) {
164                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
165                 return 0;
166         }
167         if (!mask)
168                 mask = default_mask;
169         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
170         memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
171         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
172         memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
173         /* Remove unwanted bits from values. */
174         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
175                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
176                 eth->val.src_mac[i] &= eth->mask.src_mac[i];
177         }
178         return 0;
179 }
180
181 /**
182  * Convert VLAN item to Verbs specification.
183  *
184  * @param item[in]
185  *   Item specification.
186  * @param default_mask[in]
187  *   Default bit-masks to use when item->mask is not provided.
188  * @param flow[in, out]
189  *   Flow rule handle to update.
190  */
191 static int
192 mlx4_flow_create_vlan(const struct rte_flow_item *item,
193                       const void *default_mask,
194                       struct rte_flow *flow)
195 {
196         const struct rte_flow_item_vlan *spec = item->spec;
197         const struct rte_flow_item_vlan *mask = item->mask;
198         struct ibv_flow_spec_eth *eth;
199         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
200
201         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
202                        eth_size);
203         if (!spec)
204                 return 0;
205         if (!mask)
206                 mask = default_mask;
207         eth->val.vlan_tag = spec->tci;
208         eth->mask.vlan_tag = mask->tci;
209         eth->val.vlan_tag &= eth->mask.vlan_tag;
210         return 0;
211 }
212
213 /**
214  * Convert IPv4 item to Verbs specification.
215  *
216  * @param item[in]
217  *   Item specification.
218  * @param default_mask[in]
219  *   Default bit-masks to use when item->mask is not provided.
220  * @param flow[in, out]
221  *   Flow rule handle to update.
222  */
223 static int
224 mlx4_flow_create_ipv4(const struct rte_flow_item *item,
225                       const void *default_mask,
226                       struct rte_flow *flow)
227 {
228         const struct rte_flow_item_ipv4 *spec = item->spec;
229         const struct rte_flow_item_ipv4 *mask = item->mask;
230         struct ibv_flow_spec_ipv4 *ipv4;
231         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4);
232
233         ++flow->ibv_attr->num_of_specs;
234         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
235         *ipv4 = (struct ibv_flow_spec_ipv4) {
236                 .type = IBV_FLOW_SPEC_IPV4,
237                 .size = ipv4_size,
238         };
239         if (!spec)
240                 return 0;
241         ipv4->val = (struct ibv_flow_ipv4_filter) {
242                 .src_ip = spec->hdr.src_addr,
243                 .dst_ip = spec->hdr.dst_addr,
244         };
245         if (!mask)
246                 mask = default_mask;
247         ipv4->mask = (struct ibv_flow_ipv4_filter) {
248                 .src_ip = mask->hdr.src_addr,
249                 .dst_ip = mask->hdr.dst_addr,
250         };
251         /* Remove unwanted bits from values. */
252         ipv4->val.src_ip &= ipv4->mask.src_ip;
253         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
254         return 0;
255 }
256
257 /**
258  * Convert UDP item to Verbs specification.
259  *
260  * @param item[in]
261  *   Item specification.
262  * @param default_mask[in]
263  *   Default bit-masks to use when item->mask is not provided.
264  * @param flow[in, out]
265  *   Flow rule handle to update.
266  */
267 static int
268 mlx4_flow_create_udp(const struct rte_flow_item *item,
269                      const void *default_mask,
270                      struct rte_flow *flow)
271 {
272         const struct rte_flow_item_udp *spec = item->spec;
273         const struct rte_flow_item_udp *mask = item->mask;
274         struct ibv_flow_spec_tcp_udp *udp;
275         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
276
277         ++flow->ibv_attr->num_of_specs;
278         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
279         *udp = (struct ibv_flow_spec_tcp_udp) {
280                 .type = IBV_FLOW_SPEC_UDP,
281                 .size = udp_size,
282         };
283         if (!spec)
284                 return 0;
285         udp->val.dst_port = spec->hdr.dst_port;
286         udp->val.src_port = spec->hdr.src_port;
287         if (!mask)
288                 mask = default_mask;
289         udp->mask.dst_port = mask->hdr.dst_port;
290         udp->mask.src_port = mask->hdr.src_port;
291         /* Remove unwanted bits from values. */
292         udp->val.src_port &= udp->mask.src_port;
293         udp->val.dst_port &= udp->mask.dst_port;
294         return 0;
295 }
296
297 /**
298  * Convert TCP item to Verbs specification.
299  *
300  * @param item[in]
301  *   Item specification.
302  * @param default_mask[in]
303  *   Default bit-masks to use when item->mask is not provided.
304  * @param flow[in, out]
305  *   Flow rule handle to update.
306  */
307 static int
308 mlx4_flow_create_tcp(const struct rte_flow_item *item,
309                      const void *default_mask,
310                      struct rte_flow *flow)
311 {
312         const struct rte_flow_item_tcp *spec = item->spec;
313         const struct rte_flow_item_tcp *mask = item->mask;
314         struct ibv_flow_spec_tcp_udp *tcp;
315         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
316
317         ++flow->ibv_attr->num_of_specs;
318         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
319         *tcp = (struct ibv_flow_spec_tcp_udp) {
320                 .type = IBV_FLOW_SPEC_TCP,
321                 .size = tcp_size,
322         };
323         if (!spec)
324                 return 0;
325         tcp->val.dst_port = spec->hdr.dst_port;
326         tcp->val.src_port = spec->hdr.src_port;
327         if (!mask)
328                 mask = default_mask;
329         tcp->mask.dst_port = mask->hdr.dst_port;
330         tcp->mask.src_port = mask->hdr.src_port;
331         /* Remove unwanted bits from values. */
332         tcp->val.src_port &= tcp->mask.src_port;
333         tcp->val.dst_port &= tcp->mask.dst_port;
334         return 0;
335 }
336
337 /**
338  * Check support for a given item.
339  *
340  * @param item[in]
341  *   Item specification.
342  * @param mask[in]
343  *   Bit-masks covering supported fields to compare with spec, last and mask in
344  *   \item.
345  * @param size
346  *   Bit-mask size in bytes.
347  *
348  * @return
349  *   0 on success, negative value otherwise.
350  */
351 static int
352 mlx4_flow_item_validate(const struct rte_flow_item *item,
353                         const uint8_t *mask, unsigned int size)
354 {
355         int ret = 0;
356
357         if (!item->spec && (item->mask || item->last))
358                 return -1;
359         if (item->spec && !item->mask) {
360                 unsigned int i;
361                 const uint8_t *spec = item->spec;
362
363                 for (i = 0; i < size; ++i)
364                         if ((spec[i] | mask[i]) != mask[i])
365                                 return -1;
366         }
367         if (item->last && !item->mask) {
368                 unsigned int i;
369                 const uint8_t *spec = item->last;
370
371                 for (i = 0; i < size; ++i)
372                         if ((spec[i] | mask[i]) != mask[i])
373                                 return -1;
374         }
375         if (item->spec && item->last) {
376                 uint8_t spec[size];
377                 uint8_t last[size];
378                 const uint8_t *apply = mask;
379                 unsigned int i;
380
381                 if (item->mask)
382                         apply = item->mask;
383                 for (i = 0; i < size; ++i) {
384                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
385                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
386                 }
387                 ret = memcmp(spec, last, size);
388         }
389         return ret;
390 }
391
392 static int
393 mlx4_flow_validate_eth(const struct rte_flow_item *item,
394                        const uint8_t *mask, unsigned int size)
395 {
396         if (item->mask) {
397                 const struct rte_flow_item_eth *mask = item->mask;
398
399                 if (mask->dst.addr_bytes[0] != 0xff ||
400                                 mask->dst.addr_bytes[1] != 0xff ||
401                                 mask->dst.addr_bytes[2] != 0xff ||
402                                 mask->dst.addr_bytes[3] != 0xff ||
403                                 mask->dst.addr_bytes[4] != 0xff ||
404                                 mask->dst.addr_bytes[5] != 0xff)
405                         return -1;
406         }
407         return mlx4_flow_item_validate(item, mask, size);
408 }
409
410 static int
411 mlx4_flow_validate_vlan(const struct rte_flow_item *item,
412                         const uint8_t *mask, unsigned int size)
413 {
414         if (item->mask) {
415                 const struct rte_flow_item_vlan *mask = item->mask;
416
417                 if (mask->tci != 0 &&
418                     ntohs(mask->tci) != 0x0fff)
419                         return -1;
420         }
421         return mlx4_flow_item_validate(item, mask, size);
422 }
423
424 static int
425 mlx4_flow_validate_ipv4(const struct rte_flow_item *item,
426                         const uint8_t *mask, unsigned int size)
427 {
428         if (item->mask) {
429                 const struct rte_flow_item_ipv4 *mask = item->mask;
430
431                 if (mask->hdr.src_addr != 0 &&
432                     mask->hdr.src_addr != 0xffffffff)
433                         return -1;
434                 if (mask->hdr.dst_addr != 0 &&
435                     mask->hdr.dst_addr != 0xffffffff)
436                         return -1;
437         }
438         return mlx4_flow_item_validate(item, mask, size);
439 }
440
441 static int
442 mlx4_flow_validate_udp(const struct rte_flow_item *item,
443                        const uint8_t *mask, unsigned int size)
444 {
445         if (item->mask) {
446                 const struct rte_flow_item_udp *mask = item->mask;
447
448                 if (mask->hdr.src_port != 0 &&
449                     mask->hdr.src_port != 0xffff)
450                         return -1;
451                 if (mask->hdr.dst_port != 0 &&
452                     mask->hdr.dst_port != 0xffff)
453                         return -1;
454         }
455         return mlx4_flow_item_validate(item, mask, size);
456 }
457
458 static int
459 mlx4_flow_validate_tcp(const struct rte_flow_item *item,
460                        const uint8_t *mask, unsigned int size)
461 {
462         if (item->mask) {
463                 const struct rte_flow_item_tcp *mask = item->mask;
464
465                 if (mask->hdr.src_port != 0 &&
466                     mask->hdr.src_port != 0xffff)
467                         return -1;
468                 if (mask->hdr.dst_port != 0 &&
469                     mask->hdr.dst_port != 0xffff)
470                         return -1;
471         }
472         return mlx4_flow_item_validate(item, mask, size);
473 }
474
475 /** Graph of supported pattern items and their processing handlers. */
476 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
477         [RTE_FLOW_ITEM_TYPE_END] = {
478                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
479         },
480         [RTE_FLOW_ITEM_TYPE_ETH] = {
481                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
482                                        RTE_FLOW_ITEM_TYPE_IPV4),
483                 .mask = &(const struct rte_flow_item_eth){
484                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
485                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
486                 },
487                 .default_mask = &rte_flow_item_eth_mask,
488                 .mask_sz = sizeof(struct rte_flow_item_eth),
489                 .validate = mlx4_flow_validate_eth,
490                 .convert = mlx4_flow_create_eth,
491                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
492         },
493         [RTE_FLOW_ITEM_TYPE_VLAN] = {
494                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
495                 .mask = &(const struct rte_flow_item_vlan){
496                         /* Only TCI VID matching is supported. */
497                         .tci = RTE_BE16(0x0fff),
498                 },
499                 .mask_sz = sizeof(struct rte_flow_item_vlan),
500                 .validate = mlx4_flow_validate_vlan,
501                 .convert = mlx4_flow_create_vlan,
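                /* No room needed: TCI is merged into the preceding Ethernet spec. */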
502                 .dst_sz = 0,
503         },
504         [RTE_FLOW_ITEM_TYPE_IPV4] = {
505                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
506                                        RTE_FLOW_ITEM_TYPE_TCP),
507                 .mask = &(const struct rte_flow_item_ipv4){
508                         .hdr = {
509                                 .src_addr = RTE_BE32(0xffffffff),
510                                 .dst_addr = RTE_BE32(0xffffffff),
511                         },
512                 },
513                 .default_mask = &rte_flow_item_ipv4_mask,
514                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
515                 .validate = mlx4_flow_validate_ipv4,
516                 .convert = mlx4_flow_create_ipv4,
517                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
518         },
519         [RTE_FLOW_ITEM_TYPE_UDP] = {
520                 .mask = &(const struct rte_flow_item_udp){
521                         .hdr = {
522                                 .src_port = RTE_BE16(0xffff),
523                                 .dst_port = RTE_BE16(0xffff),
524                         },
525                 },
526                 .default_mask = &rte_flow_item_udp_mask,
527                 .mask_sz = sizeof(struct rte_flow_item_udp),
528                 .validate = mlx4_flow_validate_udp,
529                 .convert = mlx4_flow_create_udp,
530                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
531         },
532         [RTE_FLOW_ITEM_TYPE_TCP] = {
533                 .mask = &(const struct rte_flow_item_tcp){
534                         .hdr = {
535                                 .src_port = RTE_BE16(0xffff),
536                                 .dst_port = RTE_BE16(0xffff),
537                         },
538                 },
539                 .default_mask = &rte_flow_item_tcp_mask,
540                 .mask_sz = sizeof(struct rte_flow_item_tcp),
541                 .validate = mlx4_flow_validate_tcp,
542                 .convert = mlx4_flow_create_tcp,
543                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
544         },
545 };
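/*
 * Illustrative sketch (not part of the driver): applications exercise this
 * graph through the generic flow API. Assuming eth_spec, ipv4_spec, udp_spec,
 * queue and attr are filled in by the caller, a rule steering UDP over IPv4
 * traffic to a given Rx queue would look like:
 *
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &eth_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4, .spec = &ipv4_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP, .spec = &udp_spec },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow *flow =
 *           rte_flow_create(port_id, &attr, pattern, actions, &error);
 */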
546
547 /**
548  * Make sure a flow rule is supported and initialize associated structure.
549  *
550  * @param priv
551  *   Pointer to private structure.
552  * @param[in] attr
553  *   Flow rule attributes.
554  * @param[in] pattern
555  *   Pattern specification (list terminated by the END pattern item).
556  * @param[in] actions
557  *   Associated actions (list terminated by the END action).
558  * @param[out] error
559  *   Perform verbose error reporting if not NULL.
560  * @param[in, out] addr
561  *   Buffer where the resulting flow rule handle pointer must be stored.
562  *   If NULL, stop processing after validation stage.
563  *
564  * @return
565  *   0 on success, a negative errno value otherwise and rte_errno is set.
566  */
567 static int
568 mlx4_flow_prepare(struct priv *priv,
569                   const struct rte_flow_attr *attr,
570                   const struct rte_flow_item pattern[],
571                   const struct rte_flow_action actions[],
572                   struct rte_flow_error *error,
573                   struct rte_flow **addr)
574 {
575         const struct rte_flow_item *item;
576         const struct rte_flow_action *action;
577         const struct mlx4_flow_proc_item *proc;
578         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
579         struct rte_flow *flow = &temp;
580
581         if (attr->group)
582                 return rte_flow_error_set
583                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
584                          NULL, "groups are not supported");
585         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
586                 return rte_flow_error_set
587                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
588                          NULL, "maximum priority level is "
589                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
590         if (attr->egress)
591                 return rte_flow_error_set
592                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
593                          NULL, "egress is not supported");
594         if (!attr->ingress)
595                 return rte_flow_error_set
596                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
597                          NULL, "only ingress is supported");
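        /*
         * Pattern and actions are processed in two passes: the first one,
         * with flow pointing to the temporary handle on the stack, only
         * validates items and accumulates the required Verbs attribute
         * size; once a proper handle has been allocated, processing starts
         * over from the "fill" label below to convert each item.
         */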
598 fill:
599         proc = mlx4_flow_proc_item_list;
600         /* Go over pattern. */
601         for (item = pattern; item->type; ++item) {
602                 const struct mlx4_flow_proc_item *next = NULL;
603                 unsigned int i;
604                 int err;
605
606                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
607                         continue;
608                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
609                         flow->internal = 1;
610                         continue;
611                 }
612                 /*
613                  * The NIC can support a pattern with a NULL Ethernet spec
614                  * only when no further items follow it in the rule.
615                  */
616                 if (!item->spec && item->type == RTE_FLOW_ITEM_TYPE_ETH) {
617                         const struct rte_flow_item *next = item + 1;
618
619                         if (next->type)
620                                 return rte_flow_error_set
621                                         (error, ENOTSUP,
622                                          RTE_FLOW_ERROR_TYPE_ITEM, item,
623                                          "the rule requires an Ethernet spec");
624                 }
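                /* The item must be a valid successor of the previous one. */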
625                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
626                         if (proc->next_item[i] == item->type) {
627                                 next = &mlx4_flow_proc_item_list[item->type];
628                                 break;
629                         }
630                 }
631                 if (!next)
632                         goto exit_item_not_supported;
633                 proc = next;
634                 /* Validate only once, while the handle is not allocated. */
635                 if (flow == &temp) {
636                         err = proc->validate(item, proc->mask, proc->mask_sz);
637                         if (err)
638                                 goto exit_item_not_supported;
639                 } else if (proc->convert) {
640                         err = proc->convert(item,
641                                             (proc->default_mask ?
642                                              proc->default_mask :
643                                              proc->mask),
644                                             flow);
645                         if (err)
646                                 goto exit_item_not_supported;
647                 }
648                 flow->ibv_attr_size += proc->dst_sz;
649         }
650         /* Go over actions list. */
651         for (action = actions; action->type; ++action) {
652                 switch (action->type) {
653                         const struct rte_flow_action_queue *queue;
654
655                 case RTE_FLOW_ACTION_TYPE_VOID:
656                         continue;
657                 case RTE_FLOW_ACTION_TYPE_DROP:
658                         flow->drop = 1;
659                         break;
660                 case RTE_FLOW_ACTION_TYPE_QUEUE:
661                         queue = action->conf;
662                         if (queue->index >= priv->dev->data->nb_rx_queues)
663                                 goto exit_action_not_supported;
664                         flow->queue = 1;
665                         flow->queue_id = queue->index;
666                         break;
667                 default:
668                         goto exit_action_not_supported;
669                 }
670         }
671         if (!flow->queue && !flow->drop)
672                 return rte_flow_error_set
673                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
674                          NULL, "no valid action");
675         /* Validation ends here. */
676         if (!addr)
677                 return 0;
678         if (flow == &temp) {
679                 /* Allocate proper handle based on collected data. */
680                 const struct mlx4_malloc_vec vec[] = {
681                         {
682                                 .align = alignof(struct rte_flow),
683                                 .size = sizeof(*flow),
684                                 .addr = (void **)&flow,
685                         },
686                         {
687                                 .align = alignof(struct ibv_flow_attr),
688                                 .size = temp.ibv_attr_size,
689                                 .addr = (void **)&temp.ibv_attr,
690                         },
691                 };
692
693                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
694                         return rte_flow_error_set
695                                 (error, -rte_errno,
696                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
697                                  "flow rule handle allocation failure");
698                 /* Most fields will be updated by the second pass. */
699                 *flow = (struct rte_flow){
700                         .ibv_attr = temp.ibv_attr,
701                         .ibv_attr_size = sizeof(*flow->ibv_attr),
702                 };
703                 *flow->ibv_attr = (struct ibv_flow_attr){
704                         .type = IBV_FLOW_ATTR_NORMAL,
705                         .size = sizeof(*flow->ibv_attr),
706                         .priority = attr->priority,
707                         .port = priv->port,
708                 };
709                 goto fill;
710         }
711         *addr = flow;
712         return 0;
713 exit_item_not_supported:
714         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
715                                   item, "item not supported");
716 exit_action_not_supported:
717         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
718                                   action, "action not supported");
719 }
720
721 /**
722  * Validate a flow supported by the NIC.
723  *
724  * @see rte_flow_validate()
725  * @see rte_flow_ops
726  */
727 static int
728 mlx4_flow_validate(struct rte_eth_dev *dev,
729                    const struct rte_flow_attr *attr,
730                    const struct rte_flow_item pattern[],
731                    const struct rte_flow_action actions[],
732                    struct rte_flow_error *error)
733 {
734         struct priv *priv = dev->data->dev_private;
735
736         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
737 }
738
739 /**
740  * Get a drop flow rule resources instance.
741  *
742  * @param priv
743  *   Pointer to private structure.
744  *
745  * @return
746  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
747  *   is set.
748  */
749 static struct mlx4_drop *
750 mlx4_drop_get(struct priv *priv)
751 {
752         struct mlx4_drop *drop = priv->drop;
753
754         if (drop) {
755                 assert(drop->refcnt);
756                 assert(drop->priv == priv);
757                 ++drop->refcnt;
758                 return drop;
759         }
760         drop = rte_malloc(__func__, sizeof(*drop), 0);
761         if (!drop)
762                 goto error;
763         *drop = (struct mlx4_drop){
764                 .priv = priv,
765                 .refcnt = 1,
766         };
767         drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
768         if (!drop->cq)
769                 goto error;
770         drop->qp = ibv_create_qp(priv->pd,
771                                  &(struct ibv_qp_init_attr){
772                                         .send_cq = drop->cq,
773                                         .recv_cq = drop->cq,
774                                         .qp_type = IBV_QPT_RAW_PACKET,
775                                  });
776         if (!drop->qp)
777                 goto error;
778         priv->drop = drop;
779         return drop;
780 error:
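        /* "drop" may still be NULL if its allocation failed above. */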
781         if (drop && drop->qp)
782                 claim_zero(ibv_destroy_qp(drop->qp));
783         if (drop && drop->cq)
784                 claim_zero(ibv_destroy_cq(drop->cq));
785         if (drop)
786                 rte_free(drop);
787         rte_errno = ENOMEM;
788         return NULL;
789 }
790
791 /**
792  * Give back a drop flow rule resources instance.
793  *
794  * @param drop
795  *   Pointer to drop flow rule resources.
796  */
797 static void
798 mlx4_drop_put(struct mlx4_drop *drop)
799 {
800         assert(drop->refcnt);
801         if (--drop->refcnt)
802                 return;
803         drop->priv->drop = NULL;
804         claim_zero(ibv_destroy_qp(drop->qp));
805         claim_zero(ibv_destroy_cq(drop->cq));
806         rte_free(drop);
807 }
808
809 /**
810  * Toggle a configured flow rule.
811  *
812  * @param priv
813  *   Pointer to private structure.
814  * @param flow
815  *   Flow rule handle to toggle.
816  * @param enable
817  *   Whether associated Verbs flow must be created or removed.
818  * @param[out] error
819  *   Perform verbose error reporting if not NULL.
820  *
821  * @return
822  *   0 on success, a negative errno value otherwise and rte_errno is set.
823  */
824 static int
825 mlx4_flow_toggle(struct priv *priv,
826                  struct rte_flow *flow,
827                  int enable,
828                  struct rte_flow_error *error)
829 {
830         struct ibv_qp *qp = NULL;
831         const char *msg;
832         int err;
833
834         if (!enable) {
835                 if (!flow->ibv_flow)
836                         return 0;
837                 claim_zero(ibv_destroy_flow(flow->ibv_flow));
838                 flow->ibv_flow = NULL;
839                 if (flow->drop)
840                         mlx4_drop_put(priv->drop);
841                 return 0;
842         }
843         assert(flow->ibv_attr);
844         if (!flow->internal &&
845             !priv->isolated &&
846             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
847                 if (flow->ibv_flow) {
848                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
849                         flow->ibv_flow = NULL;
850                         if (flow->drop)
851                                 mlx4_drop_put(priv->drop);
852                 }
853                 err = EACCES;
854                 msg = ("priority level "
855                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
856                        " is reserved when not in isolated mode");
857                 goto error;
858         }
859         if (flow->queue) {
860                 struct rxq *rxq = NULL;
861
862                 if (flow->queue_id < priv->dev->data->nb_rx_queues)
863                         rxq = priv->dev->data->rx_queues[flow->queue_id];
864                 if (flow->ibv_flow) {
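                        /*
                         * Nothing to do if the existing Verbs flow already
                         * matches the target state (drop when the queue is
                         * missing, forward when it is present).
                         */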
865                         if (!rxq ^ !flow->drop)
866                                 return 0;
867                         /* Verbs flow needs updating. */
868                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
869                         flow->ibv_flow = NULL;
870                         if (flow->drop)
871                                 mlx4_drop_put(priv->drop);
872                 }
873                 if (rxq)
874                         qp = rxq->qp;
875                 /* A missing target queue drops traffic implicitly. */
876                 flow->drop = !rxq;
877         }
878         if (flow->drop) {
879                 mlx4_drop_get(priv);
880                 if (!priv->drop) {
881                         err = rte_errno;
882                         msg = "resources for drop flow rule cannot be created";
883                         goto error;
884                 }
885                 qp = priv->drop->qp;
886         }
887         assert(qp);
888         if (flow->ibv_flow)
889                 return 0;
890         flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
891         if (flow->ibv_flow)
892                 return 0;
893         if (flow->drop)
894                 mlx4_drop_put(priv->drop);
895         err = errno;
896         msg = "flow rule rejected by device";
897 error:
898         return rte_flow_error_set
899                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
900 }
901
902 /**
903  * Create a flow.
904  *
905  * @see rte_flow_create()
906  * @see rte_flow_ops
907  */
908 static struct rte_flow *
909 mlx4_flow_create(struct rte_eth_dev *dev,
910                  const struct rte_flow_attr *attr,
911                  const struct rte_flow_item pattern[],
912                  const struct rte_flow_action actions[],
913                  struct rte_flow_error *error)
914 {
915         struct priv *priv = dev->data->dev_private;
916         struct rte_flow *flow;
917         int err;
918
919         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
920         if (err)
921                 return NULL;
922         err = mlx4_flow_toggle(priv, flow, priv->started, error);
923         if (!err) {
924                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
925
926                 /* New rules are inserted after internal ones. */
927                 if (!curr || !curr->internal) {
928                         LIST_INSERT_HEAD(&priv->flows, flow, next);
929                 } else {
930                         while (LIST_NEXT(curr, next) &&
931                                LIST_NEXT(curr, next)->internal)
932                                 curr = LIST_NEXT(curr, next);
933                         LIST_INSERT_AFTER(curr, flow, next);
934                 }
935                 return flow;
936         }
937         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
938                            error->message);
939         rte_free(flow);
940         return NULL;
941 }
942
943 /**
944  * Configure isolated mode.
945  *
946  * @see rte_flow_isolate()
947  * @see rte_flow_ops
948  */
949 static int
950 mlx4_flow_isolate(struct rte_eth_dev *dev,
951                   int enable,
952                   struct rte_flow_error *error)
953 {
954         struct priv *priv = dev->data->dev_private;
955
956         if (!!enable == !!priv->isolated)
957                 return 0;
958         priv->isolated = !!enable;
959         if (mlx4_flow_sync(priv)) {
960                 priv->isolated = !enable;
961                 return rte_flow_error_set(error, rte_errno,
962                                           RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
963                                           NULL,
964                                           enable ?
965                                           "cannot enter isolated mode" :
966                                           "cannot leave isolated mode");
967         }
968         return 0;
969 }
970
971 /**
972  * Destroy a flow rule.
973  *
974  * @see rte_flow_destroy()
975  * @see rte_flow_ops
976  */
977 static int
978 mlx4_flow_destroy(struct rte_eth_dev *dev,
979                   struct rte_flow *flow,
980                   struct rte_flow_error *error)
981 {
982         struct priv *priv = dev->data->dev_private;
983         int err = mlx4_flow_toggle(priv, flow, 0, error);
984
985         if (err)
986                 return err;
987         LIST_REMOVE(flow, next);
988         rte_free(flow);
989         return 0;
990 }
991
992 /**
993  * Destroy user-configured flow rules.
994  *
995  * This function skips internal flow rules.
996  *
997  * @see rte_flow_flush()
998  * @see rte_flow_ops
999  */
1000 static int
1001 mlx4_flow_flush(struct rte_eth_dev *dev,
1002                 struct rte_flow_error *error)
1003 {
1004         struct priv *priv = dev->data->dev_private;
1005         struct rte_flow *flow = LIST_FIRST(&priv->flows);
1006
1007         while (flow) {
1008                 struct rte_flow *next = LIST_NEXT(flow, next);
1009
1010                 if (!flow->internal)
1011                         mlx4_flow_destroy(dev, flow, error);
1012                 flow = next;
1013         }
1014         return 0;
1015 }
1016
1017 /**
1018  * Generate internal flow rules.
1019  *
1020  * @param priv
1021  *   Pointer to private structure.
1022  * @param[out] error
1023  *   Perform verbose error reporting if not NULL.
1024  *
1025  * @return
1026  *   0 on success, a negative errno value otherwise and rte_errno is set.
1027  */
1028 static int
1029 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1030 {
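        /*
         * A single rule is currently enough: match traffic addressed to
         * the port MAC address and direct it to the first Rx queue.
         */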
1031         struct rte_flow_attr attr = {
1032                 .priority = MLX4_FLOW_PRIORITY_LAST,
1033                 .ingress = 1,
1034         };
1035         struct rte_flow_item pattern[] = {
1036                 {
1037                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1038                 },
1039                 {
1040                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1041                         .spec = &(struct rte_flow_item_eth){
1042                                 .dst = priv->mac,
1043                         },
1044                         .mask = &(struct rte_flow_item_eth){
1045                                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1046                         },
1047                 },
1048                 {
1049                         .type = RTE_FLOW_ITEM_TYPE_END,
1050                 },
1051         };
1052         struct rte_flow_action actions[] = {
1053                 {
1054                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1055                         .conf = &(struct rte_flow_action_queue){
1056                                 .index = 0,
1057                         },
1058                 },
1059                 {
1060                         .type = RTE_FLOW_ACTION_TYPE_END,
1061                 },
1062         };
1063
1064         if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
1065                 return -rte_errno;
1066         return 0;
1067 }
1068
1069 /**
1070  * Synchronize flow rules.
1071  *
1072  * This function synchronizes flow rules with the state of the device by
1073  * taking into account isolated mode and whether target queues are
1074  * configured.
1075  *
1076  * @param priv
1077  *   Pointer to private structure.
1078  *
1079  * @return
1080  *   0 on success, a negative errno value otherwise and rte_errno is set.
1081  */
1082 int
1083 mlx4_flow_sync(struct priv *priv)
1084 {
1085         struct rte_flow *flow;
1086         int ret;
1087
1088         /* Internal flow rules are guaranteed to come first in the list. */
1089         if (priv->isolated) {
1090                 /*
1091                  * Get rid of internal rules while in isolated mode and
1092                  * stop at the first non-internal rule found.
1093                  */
1094                 for (flow = LIST_FIRST(&priv->flows);
1095                      flow && flow->internal;
1096                      flow = LIST_FIRST(&priv->flows))
1097                         claim_zero(mlx4_flow_destroy(priv->dev, flow, NULL));
1098         } else if (!LIST_FIRST(&priv->flows) ||
1099                    !LIST_FIRST(&priv->flows)->internal) {
1100                 /*
1101                  * Outside isolated mode, internal rules are missing when
1102                  * the first rule is not internal; add them back.
1103                  */
1104                 ret = mlx4_flow_internal(priv, NULL);
1105                 if (ret)
1106                         return ret;
1107         }
1108         if (priv->started)
1109                 return mlx4_flow_start(priv);
1110         mlx4_flow_stop(priv);
1111         return 0;
1112 }
1113
1114 /**
1115  * Clean up all flow rules.
1116  *
1117  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1118  * rules regardless of whether they are internal or user-configured.
1119  *
1120  * @param priv
1121  *   Pointer to private structure.
1122  */
1123 void
1124 mlx4_flow_clean(struct priv *priv)
1125 {
1126         struct rte_flow *flow;
1127
1128         while ((flow = LIST_FIRST(&priv->flows)))
1129                 mlx4_flow_destroy(priv->dev, flow, NULL);
1130 }
1131
1132 /**
1133  * Disable flow rules.
1134  *
1135  * @param priv
1136  *   Pointer to private structure.
1137  */
1138 void
1139 mlx4_flow_stop(struct priv *priv)
1140 {
1141         struct rte_flow *flow;
1142
1143         for (flow = LIST_FIRST(&priv->flows);
1144              flow;
1145              flow = LIST_NEXT(flow, next)) {
1146                 claim_zero(mlx4_flow_toggle(priv, flow, 0, NULL));
1147         }
1148         assert(!priv->drop);
1149 }
1150
1151 /**
1152  * Enable flow rules.
1153  *
1154  * @param priv
1155  *   Pointer to private structure.
1156  *
1157  * @return
1158  *   0 on success, a negative errno value otherwise and rte_errno is set.
1159  */
1160 int
1161 mlx4_flow_start(struct priv *priv)
1162 {
1163         int ret;
1164         struct rte_flow *flow;
1165
1166         for (flow = LIST_FIRST(&priv->flows);
1167              flow;
1168              flow = LIST_NEXT(flow, next)) {
1169                 ret = mlx4_flow_toggle(priv, flow, 1, NULL);
1170                 if (unlikely(ret)) {
1171                         mlx4_flow_stop(priv);
1172                         return ret;
1173                 }
1174         }
1175         return 0;
1176 }
1177
1178 static const struct rte_flow_ops mlx4_flow_ops = {
1179         .validate = mlx4_flow_validate,
1180         .create = mlx4_flow_create,
1181         .destroy = mlx4_flow_destroy,
1182         .flush = mlx4_flow_flush,
1183         .isolate = mlx4_flow_isolate,
1184 };
1185
1186 /**
1187  * Manage filter operations.
1188  *
1189  * @param dev
1190  *   Pointer to Ethernet device structure.
1191  * @param filter_type
1192  *   Filter type.
1193  * @param filter_op
1194  *   Operation to perform.
1195  * @param arg
1196  *   Pointer to operation-specific structure.
1197  *
1198  * @return
1199  *   0 on success, negative errno value otherwise and rte_errno is set.
1200  */
1201 int
1202 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1203                  enum rte_filter_type filter_type,
1204                  enum rte_filter_op filter_op,
1205                  void *arg)
1206 {
1207         switch (filter_type) {
1208         case RTE_ETH_FILTER_GENERIC:
1209                 if (filter_op != RTE_ETH_FILTER_GET)
1210                         break;
1211                 *(const void **)arg = &mlx4_flow_ops;
1212                 return 0;
1213         default:
1214                 ERROR("%p: filter type (%d) not supported",
1215                       (void *)dev, filter_type);
1216                 break;
1217         }
1218         rte_errno = ENOTSUP;
1219         return -rte_errno;
1220 }