net/mlx4: refactor flow item validation code
dpdk.git: drivers/net/mlx4/mlx4_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2017 6WIND S.A.
5  *   Copyright 2017 Mellanox
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 /**
35  * @file
36  * Flow API operations for mlx4 driver.
37  */
38
39 #include <arpa/inet.h>
40 #include <assert.h>
41 #include <errno.h>
42 #include <stdalign.h>
43 #include <stddef.h>
44 #include <stdint.h>
45 #include <string.h>
46 #include <sys/queue.h>
47
48 /* Verbs headers do not support -pedantic. */
49 #ifdef PEDANTIC
50 #pragma GCC diagnostic ignored "-Wpedantic"
51 #endif
52 #include <infiniband/verbs.h>
53 #ifdef PEDANTIC
54 #pragma GCC diagnostic error "-Wpedantic"
55 #endif
56
57 #include <rte_byteorder.h>
58 #include <rte_errno.h>
59 #include <rte_eth_ctrl.h>
60 #include <rte_ethdev.h>
61 #include <rte_flow.h>
62 #include <rte_flow_driver.h>
63 #include <rte_malloc.h>
64
65 /* PMD headers. */
66 #include "mlx4.h"
67 #include "mlx4_flow.h"
68 #include "mlx4_rxtx.h"
69 #include "mlx4_utils.h"
70
71 /** Static initializer for a list of subsequent item types. */
72 #define NEXT_ITEM(...) \
73         (const enum rte_flow_item_type []){ \
74                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
75         }
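/*
 * For instance, NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP, RTE_FLOW_ITEM_TYPE_TCP)
 * yields an anonymous array { UDP, TCP, END } suitable for the next_item
 * field below.
 */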
76
77 /** Processor structure associated with a flow item. */
78 struct mlx4_flow_proc_item {
79         /** Bit-mask for fields supported by this PMD. */
80         const void *mask_support;
81         /** Bit-mask to use when @p item->mask is not provided. */
82         const void *mask_default;
83         /** Size in bytes for @p mask_support and @p mask_default. */
84         const unsigned int mask_sz;
85         /** Merge a pattern item into a flow rule handle. */
86         int (*merge)(struct rte_flow *flow,
87                      const struct rte_flow_item *item,
88                      const struct mlx4_flow_proc_item *proc,
89                      struct rte_flow_error *error);
90         /** Size in bytes of the destination structure. */
91         const unsigned int dst_sz;
92         /** List of possible subsequent items. */
93         const enum rte_flow_item_type *const next_item;
94 };
95
96 /** Shared resources for drop flow rules. */
97 struct mlx4_drop {
98         struct ibv_qp *qp; /**< QP target. */
99         struct ibv_cq *cq; /**< CQ associated with above QP. */
100         struct priv *priv; /**< Back pointer to private data. */
101         uint32_t refcnt; /**< Reference count. */
102 };
103
104 /**
105  * Merge Ethernet pattern item into flow rule handle.
106  *
107  * Additional mlx4-specific constraints on supported fields:
108  *
109  * - No support for partial masks.
110  * - Not providing @p item->spec or providing an empty @p mask->dst is
111  *   *only* supported if the rule doesn't specify additional matching
112  *   criteria (i.e. rule is promiscuous-like).
113  *
114  * @param[in, out] flow
115  *   Flow rule handle to update.
116  * @param[in] item
117  *   Pattern item to merge.
118  * @param[in] proc
119  *   Associated item-processing object.
120  * @param[out] error
121  *   Perform verbose error reporting if not NULL.
122  *
123  * @return
124  *   0 on success, a negative errno value otherwise and rte_errno is set.
125  */
126 static int
127 mlx4_flow_merge_eth(struct rte_flow *flow,
128                     const struct rte_flow_item *item,
129                     const struct mlx4_flow_proc_item *proc,
130                     struct rte_flow_error *error)
131 {
132         const struct rte_flow_item_eth *spec = item->spec;
133         const struct rte_flow_item_eth *mask =
134                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
135         struct ibv_flow_spec_eth *eth;
136         const char *msg;
137         unsigned int i;
138
139         if (!mask) {
140                 flow->promisc = 1;
141         } else {
142                 uint32_t sum_dst = 0;
143                 uint32_t sum_src = 0;
144
145                 for (i = 0; i != sizeof(mask->dst.addr_bytes); ++i) {
146                         sum_dst += mask->dst.addr_bytes[i];
147                         sum_src += mask->src.addr_bytes[i];
148                 }
149                 if (sum_src) {
150                         msg = "mlx4 does not support source MAC matching";
151                         goto error;
152                 } else if (!sum_dst) {
153                         flow->promisc = 1;
154                 } else if (sum_dst != (UINT8_C(0xff) * ETHER_ADDR_LEN)) {
155                         msg = "mlx4 does not support matching partial"
156                                 " Ethernet fields";
157                         goto error;
158                 }
159         }
160         if (!flow->ibv_attr)
161                 return 0;
162         if (flow->promisc) {
163                 flow->ibv_attr->type = IBV_FLOW_ATTR_ALL_DEFAULT;
164                 return 0;
165         }
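        /*
         * Append a Verbs Ethernet spec at the current write offset; the
         * caller (mlx4_flow_prepare()) later advances ibv_attr_size by this
         * item's dst_sz.
         */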
166         ++flow->ibv_attr->num_of_specs;
167         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
168         *eth = (struct ibv_flow_spec_eth) {
169                 .type = IBV_FLOW_SPEC_ETH,
170                 .size = sizeof(*eth),
171         };
172         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
173         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
174         /* Remove unwanted bits from values. */
175         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
176                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
177         }
178         return 0;
179 error:
180         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
181                                   item, msg);
182 }
183
184 /**
185  * Merge VLAN pattern item into flow rule handle.
186  *
187  * Additional mlx4-specific constraints on supported fields:
188  *
189  * - Matching *all* VLAN traffic by omitting @p item->spec or providing an
190  *   empty @p item->mask would also include non-VLAN traffic. Doing so is
191  *   therefore unsupported.
192  * - No support for partial masks.
193  *
194  * @param[in, out] flow
195  *   Flow rule handle to update.
196  * @param[in] item
197  *   Pattern item to merge.
198  * @param[in] proc
199  *   Associated item-processing object.
200  * @param[out] error
201  *   Perform verbose error reporting if not NULL.
202  *
203  * @return
204  *   0 on success, a negative errno value otherwise and rte_errno is set.
205  */
206 static int
207 mlx4_flow_merge_vlan(struct rte_flow *flow,
208                      const struct rte_flow_item *item,
209                      const struct mlx4_flow_proc_item *proc,
210                      struct rte_flow_error *error)
211 {
212         const struct rte_flow_item_vlan *spec = item->spec;
213         const struct rte_flow_item_vlan *mask =
214                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
215         struct ibv_flow_spec_eth *eth;
216         const char *msg;
217
218         if (!mask || !mask->tci) {
219                 msg = "mlx4 cannot match all VLAN traffic while excluding"
220                         " non-VLAN traffic, TCI VID must be specified";
221                 goto error;
222         }
223         if (mask->tci != RTE_BE16(0x0fff)) {
224                 msg = "mlx4 does not support partial TCI VID matching";
225                 goto error;
226         }
227         if (!flow->ibv_attr)
228                 return 0;
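        /*
         * VLAN matching is folded into the Ethernet spec emitted by the
         * previous ETH item (this item's dst_sz is 0), hence the backward
         * offset used to reach it.
         */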
229         eth = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size -
230                        sizeof(*eth));
231         eth->val.vlan_tag = spec->tci;
232         eth->mask.vlan_tag = mask->tci;
233         eth->val.vlan_tag &= eth->mask.vlan_tag;
234         return 0;
235 error:
236         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
237                                   item, msg);
238 }
239
240 /**
241  * Merge IPv4 pattern item into flow rule handle.
242  *
243  * Additional mlx4-specific constraints on supported fields:
244  *
245  * - No support for partial masks.
246  *
247  * @param[in, out] flow
248  *   Flow rule handle to update.
249  * @param[in] item
250  *   Pattern item to merge.
251  * @param[in] proc
252  *   Associated item-processing object.
253  * @param[out] error
254  *   Perform verbose error reporting if not NULL.
255  *
256  * @return
257  *   0 on success, a negative errno value otherwise and rte_errno is set.
258  */
259 static int
260 mlx4_flow_merge_ipv4(struct rte_flow *flow,
261                      const struct rte_flow_item *item,
262                      const struct mlx4_flow_proc_item *proc,
263                      struct rte_flow_error *error)
264 {
265         const struct rte_flow_item_ipv4 *spec = item->spec;
266         const struct rte_flow_item_ipv4 *mask =
267                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
268         struct ibv_flow_spec_ipv4 *ipv4;
269         const char *msg;
270
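        /*
         * The (x + 1) > 1 test below relies on unsigned wrap-around: it only
         * fails for 0 and the all-ones value, i.e. only empty or full
         * address masks are accepted.
         */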
271         if (mask &&
272             ((uint32_t)(mask->hdr.src_addr + 1) > UINT32_C(1) ||
273              (uint32_t)(mask->hdr.dst_addr + 1) > UINT32_C(1))) {
274                 msg = "mlx4 does not support matching partial IPv4 fields";
275                 goto error;
276         }
277         if (!flow->ibv_attr)
278                 return 0;
279         ++flow->ibv_attr->num_of_specs;
280         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
281         *ipv4 = (struct ibv_flow_spec_ipv4) {
282                 .type = IBV_FLOW_SPEC_IPV4,
283                 .size = sizeof(*ipv4),
284         };
285         if (!spec)
286                 return 0;
287         ipv4->val = (struct ibv_flow_ipv4_filter) {
288                 .src_ip = spec->hdr.src_addr,
289                 .dst_ip = spec->hdr.dst_addr,
290         };
291         ipv4->mask = (struct ibv_flow_ipv4_filter) {
292                 .src_ip = mask->hdr.src_addr,
293                 .dst_ip = mask->hdr.dst_addr,
294         };
295         /* Remove unwanted bits from values. */
296         ipv4->val.src_ip &= ipv4->mask.src_ip;
297         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
298         return 0;
299 error:
300         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
301                                   item, msg);
302 }
303
304 /**
305  * Merge UDP pattern item into flow rule handle.
306  *
307  * Additional mlx4-specific constraints on supported fields:
308  *
309  * - No support for partial masks.
310  *
311  * @param[in, out] flow
312  *   Flow rule handle to update.
313  * @param[in] item
314  *   Pattern item to merge.
315  * @param[in] proc
316  *   Associated item-processing object.
317  * @param[out] error
318  *   Perform verbose error reporting if not NULL.
319  *
320  * @return
321  *   0 on success, a negative errno value otherwise and rte_errno is set.
322  */
323 static int
324 mlx4_flow_merge_udp(struct rte_flow *flow,
325                     const struct rte_flow_item *item,
326                     const struct mlx4_flow_proc_item *proc,
327                     struct rte_flow_error *error)
328 {
329         const struct rte_flow_item_udp *spec = item->spec;
330         const struct rte_flow_item_udp *mask =
331                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
332         struct ibv_flow_spec_tcp_udp *udp;
333         const char *msg;
334
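        /*
         * As with IPv4 addresses, port masks must be either empty or full
         * (the (x + 1) > 1 test fails only for 0 and 0xffff).
         */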
 335         if (mask &&
336             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
337              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
338                 msg = "mlx4 does not support matching partial UDP fields";
339                 goto error;
340         }
341         if (!flow->ibv_attr)
342                 return 0;
343         ++flow->ibv_attr->num_of_specs;
344         udp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
345         *udp = (struct ibv_flow_spec_tcp_udp) {
346                 .type = IBV_FLOW_SPEC_UDP,
347                 .size = sizeof(*udp),
348         };
349         if (!spec)
350                 return 0;
351         udp->val.dst_port = spec->hdr.dst_port;
352         udp->val.src_port = spec->hdr.src_port;
353         udp->mask.dst_port = mask->hdr.dst_port;
354         udp->mask.src_port = mask->hdr.src_port;
355         /* Remove unwanted bits from values. */
356         udp->val.src_port &= udp->mask.src_port;
357         udp->val.dst_port &= udp->mask.dst_port;
358         return 0;
359 error:
360         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
361                                   item, msg);
362 }
363
364 /**
365  * Merge TCP pattern item into flow rule handle.
366  *
367  * Additional mlx4-specific constraints on supported fields:
368  *
369  * - No support for partial masks.
370  *
371  * @param[in, out] flow
372  *   Flow rule handle to update.
373  * @param[in] item
374  *   Pattern item to merge.
375  * @param[in] proc
376  *   Associated item-processing object.
377  * @param[out] error
378  *   Perform verbose error reporting if not NULL.
379  *
380  * @return
381  *   0 on success, a negative errno value otherwise and rte_errno is set.
382  */
383 static int
384 mlx4_flow_merge_tcp(struct rte_flow *flow,
385                     const struct rte_flow_item *item,
386                     const struct mlx4_flow_proc_item *proc,
387                     struct rte_flow_error *error)
388 {
389         const struct rte_flow_item_tcp *spec = item->spec;
390         const struct rte_flow_item_tcp *mask =
391                 spec ? (item->mask ? item->mask : proc->mask_default) : NULL;
392         struct ibv_flow_spec_tcp_udp *tcp;
393         const char *msg;
394
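        /* Same full-or-empty port mask requirement as for UDP above. */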
 395         if (mask &&
396             ((uint16_t)(mask->hdr.src_port + 1) > UINT16_C(1) ||
397              (uint16_t)(mask->hdr.dst_port + 1) > UINT16_C(1))) {
398                 msg = "mlx4 does not support matching partial TCP fields";
399                 goto error;
400         }
401         if (!flow->ibv_attr)
402                 return 0;
403         ++flow->ibv_attr->num_of_specs;
404         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->ibv_attr_size);
405         *tcp = (struct ibv_flow_spec_tcp_udp) {
406                 .type = IBV_FLOW_SPEC_TCP,
407                 .size = sizeof(*tcp),
408         };
409         if (!spec)
410                 return 0;
411         tcp->val.dst_port = spec->hdr.dst_port;
412         tcp->val.src_port = spec->hdr.src_port;
413         tcp->mask.dst_port = mask->hdr.dst_port;
414         tcp->mask.src_port = mask->hdr.src_port;
415         /* Remove unwanted bits from values. */
416         tcp->val.src_port &= tcp->mask.src_port;
417         tcp->val.dst_port &= tcp->mask.dst_port;
418         return 0;
419 error:
420         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
421                                   item, msg);
422 }
423
424 /**
425  * Perform basic sanity checks on a pattern item.
426  *
427  * @param[in] item
428  *   Item specification.
429  * @param[in] proc
430  *   Associated item-processing object.
431  * @param[out] error
432  *   Perform verbose error reporting if not NULL.
433  *
434  * @return
435  *   0 on success, a negative errno value otherwise and rte_errno is set.
436  */
437 static int
438 mlx4_flow_item_check(const struct rte_flow_item *item,
439                      const struct mlx4_flow_proc_item *proc,
440                      struct rte_flow_error *error)
441 {
442         const uint8_t *mask;
443         unsigned int i;
444
445         /* item->last and item->mask cannot exist without item->spec. */
446         if (!item->spec && (item->mask || item->last))
447                 return rte_flow_error_set
448                         (error, EINVAL, RTE_FLOW_ERROR_TYPE_ITEM, item,
449                          "\"mask\" or \"last\" field provided without a"
450                          " corresponding \"spec\"");
451         /* No spec, no mask, no problem. */
452         if (!item->spec)
453                 return 0;
454         mask = item->mask ?
455                 (const uint8_t *)item->mask :
456                 (const uint8_t *)proc->mask_default;
457         assert(mask);
458         /*
459          * Single-pass check to make sure that:
460          * - Mask is supported, no bits are set outside proc->mask_support.
461          * - Both item->spec and item->last are included in mask.
462          */
463         for (i = 0; i != proc->mask_sz; ++i) {
464                 if (!mask[i])
465                         continue;
466                 if ((mask[i] | ((const uint8_t *)proc->mask_support)[i]) !=
467                     ((const uint8_t *)proc->mask_support)[i])
468                         return rte_flow_error_set
469                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
470                                  item, "unsupported field found in \"mask\"");
471                 if (item->last &&
472                     (((const uint8_t *)item->spec)[i] & mask[i]) !=
473                     (((const uint8_t *)item->last)[i] & mask[i]))
474                         return rte_flow_error_set
475                                 (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
476                                  item,
477                                  "range between \"spec\" and \"last\""
478                                  " is larger than \"mask\"");
479         }
480         return 0;
481 }
482
 483 /** Graph of supported pattern items and their processing objects. */
484 static const struct mlx4_flow_proc_item mlx4_flow_proc_item_list[] = {
485         [RTE_FLOW_ITEM_TYPE_END] = {
486                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_ETH),
487         },
488         [RTE_FLOW_ITEM_TYPE_ETH] = {
489                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_VLAN,
490                                        RTE_FLOW_ITEM_TYPE_IPV4),
491                 .mask_support = &(const struct rte_flow_item_eth){
492                         /* Only destination MAC can be matched. */
493                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
494                 },
495                 .mask_default = &rte_flow_item_eth_mask,
496                 .mask_sz = sizeof(struct rte_flow_item_eth),
497                 .merge = mlx4_flow_merge_eth,
498                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
499         },
500         [RTE_FLOW_ITEM_TYPE_VLAN] = {
501                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_IPV4),
502                 .mask_support = &(const struct rte_flow_item_vlan){
503                         /* Only TCI VID matching is supported. */
504                         .tci = RTE_BE16(0x0fff),
505                 },
506                 .mask_default = &rte_flow_item_vlan_mask,
507                 .mask_sz = sizeof(struct rte_flow_item_vlan),
508                 .merge = mlx4_flow_merge_vlan,
509                 .dst_sz = 0,
510         },
511         [RTE_FLOW_ITEM_TYPE_IPV4] = {
512                 .next_item = NEXT_ITEM(RTE_FLOW_ITEM_TYPE_UDP,
513                                        RTE_FLOW_ITEM_TYPE_TCP),
514                 .mask_support = &(const struct rte_flow_item_ipv4){
515                         .hdr = {
516                                 .src_addr = RTE_BE32(0xffffffff),
517                                 .dst_addr = RTE_BE32(0xffffffff),
518                         },
519                 },
520                 .mask_default = &rte_flow_item_ipv4_mask,
521                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
522                 .merge = mlx4_flow_merge_ipv4,
523                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4),
524         },
525         [RTE_FLOW_ITEM_TYPE_UDP] = {
526                 .mask_support = &(const struct rte_flow_item_udp){
527                         .hdr = {
528                                 .src_port = RTE_BE16(0xffff),
529                                 .dst_port = RTE_BE16(0xffff),
530                         },
531                 },
532                 .mask_default = &rte_flow_item_udp_mask,
533                 .mask_sz = sizeof(struct rte_flow_item_udp),
534                 .merge = mlx4_flow_merge_udp,
535                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
536         },
537         [RTE_FLOW_ITEM_TYPE_TCP] = {
538                 .mask_support = &(const struct rte_flow_item_tcp){
539                         .hdr = {
540                                 .src_port = RTE_BE16(0xffff),
541                                 .dst_port = RTE_BE16(0xffff),
542                         },
543                 },
544                 .mask_default = &rte_flow_item_tcp_mask,
545                 .mask_sz = sizeof(struct rte_flow_item_tcp),
546                 .merge = mlx4_flow_merge_tcp,
547                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
548         },
549 };
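/*
 * Illustrative example (not part of the driver): per the graph above, the
 * item sequence ETH -> IPV4 -> END is valid, with UDP or TCP optionally
 * following IPV4. A hypothetical application could express such a pattern
 * through the public rte_flow API as follows (mlx4_flow_merge_eth()
 * requires a destination MAC and no source MAC in the mask):
 *
 *	struct rte_flow_item pattern[] = {
 *		{
 *			.type = RTE_FLOW_ITEM_TYPE_ETH,
 *			.spec = &(struct rte_flow_item_eth){
 *				.dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *			},
 *			.mask = &(struct rte_flow_item_eth){
 *				.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *			},
 *		},
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 */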
550
551 /**
552  * Make sure a flow rule is supported and initialize associated structure.
553  *
554  * @param priv
555  *   Pointer to private structure.
556  * @param[in] attr
557  *   Flow rule attributes.
558  * @param[in] pattern
559  *   Pattern specification (list terminated by the END pattern item).
560  * @param[in] actions
561  *   Associated actions (list terminated by the END action).
562  * @param[out] error
563  *   Perform verbose error reporting if not NULL.
564  * @param[in, out] addr
565  *   Buffer where the resulting flow rule handle pointer must be stored.
566  *   If NULL, stop processing after validation stage.
567  *
568  * @return
569  *   0 on success, a negative errno value otherwise and rte_errno is set.
570  */
571 static int
572 mlx4_flow_prepare(struct priv *priv,
573                   const struct rte_flow_attr *attr,
574                   const struct rte_flow_item pattern[],
575                   const struct rte_flow_action actions[],
576                   struct rte_flow_error *error,
577                   struct rte_flow **addr)
578 {
579         const struct rte_flow_item *item;
580         const struct rte_flow_action *action;
581         const struct mlx4_flow_proc_item *proc;
582         struct rte_flow temp = { .ibv_attr_size = sizeof(*temp.ibv_attr) };
583         struct rte_flow *flow = &temp;
584         const char *msg = NULL;
585
586         if (attr->group)
587                 return rte_flow_error_set
588                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
589                          NULL, "groups are not supported");
590         if (attr->priority > MLX4_FLOW_PRIORITY_LAST)
591                 return rte_flow_error_set
592                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
593                          NULL, "maximum priority level is "
594                          MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST));
595         if (attr->egress)
596                 return rte_flow_error_set
597                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
598                          NULL, "egress is not supported");
599         if (!attr->ingress)
600                 return rte_flow_error_set
601                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
602                          NULL, "only ingress is supported");
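        /*
         * Everything below runs twice: a first pass with "flow" pointing to
         * the temporary rule on the stack validates items/actions and sizes
         * ibv_attr_size, a second pass after allocation (see "goto fill"
         * below) fills the actual Verbs specifications.
         */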
603 fill:
604         proc = mlx4_flow_proc_item_list;
605         /* Go over pattern. */
606         for (item = pattern; item->type; ++item) {
607                 const struct mlx4_flow_proc_item *next = NULL;
608                 unsigned int i;
609                 int err;
610
611                 if (item->type == RTE_FLOW_ITEM_TYPE_VOID)
612                         continue;
613                 if (item->type == MLX4_FLOW_ITEM_TYPE_INTERNAL) {
614                         flow->internal = 1;
615                         continue;
616                 }
617                 if (flow->promisc) {
618                         msg = "mlx4 does not support additional matching"
619                                 " criteria combined with indiscriminate"
620                                 " matching on Ethernet headers";
621                         goto exit_item_not_supported;
622                 }
623                 for (i = 0; proc->next_item && proc->next_item[i]; ++i) {
624                         if (proc->next_item[i] == item->type) {
625                                 next = &mlx4_flow_proc_item_list[item->type];
626                                 break;
627                         }
628                 }
629                 if (!next)
630                         goto exit_item_not_supported;
631                 proc = next;
632                 /*
633                  * Perform basic sanity checks only once, while handle is
634                  * not allocated.
635                  */
636                 if (flow == &temp) {
637                         err = mlx4_flow_item_check(item, proc, error);
638                         if (err)
639                                 return err;
640                 }
641                 if (proc->merge) {
642                         err = proc->merge(flow, item, proc, error);
643                         if (err)
644                                 return err;
645                 }
646                 flow->ibv_attr_size += proc->dst_sz;
647         }
648         /* Go over actions list. */
649         for (action = actions; action->type; ++action) {
650                 switch (action->type) {
651                         const struct rte_flow_action_queue *queue;
652
653                 case RTE_FLOW_ACTION_TYPE_VOID:
654                         continue;
655                 case RTE_FLOW_ACTION_TYPE_DROP:
656                         flow->drop = 1;
657                         break;
658                 case RTE_FLOW_ACTION_TYPE_QUEUE:
659                         queue = action->conf;
660                         if (queue->index >= priv->dev->data->nb_rx_queues)
661                                 goto exit_action_not_supported;
662                         flow->queue = 1;
663                         flow->queue_id = queue->index;
664                         break;
665                 default:
666                         goto exit_action_not_supported;
667                 }
668         }
669         if (!flow->queue && !flow->drop)
670                 return rte_flow_error_set
671                         (error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
672                          NULL, "no valid action");
673         /* Validation ends here. */
674         if (!addr)
675                 return 0;
676         if (flow == &temp) {
677                 /* Allocate proper handle based on collected data. */
678                 const struct mlx4_malloc_vec vec[] = {
679                         {
680                                 .align = alignof(struct rte_flow),
681                                 .size = sizeof(*flow),
682                                 .addr = (void **)&flow,
683                         },
684                         {
685                                 .align = alignof(struct ibv_flow_attr),
686                                 .size = temp.ibv_attr_size,
687                                 .addr = (void **)&temp.ibv_attr,
688                         },
689                 };
690
691                 if (!mlx4_zmallocv(__func__, vec, RTE_DIM(vec)))
692                         return rte_flow_error_set
693                                 (error, -rte_errno,
694                                  RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
695                                  "flow rule handle allocation failure");
696                 /* Most fields will be updated by second pass. */
697                 *flow = (struct rte_flow){
698                         .ibv_attr = temp.ibv_attr,
699                         .ibv_attr_size = sizeof(*flow->ibv_attr),
700                 };
701                 *flow->ibv_attr = (struct ibv_flow_attr){
702                         .type = IBV_FLOW_ATTR_NORMAL,
703                         .size = sizeof(*flow->ibv_attr),
704                         .priority = attr->priority,
705                         .port = priv->port,
706                 };
707                 goto fill;
708         }
709         *addr = flow;
710         return 0;
711 exit_item_not_supported:
712         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
713                                   item, msg ? msg : "item not supported");
714 exit_action_not_supported:
715         return rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
716                                   action, "action not supported");
717 }
718
719 /**
720  * Validate a flow supported by the NIC.
721  *
722  * @see rte_flow_validate()
723  * @see rte_flow_ops
724  */
725 static int
726 mlx4_flow_validate(struct rte_eth_dev *dev,
727                    const struct rte_flow_attr *attr,
728                    const struct rte_flow_item pattern[],
729                    const struct rte_flow_action actions[],
730                    struct rte_flow_error *error)
731 {
732         struct priv *priv = dev->data->dev_private;
733
734         return mlx4_flow_prepare(priv, attr, pattern, actions, error, NULL);
735 }
736
737 /**
738  * Get a drop flow rule resources instance.
739  *
740  * @param priv
741  *   Pointer to private structure.
742  *
743  * @return
744  *   Pointer to drop flow resources on success, NULL otherwise and rte_errno
745  *   is set.
746  */
747 static struct mlx4_drop *
748 mlx4_drop_get(struct priv *priv)
749 {
750         struct mlx4_drop *drop = priv->drop;
751
752         if (drop) {
753                 assert(drop->refcnt);
754                 assert(drop->priv == priv);
755                 ++drop->refcnt;
756                 return drop;
757         }
758         drop = rte_malloc(__func__, sizeof(*drop), 0);
759         if (!drop)
760                 goto error;
761         *drop = (struct mlx4_drop){
762                 .priv = priv,
763                 .refcnt = 1,
764         };
765         drop->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
766         if (!drop->cq)
767                 goto error;
768         drop->qp = ibv_create_qp(priv->pd,
769                                  &(struct ibv_qp_init_attr){
770                                         .send_cq = drop->cq,
771                                         .recv_cq = drop->cq,
772                                         .qp_type = IBV_QPT_RAW_PACKET,
773                                  });
774         if (!drop->qp)
775                 goto error;
776         priv->drop = drop;
777         return drop;
778 error:
 779         if (drop && drop->qp)
 780                 claim_zero(ibv_destroy_qp(drop->qp));
 781         if (drop && drop->cq)
 782                 claim_zero(ibv_destroy_cq(drop->cq));
783         if (drop)
784                 rte_free(drop);
785         rte_errno = ENOMEM;
786         return NULL;
787 }
788
789 /**
790  * Give back a drop flow rule resources instance.
791  *
792  * @param drop
793  *   Pointer to drop flow rule resources.
794  */
795 static void
796 mlx4_drop_put(struct mlx4_drop *drop)
797 {
798         assert(drop->refcnt);
799         if (--drop->refcnt)
800                 return;
801         drop->priv->drop = NULL;
802         claim_zero(ibv_destroy_qp(drop->qp));
803         claim_zero(ibv_destroy_cq(drop->cq));
804         rte_free(drop);
805 }
806
807 /**
808  * Toggle a configured flow rule.
809  *
810  * @param priv
811  *   Pointer to private structure.
812  * @param flow
813  *   Flow rule handle to toggle.
814  * @param enable
815  *   Whether associated Verbs flow must be created or removed.
816  * @param[out] error
817  *   Perform verbose error reporting if not NULL.
818  *
819  * @return
820  *   0 on success, a negative errno value otherwise and rte_errno is set.
821  */
822 static int
823 mlx4_flow_toggle(struct priv *priv,
824                  struct rte_flow *flow,
825                  int enable,
826                  struct rte_flow_error *error)
827 {
828         struct ibv_qp *qp = NULL;
829         const char *msg;
830         int err;
831
832         if (!enable) {
833                 if (!flow->ibv_flow)
834                         return 0;
835                 claim_zero(ibv_destroy_flow(flow->ibv_flow));
836                 flow->ibv_flow = NULL;
837                 if (flow->drop)
838                         mlx4_drop_put(priv->drop);
839                 return 0;
840         }
841         assert(flow->ibv_attr);
842         if (!flow->internal &&
843             !priv->isolated &&
844             flow->ibv_attr->priority == MLX4_FLOW_PRIORITY_LAST) {
845                 if (flow->ibv_flow) {
846                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
847                         flow->ibv_flow = NULL;
848                         if (flow->drop)
849                                 mlx4_drop_put(priv->drop);
850                 }
851                 err = EACCES;
852                 msg = ("priority level "
853                        MLX4_STR_EXPAND(MLX4_FLOW_PRIORITY_LAST)
854                        " is reserved when not in isolated mode");
855                 goto error;
856         }
857         if (flow->queue) {
858                 struct rxq *rxq = NULL;
859
860                 if (flow->queue_id < priv->dev->data->nb_rx_queues)
861                         rxq = priv->dev->data->rx_queues[flow->queue_id];
862                 if (flow->ibv_flow) {
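                        /*
                         * The existing Verbs flow is still consistent when
                         * either the target queue is present and this is
                         * not a drop rule, or the queue is missing and
                         * traffic is already dropped; nothing to do then.
                         */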
863                         if (!rxq ^ !flow->drop)
864                                 return 0;
865                         /* Verbs flow needs updating. */
866                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
867                         flow->ibv_flow = NULL;
868                         if (flow->drop)
869                                 mlx4_drop_put(priv->drop);
870                 }
871                 if (rxq)
872                         qp = rxq->qp;
873                 /* A missing target queue drops traffic implicitly. */
874                 flow->drop = !rxq;
875         }
876         if (flow->drop) {
877                 mlx4_drop_get(priv);
878                 if (!priv->drop) {
879                         err = rte_errno;
880                         msg = "resources for drop flow rule cannot be created";
881                         goto error;
882                 }
883                 qp = priv->drop->qp;
884         }
885         assert(qp);
886         if (flow->ibv_flow)
887                 return 0;
888         flow->ibv_flow = ibv_create_flow(qp, flow->ibv_attr);
889         if (flow->ibv_flow)
890                 return 0;
891         if (flow->drop)
892                 mlx4_drop_put(priv->drop);
893         err = errno;
894         msg = "flow rule rejected by device";
895 error:
896         return rte_flow_error_set
897                 (error, err, RTE_FLOW_ERROR_TYPE_HANDLE, flow, msg);
898 }
899
900 /**
901  * Create a flow.
902  *
903  * @see rte_flow_create()
904  * @see rte_flow_ops
905  */
906 static struct rte_flow *
907 mlx4_flow_create(struct rte_eth_dev *dev,
908                  const struct rte_flow_attr *attr,
909                  const struct rte_flow_item pattern[],
910                  const struct rte_flow_action actions[],
911                  struct rte_flow_error *error)
912 {
913         struct priv *priv = dev->data->dev_private;
914         struct rte_flow *flow;
915         int err;
916
917         err = mlx4_flow_prepare(priv, attr, pattern, actions, error, &flow);
918         if (err)
919                 return NULL;
920         err = mlx4_flow_toggle(priv, flow, priv->started, error);
921         if (!err) {
922                 struct rte_flow *curr = LIST_FIRST(&priv->flows);
923
924                 /* New rules are inserted after internal ones. */
925                 if (!curr || !curr->internal) {
926                         LIST_INSERT_HEAD(&priv->flows, flow, next);
927                 } else {
928                         while (LIST_NEXT(curr, next) &&
929                                LIST_NEXT(curr, next)->internal)
930                                 curr = LIST_NEXT(curr, next);
931                         LIST_INSERT_AFTER(curr, flow, next);
932                 }
933                 return flow;
934         }
935         rte_flow_error_set(error, -err, RTE_FLOW_ERROR_TYPE_UNSPECIFIED, NULL,
936                            error->message);
937         rte_free(flow);
938         return NULL;
939 }
940
941 /**
942  * Configure isolated mode.
943  *
944  * @see rte_flow_isolate()
945  * @see rte_flow_ops
946  */
947 static int
948 mlx4_flow_isolate(struct rte_eth_dev *dev,
949                   int enable,
950                   struct rte_flow_error *error)
951 {
952         struct priv *priv = dev->data->dev_private;
953
954         if (!!enable == !!priv->isolated)
955                 return 0;
956         priv->isolated = !!enable;
957         if (mlx4_flow_sync(priv, error)) {
958                 priv->isolated = !enable;
959                 return -rte_errno;
960         }
961         return 0;
962 }
963
964 /**
965  * Destroy a flow rule.
966  *
967  * @see rte_flow_destroy()
968  * @see rte_flow_ops
969  */
970 static int
971 mlx4_flow_destroy(struct rte_eth_dev *dev,
972                   struct rte_flow *flow,
973                   struct rte_flow_error *error)
974 {
975         struct priv *priv = dev->data->dev_private;
976         int err = mlx4_flow_toggle(priv, flow, 0, error);
977
978         if (err)
979                 return err;
980         LIST_REMOVE(flow, next);
981         rte_free(flow);
982         return 0;
983 }
984
985 /**
986  * Destroy user-configured flow rules.
987  *
 988  * This function skips internal flow rules.
989  *
990  * @see rte_flow_flush()
991  * @see rte_flow_ops
992  */
993 static int
994 mlx4_flow_flush(struct rte_eth_dev *dev,
995                 struct rte_flow_error *error)
996 {
997         struct priv *priv = dev->data->dev_private;
998         struct rte_flow *flow = LIST_FIRST(&priv->flows);
999
1000         while (flow) {
1001                 struct rte_flow *next = LIST_NEXT(flow, next);
1002
1003                 if (!flow->internal)
1004                         mlx4_flow_destroy(dev, flow, error);
1005                 flow = next;
1006         }
1007         return 0;
1008 }
1009
1010 /**
1011  * Generate internal flow rules.
1012  *
1013  * @param priv
1014  *   Pointer to private structure.
1015  * @param[out] error
1016  *   Perform verbose error reporting if not NULL.
1017  *
1018  * @return
1019  *   0 on success, a negative errno value otherwise and rte_errno is set.
1020  */
1021 static int
1022 mlx4_flow_internal(struct priv *priv, struct rte_flow_error *error)
1023 {
1024         struct rte_flow_attr attr = {
1025                 .priority = MLX4_FLOW_PRIORITY_LAST,
1026                 .ingress = 1,
1027         };
1028         struct rte_flow_item pattern[] = {
1029                 {
1030                         .type = MLX4_FLOW_ITEM_TYPE_INTERNAL,
1031                 },
1032                 {
1033                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1034                         .spec = &(struct rte_flow_item_eth){
1035                                 .dst = priv->mac,
1036                         },
1037                         .mask = &(struct rte_flow_item_eth){
1038                                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
1039                         },
1040                 },
1041                 {
1042                         .type = RTE_FLOW_ITEM_TYPE_END,
1043                 },
1044         };
1045         struct rte_flow_action actions[] = {
1046                 {
1047                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1048                         .conf = &(struct rte_flow_action_queue){
1049                                 .index = 0,
1050                         },
1051                 },
1052                 {
1053                         .type = RTE_FLOW_ACTION_TYPE_END,
1054                 },
1055         };
1056
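        /*
         * MLX4_FLOW_ITEM_TYPE_INTERNAL above is a PMD-private pseudo item;
         * mlx4_flow_prepare() uses it solely to tag the resulting rule as
         * internal so that mlx4_flow_flush() leaves it alone.
         */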
1057         if (!mlx4_flow_create(priv->dev, &attr, pattern, actions, error))
1058                 return -rte_errno;
1059         return 0;
1060 }
1061
1062 /**
1063  * Synchronize flow rules.
1064  *
1065  * This function synchronizes flow rules with the state of the device by
1066  * taking into account isolated mode and whether target queues are
1067  * configured.
1068  *
1069  * @param priv
1070  *   Pointer to private structure.
1071  * @param[out] error
1072  *   Perform verbose error reporting if not NULL.
1073  *
1074  * @return
1075  *   0 on success, a negative errno value otherwise and rte_errno is set.
1076  */
1077 int
1078 mlx4_flow_sync(struct priv *priv, struct rte_flow_error *error)
1079 {
1080         struct rte_flow *flow;
1081         int ret;
1082
1083         /* Internal flow rules are guaranteed to come first in the list. */
1084         if (priv->isolated) {
1085                 /*
1086                  * Get rid of them in isolated mode, stop at the first
1087                  * non-internal rule found.
1088                  */
1089                 for (flow = LIST_FIRST(&priv->flows);
1090                      flow && flow->internal;
1091                      flow = LIST_FIRST(&priv->flows))
1092                         claim_zero(mlx4_flow_destroy(priv->dev, flow, error));
1093         } else if (!LIST_FIRST(&priv->flows) ||
1094                    !LIST_FIRST(&priv->flows)->internal) {
1095                 /*
1096                  * Outside isolated mode, if the first rule is not
1097                  * internal, the internal rules must be added back.
1098                  */
1099                 ret = mlx4_flow_internal(priv, error);
1100                 if (ret)
1101                         return ret;
1102         }
1103         /* Toggle the remaining flow rules. */
1104         for (flow = LIST_FIRST(&priv->flows);
1105              flow;
1106              flow = LIST_NEXT(flow, next)) {
1107                 ret = mlx4_flow_toggle(priv, flow, priv->started, error);
1108                 if (ret)
1109                         return ret;
1110         }
1111         if (!priv->started)
1112                 assert(!priv->drop);
1113         return 0;
1114 }
1115
1116 /**
1117  * Clean up all flow rules.
1118  *
1119  * Unlike mlx4_flow_flush(), this function takes care of all remaining flow
1120  * rules regardless of whether they are internal or user-configured.
1121  *
1122  * @param priv
1123  *   Pointer to private structure.
1124  */
1125 void
1126 mlx4_flow_clean(struct priv *priv)
1127 {
1128         struct rte_flow *flow;
1129
1130         while ((flow = LIST_FIRST(&priv->flows)))
1131                 mlx4_flow_destroy(priv->dev, flow, NULL);
1132 }
1133
1134 static const struct rte_flow_ops mlx4_flow_ops = {
1135         .validate = mlx4_flow_validate,
1136         .create = mlx4_flow_create,
1137         .destroy = mlx4_flow_destroy,
1138         .flush = mlx4_flow_flush,
1139         .isolate = mlx4_flow_isolate,
1140 };
1141
1142 /**
1143  * Manage filter operations.
1144  *
1145  * @param dev
1146  *   Pointer to Ethernet device structure.
1147  * @param filter_type
1148  *   Filter type.
1149  * @param filter_op
1150  *   Operation to perform.
1151  * @param arg
1152  *   Pointer to operation-specific structure.
1153  *
1154  * @return
1155  *   0 on success, negative errno value otherwise and rte_errno is set.
1156  */
1157 int
1158 mlx4_filter_ctrl(struct rte_eth_dev *dev,
1159                  enum rte_filter_type filter_type,
1160                  enum rte_filter_op filter_op,
1161                  void *arg)
1162 {
1163         switch (filter_type) {
1164         case RTE_ETH_FILTER_GENERIC:
1165                 if (filter_op != RTE_ETH_FILTER_GET)
1166                         break;
1167                 *(const void **)arg = &mlx4_flow_ops;
1168                 return 0;
1169         default:
1170                 ERROR("%p: filter type (%d) not supported",
1171                       (void *)dev, filter_type);
1172                 break;
1173         }
1174         rte_errno = ENOTSUP;
1175         return -rte_errno;
1176 }