net/mlx5: use flow to enable promiscuous mode
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4
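
/*
 * In Verbs, a lower numerical priority wins: the item converters below use
 * priority 2 for L2, 1 for L3 and 0 for L4/tunnel specifications so that
 * more specific rules match first, while control plane flows use the
 * lowest priority defined above.
 */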

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
        struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        union {
                struct mlx5_flow frxq; /**< Flow with Rx queue. */
                struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
        };
};
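
/*
 * Note: queues[] points to storage laid out immediately after this
 * structure; both come from the single allocation made in
 * priv_flow_create_action_queue().
 */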

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};
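
/*
 * priv_flow_validate() walks this graph starting from the END entry: an
 * item is accepted only if it appears in the .items list of the previous
 * item, its spec/last/mask are checked against .mask, and .dst_sz bytes
 * are reserved in the Verbs attribute buffer for its conversion.
 */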

/* Structure to parse actions. */
struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct mlx5_flow_action actions; /**< Parsed action result. */
};
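
/*
 * The parser runs twice over the same pattern: a first pass with ibv_attr
 * left NULL only validates the flow and accumulates "offset" to size the
 * specification buffer, a second pass calls the convert() handlers which
 * write their Verbs specs into the allocated ibv_attr at "offset".
 */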

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .query = NULL,
        .isolate = mlx5_flow_isolate,
};
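
/*
 * These callbacks are not registered directly; the generic rte_flow layer
 * retrieves this structure through mlx5_dev_filter_ctrl() below with the
 * RTE_ETH_FILTER_GENERIC / RTE_ETH_FILTER_GET filter operation.
 */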

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        int ret = EINVAL;

        if (filter_type == RTE_ETH_FILTER_GENERIC) {
                if (filter_op != RTE_ETH_FILTER_GET)
                        return -EINVAL;
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        }
        ERROR("%p: filter type (%d) not supported",
              (void *)dev, filter_type);
        return -ret;
}

/**
 * Check support for a given item.
 *
 * @param item[in]
 *   Item specification.
 * @param mask[in]
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, non-zero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}
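
/*
 * Example: for a 16-bit field whose supported mask is 0x00ff, a spec of
 * 0x1234 is rejected (a bit outside the mask is set) while 0x0034 passes.
 * When both spec and last are present they must be identical once the
 * applied mask is taken into account, as ranges are not supported.
 */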

/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow_parse *flow)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        (void)priv;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (flow->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        flow->inner = 1;
                }
                flow->offset += cur_item->dst_sz;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        flow->actions.drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index >= priv->rxqs_n))
                                goto exit_action_not_supported;
                        for (n = 0; n < flow->actions.queues_n; ++n) {
                                if (flow->actions.queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (flow->actions.queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                flow->actions.queue = 1;
                                flow->actions.queues_n = 1;
                                flow->actions.queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (flow->actions.queues_n == 1) {
                                uint16_t found = 0;

                                assert(flow->actions.queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (flow->actions.queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        flow->actions.queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                flow->actions.queues[n] = rss->queue[n];
                        flow->actions.queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        flow->actions.mark = 1;
                        flow->actions.mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        flow->actions.mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_tag);
        if (!flow->ibv_attr && flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        if (!flow->actions.queue && !flow->actions.drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues_n = 0,
                },
        };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
        priv_unlock(priv);
        return ret;
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_flow_spec_eth) {
                .type = flow->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

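        /*
         * The VLAN TCI is matched inside the Ethernet specification written
         * by the preceding ETH item (this item has a dst_sz of 0), hence
         * stepping back by eth_size to patch that spec instead of appending
         * a new one.
         */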
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                             IBV_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv6 *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                             IBV_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_flow_spec_ipv6) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV6,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
                ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
                ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
        }
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
                              IBV_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
                              IBV_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param item[in]
 *   Item specification.
 * @param default_mask[in]
 *   Default bit-masks to use when item->mask is not provided.
 * @param data[in, out]
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;

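        /*
         * Assemble the 24-bit VNI into a 32-bit tunnel_id through the union
         * above: byte 0 is zeroed and the three VNI bytes are copied at
         * offsets 1-3 below.
         */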
        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_flow_spec_tunnel) {
                .type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        flow->inner = IBV_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}

/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
        struct ibv_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_flow_spec_action_tag){
                .type = IBV_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow_parse *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct ibv_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->drop = 1;
        drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *drop = (struct ibv_flow_spec_action_drop){
                        .type = IBV_FLOW_SPEC_ACTION_DROP,
                        .size = size,
        };
        ++flow->ibv_attr->num_of_specs;
        flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        rte_flow->ibv_attr = flow->ibv_attr;
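        /*
         * When the device is not started yet, do not create the Verbs flow:
         * the rule is kept on the list and is expected to be applied once
         * the device starts.
         */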
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow_parse *flow,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;

        assert(priv->pd);
        assert(priv->ctx);
        assert(!flow->actions.drop);
        rte_flow =
                rte_calloc(__func__, 1,
                           sizeof(*rte_flow) +
                           flow->actions.queues_n * sizeof(uint16_t),
                           0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->mark = flow->actions.mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
        memcpy(rte_flow->queues, flow->actions.queues,
               flow->actions.queues_n * sizeof(uint16_t));
        rte_flow->queues_n = flow->actions.queues_n;
        rte_flow->frxq.hash_fields = flow->hash_fields;
        rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
                                                 rss_hash_default_key_len,
                                                 flow->hash_fields,
                                                 (*rte_flow->queues),
                                                 rte_flow->queues_n);
        if (rte_flow->frxq.hrxq) {
                rte_flow_error_set(error, EEXIST, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "duplicated flow");
                goto error;
        }
        rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
                                                 rss_hash_default_key_len,
                                                 flow->hash_fields,
                                                 (*rte_flow->queues),
                                                 rte_flow->queues_n);
        if (!rte_flow->frxq.hrxq) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot create hash rxq");
                goto error;
        }
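        /*
         * Propagate the mark flag to every Rx queue targeted by this flow
         * so the datapath knows it may have to extract the flow tag from
         * completions.
         */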
        for (i = 0; i != flow->actions.queues_n; ++i) {
                struct mlx5_rxq_data *q =
                        (*priv->rxqs)[flow->actions.queues[i]];

                q->mark |= flow->actions.mark;
        }
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->frxq.hrxq)
                mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 struct mlx5_flows *list,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues = { 0 },
                        .queues_n = 0,
                },
        };
        int err;

        err = priv_flow_validate(priv, attr, items, actions, error, &flow);
        if (err)
                goto exit;
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_flow_attr){
                .type = IBV_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
        };
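        /*
         * Second parser pass: validation already succeeded above, so this
         * call cannot fail (hence claim_zero()); it runs the convert()
         * handlers to fill the freshly allocated ibv_attr.
         */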
        flow.inner = 0;
        flow.hash_fields = 0;
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow));
        if (flow.actions.mark && !flow.actions.drop) {
                mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
                flow.offset += sizeof(struct ibv_flow_spec_action_tag);
        }
        if (flow.actions.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, error);
        if (!rte_flow)
                goto exit;
        TAILQ_INSERT_TAIL(list, rte_flow, next);
        DEBUG("Flow created %p", (void *)rte_flow);
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
                                error);
        priv_unlock(priv);
        return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct mlx5_flows *list,
                  struct rte_flow *flow)
{
        unsigned int i;
        uint16_t *queues;
        uint16_t queues_n;

        if (flow->drop || !flow->mark)
                goto free;
        queues = flow->frxq.hrxq->ind_table->queues;
        queues_n = flow->frxq.hrxq->ind_table->queues_n;
        for (i = 0; i != queues_n; ++i) {
                struct rte_flow *tmp;
                struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
                int mark = 0;

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                TAILQ_FOREACH(tmp, list, next) {
                        unsigned int j;

                        if (!tmp->mark)
                                continue;
                        for (j = 0;
                             (j != tmp->frxq.hrxq->ind_table->queues_n) &&
                             !mark;
                             j++)
                                if (tmp->frxq.hrxq->ind_table->queues[j] ==
                                    queues[i])
                                        mark = 1;
                }
                rxq_data->mark = mark;
        }
free:
        if (flow->ibv_flow)
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
        if (!flow->drop)
                mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
        TAILQ_REMOVE(list, flow, next);
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_destroy(priv, &priv->flows, flow);
        priv_unlock(priv);
        return 0;
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
        while (!TAILQ_EMPTY(list)) {
                struct rte_flow *flow;

                flow = TAILQ_FIRST(list);
                priv_flow_destroy(priv, list, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv, &priv->flows);
        priv_unlock(priv);
        return 0;
}
1389
1390 /**
1391  * Create drop queue.
1392  *
1393  * @param priv
1394  *   Pointer to private structure.
1395  *
1396  * @return
1397  *   0 on success, -1 otherwise.
1398  */
1399 static int
1400 priv_flow_create_drop_queue(struct priv *priv)
1401 {
1402         struct mlx5_hrxq_drop *fdq = NULL;
1403
1404         assert(priv->pd);
1405         assert(priv->ctx);
1406         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1407         if (!fdq) {
1408                 WARN("cannot allocate memory for drop queue");
1409                 return -1; /* do not jump to error: fdq is NULL */
1410         }
1411         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
1412         if (!fdq->cq) {
1413                 WARN("cannot allocate CQ for drop queue");
1414                 goto error;
1415         }
1416         fdq->wq = ibv_create_wq(priv->ctx,
1417                         &(struct ibv_wq_init_attr){
1418                         .wq_type = IBV_WQT_RQ,
1419                         .max_wr = 1,
1420                         .max_sge = 1,
1421                         .pd = priv->pd,
1422                         .cq = fdq->cq,
1423                         });
1424         if (!fdq->wq) {
1425                 WARN("cannot allocate WQ for drop queue");
1426                 goto error;
1427         }
1428         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
1429                         &(struct ibv_rwq_ind_table_init_attr){
1430                         .log_ind_tbl_size = 0,
1431                         .ind_tbl = &fdq->wq,
1432                         .comp_mask = 0,
1433                         });
1434         if (!fdq->ind_table) {
1435                 WARN("cannot allocate indirection table for drop queue");
1436                 goto error;
1437         }
1438         fdq->qp = ibv_create_qp_ex(priv->ctx,
1439                 &(struct ibv_qp_init_attr_ex){
1440                         .qp_type = IBV_QPT_RAW_PACKET,
1441                         .comp_mask =
1442                                 IBV_QP_INIT_ATTR_PD |
1443                                 IBV_QP_INIT_ATTR_IND_TABLE |
1444                                 IBV_QP_INIT_ATTR_RX_HASH,
1445                         .rx_hash_conf = (struct ibv_rx_hash_conf){
1446                                 .rx_hash_function =
1447                                         IBV_RX_HASH_FUNC_TOEPLITZ,
1448                                 .rx_hash_key_len = rss_hash_default_key_len,
1449                                 .rx_hash_key = rss_hash_default_key,
1450                                 .rx_hash_fields_mask = 0,
1451                                 },
1452                         .rwq_ind_tbl = fdq->ind_table,
1453                         .pd = priv->pd
1454                 });
1455         if (!fdq->qp) {
1456                 WARN("cannot allocate QP for drop queue");
1457                 goto error;
1458         }
1459         priv->flow_drop_queue = fdq;
1460         return 0;
1461 error:
1462         if (fdq->qp)
1463                 claim_zero(ibv_destroy_qp(fdq->qp));
1464         if (fdq->ind_table)
1465                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
1466         if (fdq->wq)
1467                 claim_zero(ibv_destroy_wq(fdq->wq));
1468         if (fdq->cq)
1469                 claim_zero(ibv_destroy_cq(fdq->cq));
1470         if (fdq)
1471                 rte_free(fdq);
1472         priv->flow_drop_queue = NULL;
1473         return -1;
1474 }
1475
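/*
 * Note on priv_flow_create_drop_queue(): the Verbs objects form a
 * dependency chain, CQ -> WQ -> indirection table -> hash QP, created
 * in that order and destroyed in the reverse one.  Nothing is ever
 * posted on the single-entry WQ, so packets steered to this QP are
 * silently discarded, which is how the DROP action is implemented.
 */
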
1476 /**
1477  * Delete drop queue.
1478  *
1479  * @param priv
1480  *   Pointer to private structure.
1481  */
1482 static void
1483 priv_flow_delete_drop_queue(struct priv *priv)
1484 {
1485         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
1486
1487         if (!fdq)
1488                 return;
1489         if (fdq->qp)
1490                 claim_zero(ibv_destroy_qp(fdq->qp));
1491         if (fdq->ind_table)
1492                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
1493         if (fdq->wq)
1494                 claim_zero(ibv_destroy_wq(fdq->wq));
1495         if (fdq->cq)
1496                 claim_zero(ibv_destroy_cq(fdq->cq));
1497         rte_free(fdq);
1498         priv->flow_drop_queue = NULL;
1499 }
1500
1501 /**
1502  * Remove all flows.
1503  *
1504  * Called by dev_stop() to remove all flows.
1505  *
1506  * @param priv
1507  *   Pointer to private structure.
1508  * @param list
1509  *   Pointer to a TAILQ flow list.
1510  */
1511 void
1512 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
1513 {
1514         struct rte_flow *flow;
1515
1516         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
1517                 claim_zero(ibv_destroy_flow(flow->ibv_flow));
1518                 flow->ibv_flow = NULL;
1519                 if (flow->mark) {
1520                         unsigned int n;
1521                         struct mlx5_ind_table_ibv *ind_tbl =
1522                                 flow->frxq.hrxq->ind_table;
1523
1524                         for (n = 0; n < ind_tbl->queues_n; ++n)
1525                                 (*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
1526                 }
1527                 mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
1528                 flow->frxq.hrxq = NULL;
1529                 DEBUG("Flow %p removed", (void *)flow);
1530         }
1531         priv_flow_delete_drop_queue(priv);
1532 }
1533
1534 /**
1535  * Add all flows.
1536  *
1537  * @param priv
1538  *   Pointer to private structure.
1539  * @param list
1540  *   Pointer to a TAILQ flow list.
1541  *
1542  * @return
1543  *   0 on success, an errno value otherwise and rte_errno is set.
1544  */
1545 int
1546 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
1547 {
1548         int ret;
1549         struct rte_flow *flow;
1550
1551         ret = priv_flow_create_drop_queue(priv);
1552         if (ret)
1553                 return -1;
1554         TAILQ_FOREACH(flow, list, next) {
1555                 if (flow->frxq.hrxq)
1556                         goto flow_create;
1557                 flow->frxq.hrxq =
1558                         mlx5_priv_hrxq_get(priv, rss_hash_default_key,
1559                                            rss_hash_default_key_len,
1560                                            flow->frxq.hash_fields,
1561                                            (*flow->queues),
1562                                            flow->queues_n);
1563                 if (flow->frxq.hrxq)
1564                         goto flow_create;
1565                 flow->frxq.hrxq =
1566                         mlx5_priv_hrxq_new(priv, rss_hash_default_key,
1567                                            rss_hash_default_key_len,
1568                                            flow->frxq.hash_fields,
1569                                            (*flow->queues),
1570                                            flow->queues_n);
1571                 if (!flow->frxq.hrxq) {
1572                         DEBUG("Flow %p cannot be applied",
1573                               (void *)flow);
1574                         rte_errno = EINVAL;
1575                         return rte_errno;
1576                 }
1577 flow_create:
1578                 flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
1579                                                  flow->ibv_attr);
1580                 if (!flow->ibv_flow) {
1581                         DEBUG("Flow %p cannot be applied", (void *)flow);
1582                         rte_errno = EINVAL;
1583                         return rte_errno;
1584                 }
1585                 DEBUG("Flow %p applied", (void *)flow);
1586                 if (flow->mark) {
1587                         unsigned int n;
1588
1589                         for (n = 0;
1590                              n < flow->frxq.hrxq->ind_table->queues_n;
1591                              ++n) {
1592                                 uint16_t idx =
1593                                         flow->frxq.hrxq->ind_table->queues[n];
1594                                 (*priv->rxqs)[idx]->mark = 1;
1595                         }
1596                 }
1597         }
1598         return 0;
1599 }
1600
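/*
 * Illustration only, compiled out (hypothetical MLX5_FLOW_EXAMPLES
 * guard): the expected pairing of the two helpers above across a port
 * restart, as driven by dev_stop()/dev_start().
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_restart_flows(struct priv *priv)
{
        /* Detach every flow from the device, keeping its description. */
        priv_flow_stop(priv, &priv->flows);
        /* Recreate the drop queue and re-apply every flow. */
        return priv_flow_start(priv, &priv->flows);
}
#endif
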
1601 /**
1602  * Enable or disable isolated mode.
1603  *
1604  * @see rte_flow_isolate()
1605  * @see rte_flow_ops
1606  */
1607 int
1608 mlx5_flow_isolate(struct rte_eth_dev *dev,
1609                   int enable,
1610                   struct rte_flow_error *error)
1611 {
1612         struct priv *priv = dev->data->dev_private;
1613
1614         priv_lock(priv);
1615         if (dev->data->dev_started) {
1616                 rte_flow_error_set(error, EBUSY,
1617                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1618                                    NULL,
1619                                    "port must be stopped first");
1620                 priv_unlock(priv);
1621                 return -rte_errno;
1622         }
1623         priv->isolated = !!enable;
1624         priv_unlock(priv);
1625         return 0;
1626 }
1627
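/*
 * Illustration only, compiled out (hypothetical MLX5_FLOW_EXAMPLES
 * guard): isolated mode must be selected while the port is stopped,
 * typically before the first rte_eth_dev_start().
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_enable_isolated_mode(uint8_t port_id)
{
        struct rte_flow_error error;

        /* Fails with EBUSY if the port has already been started. */
        if (rte_flow_isolate(port_id, 1, &error))
                return -1;
        return rte_eth_dev_start(port_id);
}
#endif
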
1628 /**
1629  * Verify the flow list is empty.
1630  *
1631  * @param priv
1632  *   Pointer to private structure.
1633  *
1634  * @return The number of flows not released.
1635  */
1636 int
1637 priv_flow_verify(struct priv *priv)
1638 {
1639         struct rte_flow *flow;
1640         int ret = 0;
1641
1642         TAILQ_FOREACH(flow, &priv->flows, next) {
1643                 DEBUG("%p: flow %p still referenced", (void *)priv,
1644                       (void *)flow);
1645                 ++ret;
1646         }
1647         return ret;
1648 }
1649
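/*
 * Illustration only, compiled out (hypothetical MLX5_FLOW_EXAMPLES
 * guard): typical leak check at device teardown time.
 */
#ifdef MLX5_FLOW_EXAMPLES
static void
example_check_flow_leaks(struct priv *priv)
{
        int remaining = priv_flow_verify(priv);

        if (remaining)
                WARN("%d flow(s) still referenced", remaining);
}
#endif
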
1650 /**
1651  * Enable/disable a control flow configured from the control plane.
1652  *
1653  * @param dev
1654  *   Pointer to Ethernet device.
1655  * @param spec
1656  *   An Ethernet flow spec to apply.
1657  * @param mask
1658  *   An Ethernet flow mask to apply.
1659  * @param enable
1660  *   Enable/disable the flow.
1661  *
1662  * @return
1663  *   0 on success, 1 on failure.
1664  */
1665 int
1666 mlx5_ctrl_flow(struct rte_eth_dev *dev,
1667                struct rte_flow_item_eth *spec,
1668                struct rte_flow_item_eth *mask,
1669                unsigned int enable)
1670 {
1671         struct priv *priv = dev->data->dev_private;
1672         const struct rte_flow_attr attr = {
1673                 .ingress = 1,
1674                 .priority = MLX5_CTRL_FLOW_PRIORITY,
1675         };
1676         struct rte_flow_item items[] = {
1677                 {
1678                         .type = RTE_FLOW_ITEM_TYPE_ETH,
1679                         .spec = spec,
1680                         .last = NULL,
1681                         .mask = mask,
1682                 },
1683                 {
1684                         .type = RTE_FLOW_ITEM_TYPE_END,
1685                 },
1686         };
1687         struct rte_flow_action actions[] = {
1688                 {
1689                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
1690                         .conf = &(struct rte_flow_action_queue){
1691                                 .index = 0,
1692                         },
1693                 },
1694                 {
1695                         .type = RTE_FLOW_ACTION_TYPE_END,
1696                 },
1697         };
1698         struct rte_flow *flow;
1699         struct rte_flow_error error;
1700
1701         if (enable) {
1702                 flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items,
1703                                         actions, &error);
1704                 if (!flow)
1705                         return 1;
1706         } else {
1707                 struct eth_spec {
1708                         struct ibv_flow_attr ibv_attr;
1709                         struct ibv_flow_spec_eth eth;
1710                 } flow_spec; /* renamed: "spec" is already a parameter */
1711                 struct mlx5_flow_parse parser = {
1712                         .ibv_attr = &flow_spec.ibv_attr,
1713                         .offset = sizeof(struct ibv_flow_attr),
1714                 };
1715                 struct ibv_flow_spec_eth *eth;
1716                 const unsigned int attr_size = sizeof(struct ibv_flow_attr);
1717
1718                 claim_zero(mlx5_flow_create_eth(&items[0], NULL, &parser));
1719                 TAILQ_FOREACH(flow, &priv->ctrl_flows, next) {
1720                         eth = (void *)((uintptr_t)flow->ibv_attr + attr_size);
1721                         assert(eth->type == IBV_FLOW_SPEC_ETH);
1722                         if (!memcmp(eth, &flow_spec.eth, sizeof(*eth)))
1723                                 break;
1724                 }
1725                 if (flow) {
1726                         claim_zero(ibv_destroy_flow(flow->ibv_flow));
1727                         mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
1728                         rte_free(flow->ibv_attr);
1729                         DEBUG("Control flow destroyed %p", (void *)flow);
1730                         TAILQ_REMOVE(&priv->ctrl_flows, flow, next);
1731                         rte_free(flow);
1732                 }
1733         }
1734         return 0;
1735 }
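
/*
 * Illustration only, compiled out (hypothetical MLX5_FLOW_EXAMPLES
 * guard): how a promiscuous mode callback might use mlx5_ctrl_flow().
 * An all-zero Ethernet spec and mask match every frame, which is the
 * kind of control flow this commit uses to enable promiscuous mode.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_promiscuous_enable(struct rte_eth_dev *dev)
{
        struct rte_flow_item_eth spec = { .type = 0 };
        struct rte_flow_item_eth mask = { .type = 0 };

        return mlx5_ctrl_flow(dev, &spec, &mask, 1);
}
#endif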