net/mlx5: use flow to enable unicast traffic
drivers/net/mlx5/mlx5_flow.c (dpdk.git)
/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
        struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        union {
                struct mlx5_flow frxq; /**< Flow with Rx queue. */
                struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
        };
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

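/*
 * For instance (illustrative expansion), ITEMS(RTE_FLOW_ITEM_TYPE_ETH)
 * produces the anonymous, implicitly END-terminated array:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_ETH, RTE_FLOW_ITEM_TYPE_END,
 *   }
 */
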
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};

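/*
 * Example (illustrative): the pattern ETH / IPV4 / UDP / VXLAN / ETH is
 * accepted because each item appears in the .items list of its
 * predecessor, starting from the RTE_FLOW_ITEM_TYPE_END entry; the inner
 * Ethernet item restarts the layer-2 chain inside the tunnel.
 */
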
/* Structure to parse actions. */
struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct mlx5_flow_action actions; /**< Parsed action result. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
        .query = NULL,
        .isolate = mlx5_flow_isolate,
};

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
                     enum rte_filter_type filter_type,
                     enum rte_filter_op filter_op,
                     void *arg)
{
        int ret = EINVAL;

        if (filter_type == RTE_ETH_FILTER_GENERIC) {
                if (filter_op != RTE_ETH_FILTER_GET)
                        return -EINVAL;
                *(const void **)arg = &mlx5_flow_ops;
                return 0;
        }
        ERROR("%p: filter type (%d) not supported",
              (void *)dev, filter_type);
        return -ret;
}

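/*
 * Illustrative call sequence (not part of the driver): the generic flow
 * API layer fetches these operations through the filter control hook:
 *
 *   const struct rte_flow_ops *ops = NULL;
 *
 *   mlx5_dev_filter_ctrl(dev, RTE_ETH_FILTER_GENERIC,
 *                        RTE_ETH_FILTER_GET, &ops);
 *   assert(ops == &mlx5_flow_ops);
 */
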
/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-Mask size in bytes.
 *
 * @return
 *   0 on success, a nonzero value otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

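/*
 * Note (illustrative): the final memcmp() effectively rejects ranges.
 * For a TCP item with:
 *
 *   spec.hdr.dst_port = rte_cpu_to_be_16(1000);
 *   last.hdr.dst_port = rte_cpu_to_be_16(2000);
 *   mask.hdr.dst_port = rte_cpu_to_be_16(0xffff);
 *
 * the masked spec (1000) and masked last (2000) differ, so the function
 * returns nonzero; only ranges where last equals spec under the mask
 * pass validation.
 */
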
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow_parse *flow)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        (void)priv;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (flow->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        flow->inner = 1;
                }
                flow->offset += cur_item->dst_sz;
        }
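        /*
         * When flow->ibv_attr is NULL, the pass above only sizes the Verbs
         * specification buffer through flow->offset; priv_flow_create()
         * then allocates ibv_attr and runs a second pass that converts.
         */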
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        flow->actions.drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < flow->actions.queues_n; ++n) {
                                if (flow->actions.queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (flow->actions.queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                flow->actions.queue = 1;
                                flow->actions.queues_n = 1;
                                flow->actions.queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (flow->actions.queues_n == 1) {
                                uint16_t found = 0;

                                assert(flow->actions.queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (flow->actions.queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        flow->actions.queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                flow->actions.queues[n] = rss->queue[n];
                        flow->actions.queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        flow->actions.mark = 1;
                        flow->actions.mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        flow->actions.mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_tag);
        if (!flow->ibv_attr && flow->actions.drop)
                flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        if (!flow->actions.queue && !flow->actions.drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues_n = 0,
                },
        };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
        priv_unlock(priv);
        return ret;
}

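/*
 * Illustrative application-side use (the public API below lands in
 * mlx5_flow_validate()):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_DROP },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *
 *   if (rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *           printf("not supported: %s\n", err.message ? err.message : "?");
 */
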
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_flow_spec_eth) {
                .type = flow->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

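/*
 * Example (illustrative): the unicast control flows this patch enables
 * pass an Ethernet item whose spec carries the port MAC address and
 * whose mask sets dst.addr_bytes to all-ones; the resulting Verbs spec
 * then matches exactly that destination MAC, the trailing loop clearing
 * any value bits not covered by the mask.
 */
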
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

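        /*
         * VLAN matching amends the Ethernet specification written just
         * before by mlx5_flow_create_eth() instead of emitting a new one,
         * hence .dst_sz = 0 for this item: step back over the last spec.
         */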
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                             IBV_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_ipv6 *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                             IBV_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_flow_spec_ipv6) {
                .type = flow->inner | IBV_FLOW_SPEC_IPV6,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
                ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
                ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
        }
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
                              IBV_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
                              IBV_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
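        /* Keep byte 0 clear: the 24-bit VNI lands in bytes 1-3 below. */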
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_flow_spec_tunnel) {
                .type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        flow->inner = IBV_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}

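/*
 * Example (illustrative): a VNI of 0x123456 arrives as spec->vni =
 * { 0x12, 0x34, 0x56 }; after the copy the union bytes read
 * { 0x00, 0x12, 0x34, 0x56 } and tunnel_id takes that 32-bit pattern
 * verbatim, no byte swapping being performed here.
 */
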
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
        struct ibv_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_flow_spec_action_tag){
                .type = IBV_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow_parse *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct ibv_flow_spec_action_drop *drop;
        unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->drop = 1;
        drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *drop = (struct ibv_flow_spec_action_drop){
                        .type = IBV_FLOW_SPEC_ACTION_DROP,
                        .size = size,
        };
        ++flow->ibv_attr->num_of_specs;
        flow->offset += sizeof(struct ibv_flow_spec_action_drop);
        rte_flow->ibv_attr = flow->ibv_attr;
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow_parse *flow,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;

        assert(priv->pd);
        assert(priv->ctx);
        assert(!flow->actions.drop);
        rte_flow =
                rte_calloc(__func__, 1,
                           sizeof(*rte_flow) +
                           flow->actions.queues_n * sizeof(uint16_t),
                           0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->mark = flow->actions.mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
        memcpy(rte_flow->queues, flow->actions.queues,
               flow->actions.queues_n * sizeof(uint16_t));
        rte_flow->queues_n = flow->actions.queues_n;
        rte_flow->frxq.hash_fields = flow->hash_fields;
        rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
                                                 rss_hash_default_key_len,
                                                 flow->hash_fields,
                                                 (*rte_flow->queues),
                                                 rte_flow->queues_n);
        if (!rte_flow->frxq.hrxq) {
                rte_flow->frxq.hrxq =
                        mlx5_priv_hrxq_new(priv, rss_hash_default_key,
                                           rss_hash_default_key_len,
                                           flow->hash_fields,
                                           (*rte_flow->queues),
                                           rte_flow->queues_n);
                if (!rte_flow->frxq.hrxq) {
                        rte_flow_error_set(error, ENOMEM,
                                           RTE_FLOW_ERROR_TYPE_HANDLE,
                                           NULL, "cannot create hash rxq");
                        goto error;
                }
        }
        for (i = 0; i != flow->actions.queues_n; ++i) {
                struct mlx5_rxq_data *q =
                        (*priv->rxqs)[flow->actions.queues[i]];

                q->mark |= flow->actions.mark;
        }
        if (!priv->dev->data->dev_started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
                                             rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->frxq.hrxq)
                mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
        rte_free(rte_flow);
        return NULL;
}

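/*
 * Note (illustrative): mlx5_priv_hrxq_get()/mlx5_priv_hrxq_new() form a
 * get-or-create pair, so flows sharing the same RSS key, hash fields and
 * queue list reuse a single hash Rx queue object instead of allocating
 * one each.
 */
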
/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 struct mlx5_flows *list,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow_parse flow = {
                .offset = sizeof(struct ibv_flow_attr),
                .actions = {
                        .mark_id = MLX5_FLOW_MARK_DEFAULT,
                        .queues = { 0 },
                        .queues_n = 0,
                },
        };
        int err;

        err = priv_flow_validate(priv, attr, items, actions, error, &flow);
        if (err)
                goto exit;
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_flow_attr){
                .type = IBV_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
        };
        flow.inner = 0;
        flow.hash_fields = 0;
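        /* Second pass: ibv_attr is allocated, items are now converted. */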
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow));
        if (flow.actions.mark && !flow.actions.drop) {
                mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
                flow.offset += sizeof(struct ibv_flow_spec_action_tag);
        }
        if (flow.actions.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, error);
        if (!rte_flow)
                goto exit;
        TAILQ_INSERT_TAIL(list, rte_flow, next);
        DEBUG("Flow created %p", (void *)rte_flow);
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
                                error);
        priv_unlock(priv);
        return flow;
}

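/*
 * Example (illustrative): steering unicast traffic for a given MAC to
 * queue 0, similar to what the control-flow helpers set up internally
 * (MAC address and queue index are hypothetical):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item_eth spec = {
 *           .dst.addr_bytes = "\x00\x16\x3e\x00\x00\x01",
 *   };
 *   struct rte_flow_item_eth mask = {
 *           .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *   };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH, .spec = &spec, .mask = &mask },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *             .conf = &(struct rte_flow_action_queue){ .index = 0 } },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *                                        actions, &err);
 */
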
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct mlx5_flows *list,
                  struct rte_flow *flow)
{
        unsigned int i;
        uint16_t *queues;
        uint16_t queues_n;

        if (flow->drop || !flow->mark)
                goto free;
        queues = flow->frxq.hrxq->ind_table->queues;
        queues_n = flow->frxq.hrxq->ind_table->queues_n;
        for (i = 0; i != queues_n; ++i) {
                struct rte_flow *tmp;
                struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
                int mark = 0;

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                TAILQ_FOREACH(tmp, list, next) {
                        unsigned int j;

                        if (!tmp->mark)
                                continue;
                        for (j = 0;
                             (j != tmp->frxq.hrxq->ind_table->queues_n) &&
                             !mark;
                             j++)
                                if (tmp->frxq.hrxq->ind_table->queues[j] ==
                                    queues[i])
                                        mark = 1;
                }
                rxq_data->mark = mark;
        }
free:
        if (flow->ibv_flow)
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
        if (!flow->drop)
                mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
        TAILQ_REMOVE(list, flow, next);
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}

1331 /**
1332  * Destroy a flow.
1333  *
1334  * @see rte_flow_destroy()
1335  * @see rte_flow_ops
1336  */
1337 int
1338 mlx5_flow_destroy(struct rte_eth_dev *dev,
1339                   struct rte_flow *flow,
1340                   struct rte_flow_error *error)
1341 {
1342         struct priv *priv = dev->data->dev_private;
1343
1344         (void)error;
1345         priv_lock(priv);
1346         priv_flow_destroy(priv, &priv->flows, flow);
1347         priv_unlock(priv);
1348         return 0;
1349 }
1350
/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
{
        while (!TAILQ_EMPTY(list)) {
                struct rte_flow *flow;

                flow = TAILQ_FIRST(list);
                priv_flow_destroy(priv, list, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv, &priv->flows);
        priv_unlock(priv);
        return 0;
}

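/*
 * Illustrative usage (a sketch, not part of the driver): from the
 * application side this entry point is reached through the generic API,
 * e.g.:
 *
 *        struct rte_flow_error err;
 *
 *        if (rte_flow_flush(port_id, &err))
 *                printf("cannot flush flows: %s\n",
 *                       err.message ? err.message : "(no message)");
 */
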
/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, -1 otherwise.
 */
int
priv_flow_create_drop_queue(struct priv *priv)
{
        struct mlx5_hrxq_drop *fdq = NULL;

        assert(priv->pd);
        assert(priv->ctx);
        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
        if (!fdq) {
                WARN("cannot allocate memory for drop queue");
                goto error;
        }
        fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
        if (!fdq->cq) {
                WARN("cannot allocate CQ for drop queue");
                goto error;
        }
        fdq->wq = ibv_create_wq(priv->ctx,
                        &(struct ibv_wq_init_attr){
                        .wq_type = IBV_WQT_RQ,
                        .max_wr = 1,
                        .max_sge = 1,
                        .pd = priv->pd,
                        .cq = fdq->cq,
                        });
        if (!fdq->wq) {
                WARN("cannot allocate WQ for drop queue");
                goto error;
        }
        fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
                        &(struct ibv_rwq_ind_table_init_attr){
                        .log_ind_tbl_size = 0,
                        .ind_tbl = &fdq->wq,
                        .comp_mask = 0,
                        });
        if (!fdq->ind_table) {
                WARN("cannot allocate indirection table for drop queue");
                goto error;
        }
        fdq->qp = ibv_create_qp_ex(priv->ctx,
                &(struct ibv_qp_init_attr_ex){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_QP_INIT_ATTR_PD |
                                IBV_QP_INIT_ATTR_IND_TABLE |
                                IBV_QP_INIT_ATTR_RX_HASH,
                        .rx_hash_conf = (struct ibv_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                                },
                        .rwq_ind_tbl = fdq->ind_table,
                        .pd = priv->pd
                });
        if (!fdq->qp) {
                WARN("cannot allocate QP for drop queue");
                goto error;
        }
        priv->flow_drop_queue = fdq;
        return 0;
error:
        /* fdq is NULL when its own allocation failed; do not dereference. */
        if (fdq) {
                if (fdq->qp)
                        claim_zero(ibv_destroy_qp(fdq->qp));
                if (fdq->ind_table)
                        claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
                if (fdq->wq)
                        claim_zero(ibv_destroy_wq(fdq->wq));
                if (fdq->cq)
                        claim_zero(ibv_destroy_cq(fdq->cq));
                rte_free(fdq);
        }
        priv->flow_drop_queue = NULL;
        return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_delete_drop_queue(struct priv *priv)
{
        struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

        if (!fdq)
                return;
        if (fdq->qp)
                claim_zero(ibv_destroy_qp(fdq->qp));
        if (fdq->ind_table)
                claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
        if (fdq->wq)
                claim_zero(ibv_destroy_wq(fdq->wq));
        if (fdq->cq)
                claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
        priv->flow_drop_queue = NULL;
}

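/*
 * Illustrative pairing (a sketch, not part of the driver): the drop queue
 * is meant to live for the whole device lifetime, created once before any
 * drop flow is requested and deleted on teardown, e.g.:
 *
 *        if (priv_flow_create_drop_queue(priv))
 *                goto error;
 *        ...
 *        priv_flow_delete_drop_queue(priv);
 */
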
/**
 * Remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 */
void
priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
{
        struct rte_flow *flow;

        TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
                claim_zero(ibv_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                /*
                 * Clear the queue marks before the hash Rx queue (and thus
                 * its indirection table) is released below.
                 */
                if (flow->mark) {
                        unsigned int n;
                        struct mlx5_ind_table_ibv *ind_tbl =
                                flow->frxq.hrxq->ind_table;

                        for (n = 0; n < ind_tbl->queues_n; ++n)
                                (*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
                }
                mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
                flow->frxq.hrxq = NULL;
                DEBUG("Flow %p removed", (void *)flow);
        }
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 * @param list
 *   Pointer to a TAILQ flow list.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv, struct mlx5_flows *list)
{
        struct rte_flow *flow;

        TAILQ_FOREACH(flow, list, next) {
                if (flow->frxq.hrxq)
                        goto flow_create;
                flow->frxq.hrxq =
                        mlx5_priv_hrxq_get(priv, rss_hash_default_key,
                                           rss_hash_default_key_len,
                                           flow->frxq.hash_fields,
                                           (*flow->queues),
                                           flow->queues_n);
                if (flow->frxq.hrxq)
                        goto flow_create;
                flow->frxq.hrxq =
                        mlx5_priv_hrxq_new(priv, rss_hash_default_key,
                                           rss_hash_default_key_len,
                                           flow->frxq.hash_fields,
                                           (*flow->queues),
                                           flow->queues_n);
                if (!flow->frxq.hrxq) {
                        DEBUG("Flow %p cannot be applied",
                              (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
flow_create:
                flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
                                                 flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0;
                             n < flow->frxq.hrxq->ind_table->queues_n;
                             ++n) {
                                uint16_t idx =
                                        flow->frxq.hrxq->ind_table->queues[n];
                                (*priv->rxqs)[idx]->mark = 1;
                        }
                }
        }
        return 0;
}

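/*
 * Illustrative pairing (a sketch, not part of the driver): the two
 * functions above are meant to bracket a device restart; the rte_flow
 * objects survive while their Verbs resources are torn down and
 * re-created, e.g.:
 *
 *        priv_flow_stop(priv, &priv->flows);
 *        ... stop and reconfigure the Rx queues ...
 *        if (priv_flow_start(priv, &priv->flows))
 *                ... handle the error, e.g. stop the port ...
 */
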
/**
 * Isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
                  int enable,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        priv_lock(priv);
        if (dev->data->dev_started) {
                rte_flow_error_set(error, EBUSY,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "port must be stopped first");
                priv_unlock(priv);
                return -rte_errno;
        }
        priv->isolated = !!enable;
        priv_unlock(priv);
        return 0;
}

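/*
 * Illustrative usage (a sketch, not part of the driver): isolated mode
 * must be requested while the port is stopped, e.g.:
 *
 *        struct rte_flow_error err;
 *
 *        if (rte_flow_isolate(port_id, 1, &err))
 *                printf("cannot enter isolated mode: %s\n",
 *                       err.message ? err.message : "(no message)");
 *        rte_eth_dev_start(port_id);
 */
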
/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
        struct rte_flow *flow;
        int ret = 0;

        TAILQ_FOREACH(flow, &priv->flows, next) {
                DEBUG("%p: flow %p still referenced", (void *)priv,
                      (void *)flow);
                ++ret;
        }
        return ret;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 * @param vlan_spec
 *   A VLAN flow spec to apply.
 * @param vlan_mask
 *   A VLAN flow mask to apply.
 *
 * @return
 *   0 on success, an errno value otherwise.
 */
int
mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
                    struct rte_flow_item_eth *eth_spec,
                    struct rte_flow_item_eth *eth_mask,
                    struct rte_flow_item_vlan *vlan_spec,
                    struct rte_flow_item_vlan *vlan_mask)
{
        struct priv *priv = dev->data->dev_private;
        const struct rte_flow_attr attr = {
                .ingress = 1,
                .priority = MLX5_CTRL_FLOW_PRIORITY,
        };
        struct rte_flow_item items[] = {
                {
                        .type = RTE_FLOW_ITEM_TYPE_ETH,
                        .spec = eth_spec,
                        .last = NULL,
                        .mask = eth_mask,
                },
                {
                        .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
                                RTE_FLOW_ITEM_TYPE_END,
                        .spec = vlan_spec,
                        .last = NULL,
                        .mask = vlan_mask,
                },
                {
                        .type = RTE_FLOW_ITEM_TYPE_END,
                },
        };
        struct rte_flow_action actions[] = {
                {
                        .type = RTE_FLOW_ACTION_TYPE_QUEUE,
                        .conf = &(struct rte_flow_action_queue){
                                .index = 0,
                        },
                },
                {
                        .type = RTE_FLOW_ACTION_TYPE_END,
                },
        };
        struct rte_flow *flow;
        struct rte_flow_error error;

        flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
                                &error);
        if (!flow)
                return rte_errno;
        return 0;
}

/**
 * Enable a control flow configured from the control plane.
 *
 * @param dev
 *   Pointer to Ethernet device.
 * @param eth_spec
 *   An Ethernet flow spec to apply.
 * @param eth_mask
 *   An Ethernet flow mask to apply.
 *
 * @return
 *   0 on success, an errno value otherwise.
 */
int
mlx5_ctrl_flow(struct rte_eth_dev *dev,
               struct rte_flow_item_eth *eth_spec,
               struct rte_flow_item_eth *eth_mask)
{
        return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
}
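
/*
 * Illustrative usage (a sketch, not part of the driver): control-plane
 * code can rely on this helper to let unicast traffic in for a given MAC
 * address; the address below is a placeholder, real callers use the
 * addresses configured on the port:
 *
 *        struct rte_flow_item_eth unicast = {
 *                .dst.addr_bytes = "\x00\x11\x22\x33\x44\x55",
 *        };
 *        struct rte_flow_item_eth unicast_mask = {
 *                .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
 *        };
 *
 *        if (mlx5_ctrl_flow(dev, &unicast, &unicast_mask))
 *                ... handle the error ...
 */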