/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
	struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
	struct ibv_qp *qp; /**< Verbs queue pair. */
	struct ibv_wq *wq; /**< Verbs work queue. */
	struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structures. */
struct mlx5_flow_drop {
	struct mlx5_hrxq_drop hrxq; /**< Drop hash Rx queue. */
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
	uint32_t mark:1; /**< Set if the flow is marked. */
	uint32_t drop:1; /**< Drop queue. */
	struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
	struct ibv_flow *ibv_flow; /**< Verbs flow. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
	uint16_t (*queues)[]; /**< Queue indexes to use. */
	union {
		struct mlx5_flow frxq; /**< Flow with Rx queue. */
		struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
	};
};

/** Static initializer for items. */
#define ITEMS(...) \
	(const enum rte_flow_item_type []){ \
		__VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
	}

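/*
 * For instance, ITEMS(RTE_FLOW_ITEM_TYPE_IPV4, RTE_FLOW_ITEM_TYPE_IPV6)
 * expands to the END-terminated compound literal:
 *
 *	(const enum rte_flow_item_type []){
 *		RTE_FLOW_ITEM_TYPE_IPV4,
 *		RTE_FLOW_ITEM_TYPE_IPV6,
 *		RTE_FLOW_ITEM_TYPE_END,
 *	}
 */
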
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
	/** List of possible actions for these items. */
	const enum rte_flow_action_type *const actions;
	/** Bit-masks corresponding to the possibilities for the item. */
	const void *mask;
	/**
	 * Default bit-masks to use when item->mask is not provided. When
	 * \default_mask is also NULL, the full supported bit-mask (\mask) is
	 * used instead.
	 */
	const void *default_mask;
	/** Bit-mask size in bytes. */
	const unsigned int mask_sz;
	/**
	 * Conversion function from rte_flow to NIC specific flow.
	 *
	 * @param item
	 *   rte_flow item to convert.
	 * @param default_mask
	 *   Default bit-masks to use when item->mask is not provided.
	 * @param data
	 *   Internal structure to store the conversion.
	 *
	 * @return
	 *   0 on success, negative value otherwise.
	 */
	int (*convert)(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data);
	/** Size in bytes of the destination structure. */
	const unsigned int dst_sz;
	/** List of possible following items. */
	const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
	RTE_FLOW_ACTION_TYPE_DROP,
	RTE_FLOW_ACTION_TYPE_QUEUE,
	RTE_FLOW_ACTION_TYPE_MARK,
	RTE_FLOW_ACTION_TYPE_FLAG,
	RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
	[RTE_FLOW_ITEM_TYPE_END] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
			       RTE_FLOW_ITEM_TYPE_VXLAN),
	},
	[RTE_FLOW_ITEM_TYPE_ETH] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
			       RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_eth){
			.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
			.type = -1,
		},
		.default_mask = &rte_flow_item_eth_mask,
		.mask_sz = sizeof(struct rte_flow_item_eth),
		.convert = mlx5_flow_create_eth,
		.dst_sz = sizeof(struct ibv_flow_spec_eth),
	},
	[RTE_FLOW_ITEM_TYPE_VLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
			       RTE_FLOW_ITEM_TYPE_IPV6),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vlan){
			.tci = -1,
		},
		.default_mask = &rte_flow_item_vlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vlan),
		.convert = mlx5_flow_create_vlan,
		.dst_sz = 0,
	},
	[RTE_FLOW_ITEM_TYPE_IPV4] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv4){
			.hdr = {
				.src_addr = -1,
				.dst_addr = -1,
				.type_of_service = -1,
				.next_proto_id = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv4_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv4),
		.convert = mlx5_flow_create_ipv4,
		/*
		 * The conversion function writes a struct
		 * ibv_flow_spec_ipv4_ext (it carries the TOS field), so the
		 * reserved destination size must match that structure.
		 */
		.dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
	},
	[RTE_FLOW_ITEM_TYPE_IPV6] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
			       RTE_FLOW_ITEM_TYPE_TCP),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_ipv6){
			.hdr = {
				.src_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.dst_addr = {
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
					0xff, 0xff, 0xff, 0xff,
				},
				.vtc_flow = -1,
				.proto = -1,
				.hop_limits = -1,
			},
		},
		.default_mask = &rte_flow_item_ipv6_mask,
		.mask_sz = sizeof(struct rte_flow_item_ipv6),
		.convert = mlx5_flow_create_ipv6,
		.dst_sz = sizeof(struct ibv_flow_spec_ipv6),
	},
	[RTE_FLOW_ITEM_TYPE_UDP] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_udp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_udp_mask,
		.mask_sz = sizeof(struct rte_flow_item_udp),
		.convert = mlx5_flow_create_udp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_TCP] = {
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_tcp){
			.hdr = {
				.src_port = -1,
				.dst_port = -1,
			},
		},
		.default_mask = &rte_flow_item_tcp_mask,
		.mask_sz = sizeof(struct rte_flow_item_tcp),
		.convert = mlx5_flow_create_tcp,
		.dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
	},
	[RTE_FLOW_ITEM_TYPE_VXLAN] = {
		.items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
		.actions = valid_actions,
		.mask = &(const struct rte_flow_item_vxlan){
			.vni = "\xff\xff\xff",
		},
		.default_mask = &rte_flow_item_vxlan_mask,
		.mask_sz = sizeof(struct rte_flow_item_vxlan),
		.convert = mlx5_flow_create_vxlan,
		.dst_sz = sizeof(struct ibv_flow_spec_tunnel),
	},
};

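/*
 * The graph above is walked from RTE_FLOW_ITEM_TYPE_END. For example, the
 * pattern ETH / IPV4 / UDP / VXLAN / ETH is accepted because each item
 * appears in the .items list of its predecessor (the trailing ETH through
 * VXLAN's .items), while e.g. ETH / UDP is rejected since UDP does not
 * directly follow ETH in the graph.
 */
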
/* Structure to parse actions. */
struct mlx5_flow_action {
	uint32_t queue:1; /**< Target is a receive queue. */
	uint32_t drop:1; /**< Target is a drop queue. */
	uint32_t mark:1; /**< Mark is present in the flow. */
	uint32_t mark_id; /**< Mark identifier. */
	uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
	uint16_t queues_n; /**< Number of entries in queues[]. */
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
	struct ibv_flow_attr *ibv_attr; /**< Verbs attribute. */
	unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
	uint32_t inner; /**< Set once VXLAN is encountered. */
	uint64_t hash_fields; /**< Fields that participate in the hash. */
	struct mlx5_flow_action actions; /**< Parsed action result. */
};

static const struct rte_flow_ops mlx5_flow_ops = {
	.validate = mlx5_flow_validate,
	.create = mlx5_flow_create,
	.destroy = mlx5_flow_destroy,
	.flush = mlx5_flow_flush,
	.query = NULL,
	.isolate = mlx5_flow_isolate,
};

/**
 * Manage filter operations.
 *
 * @param dev
 *   Pointer to Ethernet device structure.
 * @param filter_type
 *   Filter type.
 * @param filter_op
 *   Operation to perform.
 * @param arg
 *   Pointer to operation-specific structure.
 *
 * @return
 *   0 on success, negative errno value on failure.
 */
int
mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
		     enum rte_filter_type filter_type,
		     enum rte_filter_op filter_op,
		     void *arg)
{
	int ret = EINVAL;

	if (filter_type == RTE_ETH_FILTER_GENERIC) {
		if (filter_op != RTE_ETH_FILTER_GET)
			return -EINVAL;
		*(const void **)arg = &mlx5_flow_ops;
		return 0;
	}
	ERROR("%p: filter type (%d) not supported",
	      (void *)dev, filter_type);
	return -ret;
}

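/*
 * Sketch of how an application reaches mlx5_flow_ops through the generic
 * filter API (illustrative only; "port_id" is a placeholder). rte_flow
 * performs this lookup internally before dispatching to the PMD callbacks:
 *
 *	const struct rte_flow_ops *ops;
 *
 *	if (!rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *				     RTE_ETH_FILTER_GET, &ops))
 *		ops->validate(dev, attr, pattern, actions, error);
 */
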
/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, non-zero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
			const uint8_t *mask, unsigned int size)
{
	int ret = 0;

	if (!item->spec && (item->mask || item->last))
		return -1;
	if (item->spec && !item->mask) {
		unsigned int i;
		const uint8_t *spec = item->spec;

		for (i = 0; i < size; ++i)
			if ((spec[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->last && !item->mask) {
		unsigned int i;
		const uint8_t *last = item->last;

		for (i = 0; i < size; ++i)
			if ((last[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->mask) {
		unsigned int i;
		const uint8_t *m = item->mask;

		for (i = 0; i < size; ++i)
			if ((m[i] | mask[i]) != mask[i])
				return -1;
	}
	if (item->spec && item->last) {
		uint8_t spec[size];
		uint8_t last[size];
		const uint8_t *apply = mask;
		unsigned int i;

		if (item->mask)
			apply = item->mask;
		for (i = 0; i < size; ++i) {
			spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
			last[i] = ((const uint8_t *)item->last)[i] & apply[i];
		}
		ret = memcmp(spec, last, size);
	}
	return ret;
}

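/*
 * Illustration: with the Ethernet item's supported mask above (dst, src
 * and type fully maskable), a spec such as .type = 0x0800 passes, whereas
 * any bit set outside the supported bit-mask makes one of the
 * (value | mask) != mask checks fail and the item is rejected.
 */
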
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error,
		   struct mlx5_flow_parse *flow)
{
	const struct mlx5_flow_items *cur_item = mlx5_flow_items;

	(void)priv;
	if (attr->group) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
				   NULL,
				   "groups are not supported");
		return -rte_errno;
	}
	if (attr->priority) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
				   NULL,
				   "priorities are not supported");
		return -rte_errno;
	}
	if (attr->egress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
				   NULL,
				   "egress is not supported");
		return -rte_errno;
	}
	if (!attr->ingress) {
		rte_flow_error_set(error, ENOTSUP,
				   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
				   NULL,
				   "only ingress is supported");
		return -rte_errno;
	}
	for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
		const struct mlx5_flow_items *token = NULL;
		unsigned int i;
		int err;

		if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
			continue;
		for (i = 0;
		     cur_item->items &&
		     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
		     ++i) {
			if (cur_item->items[i] == items->type) {
				token = &mlx5_flow_items[items->type];
				break;
			}
		}
		if (!token)
			goto exit_item_not_supported;
		cur_item = token;
		err = mlx5_flow_item_validate(items,
					      (const uint8_t *)cur_item->mask,
					      cur_item->mask_sz);
		if (err)
			goto exit_item_not_supported;
		if (flow->ibv_attr && cur_item->convert) {
			err = cur_item->convert(items,
						(cur_item->default_mask ?
						 cur_item->default_mask :
						 cur_item->mask),
						flow);
			if (err)
				goto exit_item_not_supported;
		} else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
			if (flow->inner) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ITEM,
						   items,
						   "cannot recognize multiple"
						   " VXLAN encapsulations");
				return -rte_errno;
			}
			flow->inner = 1;
		}
		flow->offset += cur_item->dst_sz;
	}
	for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
		if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
			continue;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
			flow->actions.drop = 1;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
			const struct rte_flow_action_queue *queue =
				(const struct rte_flow_action_queue *)
				actions->conf;
			uint16_t n;
			uint16_t found = 0;

			/* >= avoids an unsigned underflow when rxqs_n is 0. */
			if (!queue || (queue->index >= priv->rxqs_n))
				goto exit_action_not_supported;
			for (n = 0; n < flow->actions.queues_n; ++n) {
				if (flow->actions.queues[n] == queue->index) {
					found = 1;
					break;
				}
			}
			if (flow->actions.queues_n > 1 && !found) {
				rte_flow_error_set(error, ENOTSUP,
					   RTE_FLOW_ERROR_TYPE_ACTION,
					   actions,
					   "queue action not in RSS queues");
				return -rte_errno;
			}
			if (!found) {
				flow->actions.queue = 1;
				flow->actions.queues_n = 1;
				flow->actions.queues[0] = queue->index;
			}
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
			const struct rte_flow_action_rss *rss =
				(const struct rte_flow_action_rss *)
				actions->conf;
			uint16_t n;

			if (!rss || !rss->num) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "no valid queues");
				return -rte_errno;
			}
			if (flow->actions.queues_n == 1) {
				uint16_t found = 0;

				assert(flow->actions.queues_n);
				for (n = 0; n < rss->num; ++n) {
					if (flow->actions.queues[0] ==
					    rss->queue[n]) {
						found = 1;
						break;
					}
				}
				if (!found) {
					rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue action not in RSS"
						   " queues");
					return -rte_errno;
				}
			}
			for (n = 0; n < rss->num; ++n) {
				if (rss->queue[n] >= priv->rxqs_n) {
					rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "queue id > number of"
						   " queues");
					return -rte_errno;
				}
			}
			flow->actions.queue = 1;
			for (n = 0; n < rss->num; ++n)
				flow->actions.queues[n] = rss->queue[n];
			flow->actions.queues_n = rss->num;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
			const struct rte_flow_action_mark *mark =
				(const struct rte_flow_action_mark *)
				actions->conf;

			if (!mark) {
				rte_flow_error_set(error, EINVAL,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be defined");
				return -rte_errno;
			} else if (mark->id >= MLX5_FLOW_MARK_MAX) {
				rte_flow_error_set(error, ENOTSUP,
						   RTE_FLOW_ERROR_TYPE_ACTION,
						   actions,
						   "mark must be between 0"
						   " and 16777199");
				return -rte_errno;
			}
			flow->actions.mark = 1;
			flow->actions.mark_id = mark->id;
		} else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
			flow->actions.mark = 1;
		} else {
			goto exit_action_not_supported;
		}
	}
	if (flow->actions.mark && !flow->ibv_attr && !flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_tag);
	if (!flow->ibv_attr && flow->actions.drop)
		flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	if (!flow->actions.queue && !flow->actions.drop) {
		rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "no valid action");
		return -rte_errno;
	}
	return 0;
exit_item_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
			   items, "item not supported");
	return -rte_errno;
exit_action_not_supported:
	rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
			   actions, "action not supported");
	return -rte_errno;
}

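/*
 * Example of a rule the function above accepts (illustrative): attribute
 * { .ingress = 1 }, pattern ETH / IPV4 / UDP / END and action QUEUE
 * index 0 / END. When flow->ibv_attr is NULL the call only validates and
 * accumulates flow->offset, i.e. the size of the Verbs specification to
 * allocate; once the caller has allocated ibv_attr, a second call
 * converts each item in place through its convert() handler.
 */
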
/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
		   const struct rte_flow_attr *attr,
		   const struct rte_flow_item items[],
		   const struct rte_flow_action actions[],
		   struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	int ret;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues_n = 0,
		},
	};

	priv_lock(priv);
	ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
	priv_unlock(priv);
	return ret;
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_eth *spec = item->spec;
	const struct rte_flow_item_eth *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 2;
	flow->hash_fields = 0;
	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*eth = (struct ibv_flow_spec_eth) {
		.type = flow->inner | IBV_FLOW_SPEC_ETH,
		.size = eth_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
	eth->val.ether_type = spec->type;
	memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
	memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
	eth->mask.ether_type = mask->type;
	/* Remove unwanted bits from values. */
	for (i = 0; i < ETHER_ADDR_LEN; ++i) {
		eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
		eth->val.src_mac[i] &= eth->mask.src_mac[i];
	}
	eth->val.ether_type &= eth->mask.ether_type;
	return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_vlan *spec = item->spec;
	const struct rte_flow_item_vlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_eth *eth;
	const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

	eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	eth->val.vlan_tag = spec->tci;
	eth->mask.vlan_tag = mask->tci;
	eth->val.vlan_tag &= eth->mask.vlan_tag;
	return 0;
}

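/*
 * Note on the VLAN converter above: a VLAN item has no Verbs specification
 * of its own (its .dst_sz is 0 in mlx5_flow_items). It rewinds by
 * sizeof(struct ibv_flow_spec_eth) and patches the TCI into the Ethernet
 * specification written by the previous item, which is why VLAN may only
 * follow ETH in the item graph.
 */
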
/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv4 *spec = item->spec;
	const struct rte_flow_item_ipv4 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv4_ext *ipv4;
	unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV4 |
			     IBV_RX_HASH_DST_IPV4);
	ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv4 = (struct ibv_flow_spec_ipv4_ext) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV4_EXT,
		.size = ipv4_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	ipv4->val = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = spec->hdr.src_addr,
		.dst_ip = spec->hdr.dst_addr,
		.proto = spec->hdr.next_proto_id,
		.tos = spec->hdr.type_of_service,
	};
	ipv4->mask = (struct ibv_flow_ipv4_ext_filter){
		.src_ip = mask->hdr.src_addr,
		.dst_ip = mask->hdr.dst_addr,
		.proto = mask->hdr.next_proto_id,
		.tos = mask->hdr.type_of_service,
	};
	/* Remove unwanted bits from values. */
	ipv4->val.src_ip &= ipv4->mask.src_ip;
	ipv4->val.dst_ip &= ipv4->mask.dst_ip;
	ipv4->val.proto &= ipv4->mask.proto;
	ipv4->val.tos &= ipv4->mask.tos;
	return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
		      const void *default_mask,
		      void *data)
{
	const struct rte_flow_item_ipv6 *spec = item->spec;
	const struct rte_flow_item_ipv6 *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_ipv6 *ipv6;
	unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
	unsigned int i;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 1;
	flow->hash_fields = (IBV_RX_HASH_SRC_IPV6 |
			     IBV_RX_HASH_DST_IPV6);
	ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*ipv6 = (struct ibv_flow_spec_ipv6) {
		.type = flow->inner | IBV_FLOW_SPEC_IPV6,
		.size = ipv6_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
	       RTE_DIM(ipv6->val.src_ip));
	memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
	       RTE_DIM(ipv6->val.dst_ip));
	memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
	       RTE_DIM(ipv6->mask.src_ip));
	memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
	       RTE_DIM(ipv6->mask.dst_ip));
	ipv6->mask.flow_label = mask->hdr.vtc_flow;
	ipv6->mask.next_hdr = mask->hdr.proto;
	ipv6->mask.hop_limit = mask->hdr.hop_limits;
	/* Remove unwanted bits from values. */
	for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
		ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
		ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
	}
	ipv6->val.flow_label &= ipv6->mask.flow_label;
	ipv6->val.next_hdr &= ipv6->mask.next_hdr;
	ipv6->val.hop_limit &= ipv6->mask.hop_limit;
	return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_udp *spec = item->spec;
	const struct rte_flow_item_udp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *udp;
	unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_UDP |
			      IBV_RX_HASH_DST_PORT_UDP);
	udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*udp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_UDP,
		.size = udp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	udp->val.dst_port = spec->hdr.dst_port;
	udp->val.src_port = spec->hdr.src_port;
	udp->mask.dst_port = mask->hdr.dst_port;
	udp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	udp->val.src_port &= udp->mask.src_port;
	udp->val.dst_port &= udp->mask.dst_port;
	return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
		     const void *default_mask,
		     void *data)
{
	const struct rte_flow_item_tcp *spec = item->spec;
	const struct rte_flow_item_tcp *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tcp_udp *tcp;
	unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	flow->hash_fields |= (IBV_RX_HASH_SRC_PORT_TCP |
			      IBV_RX_HASH_DST_PORT_TCP);
	tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tcp = (struct ibv_flow_spec_tcp_udp) {
		.type = flow->inner | IBV_FLOW_SPEC_TCP,
		.size = tcp_size,
	};
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	tcp->val.dst_port = spec->hdr.dst_port;
	tcp->val.src_port = spec->hdr.src_port;
	tcp->mask.dst_port = mask->hdr.dst_port;
	tcp->mask.src_port = mask->hdr.src_port;
	/* Remove unwanted bits from values. */
	tcp->val.src_port &= tcp->mask.src_port;
	tcp->val.dst_port &= tcp->mask.dst_port;
	return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
		       const void *default_mask,
		       void *data)
{
	const struct rte_flow_item_vxlan *spec = item->spec;
	const struct rte_flow_item_vxlan *mask = item->mask;
	struct mlx5_flow_parse *flow = (struct mlx5_flow_parse *)data;
	struct ibv_flow_spec_tunnel *vxlan;
	unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
	union vni {
		uint32_t vlan_id;
		uint8_t vni[4];
	} id;

	++flow->ibv_attr->num_of_specs;
	flow->ibv_attr->priority = 0;
	id.vni[0] = 0;
	vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*vxlan = (struct ibv_flow_spec_tunnel) {
		.type = flow->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
		.size = size,
	};
	flow->inner = IBV_FLOW_SPEC_INNER;
	if (!spec)
		return 0;
	if (!mask)
		mask = default_mask;
	memcpy(&id.vni[1], spec->vni, 3);
	vxlan->val.tunnel_id = id.vlan_id;
	memcpy(&id.vni[1], mask->vni, 3);
	vxlan->mask.tunnel_id = id.vlan_id;
	/* Remove unwanted bits from values. */
	vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
	return 0;
}

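/*
 * VNI packing illustration: the 24-bit VNI bytes from the item are copied
 * behind a leading zero byte (id.vni[0]), so e.g. a VNI of
 * { 0x12, 0x34, 0x56 } yields the in-memory tunnel_id bytes
 * { 0x00, 0x12, 0x34, 0x56 }, the layout the device compares against the
 * VNI of incoming VXLAN packets.
 */
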
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *flow, uint32_t mark_id)
{
	struct ibv_flow_spec_action_tag *tag;
	unsigned int size = sizeof(struct ibv_flow_spec_action_tag);

	tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*tag = (struct ibv_flow_spec_action_tag){
		.type = IBV_FLOW_SPEC_ACTION_TAG,
		.size = size,
		.tag_id = mlx5_flow_mark_set(mark_id),
	};
	++flow->ibv_attr->num_of_specs;
	return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
				   struct mlx5_flow_parse *flow,
				   struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct ibv_flow_spec_action_drop *drop;
	unsigned int size = sizeof(struct ibv_flow_spec_action_drop);

	assert(priv->pd);
	assert(priv->ctx);
	rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->drop = 1;
	drop = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
	*drop = (struct ibv_flow_spec_action_drop){
			.type = IBV_FLOW_SPEC_ACTION_DROP,
			.size = size,
	};
	++flow->ibv_attr->num_of_specs;
	flow->offset += sizeof(struct ibv_flow_spec_action_drop);
	rte_flow->ibv_attr = flow->ibv_attr;
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->drxq.hrxq.qp = priv->flow_drop_queue->qp;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->drxq.hrxq.qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	rte_free(rte_flow);
	return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
			      struct mlx5_flow_parse *flow,
			      struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	unsigned int i;

	assert(priv->pd);
	assert(priv->ctx);
	assert(!flow->actions.drop);
	/* Allocate the flow itself plus its trailing queue index array. */
	rte_flow =
		rte_calloc(__func__, 1,
			   sizeof(*rte_flow) +
			   flow->actions.queues_n * sizeof(uint16_t),
			   0);
	if (!rte_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate flow memory");
		return NULL;
	}
	rte_flow->mark = flow->actions.mark;
	rte_flow->ibv_attr = flow->ibv_attr;
	rte_flow->queues = (uint16_t (*)[])(rte_flow + 1);
	memcpy(rte_flow->queues, flow->actions.queues,
	       flow->actions.queues_n * sizeof(uint16_t));
	rte_flow->queues_n = flow->actions.queues_n;
	rte_flow->frxq.hash_fields = flow->hash_fields;
	rte_flow->frxq.hrxq = mlx5_priv_hrxq_get(priv, rss_hash_default_key,
						 rss_hash_default_key_len,
						 flow->hash_fields,
						 (*rte_flow->queues),
						 rte_flow->queues_n);
	if (rte_flow->frxq.hrxq) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "duplicated flow");
		goto error;
	}
	rte_flow->frxq.hrxq = mlx5_priv_hrxq_new(priv, rss_hash_default_key,
						 rss_hash_default_key_len,
						 flow->hash_fields,
						 (*rte_flow->queues),
						 rte_flow->queues_n);
	if (!rte_flow->frxq.hrxq) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot create hash rxq");
		goto error;
	}
	for (i = 0; i != flow->actions.queues_n; ++i) {
		struct mlx5_rxq_data *q =
			(*priv->rxqs)[flow->actions.queues[i]];

		q->mark |= flow->actions.mark;
	}
	if (!priv->dev->data->dev_started)
		return rte_flow;
	rte_flow->ibv_flow = ibv_create_flow(rte_flow->frxq.hrxq->qp,
					     rte_flow->ibv_attr);
	if (!rte_flow->ibv_flow) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "flow rule creation failure");
		goto error;
	}
	return rte_flow;
error:
	assert(rte_flow);
	if (rte_flow->frxq.hrxq)
		mlx5_priv_hrxq_release(priv, rte_flow->frxq.hrxq);
	rte_free(rte_flow);
	return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct rte_flow *rte_flow;
	struct mlx5_flow_parse flow = {
		.offset = sizeof(struct ibv_flow_attr),
		.actions = {
			.mark_id = MLX5_FLOW_MARK_DEFAULT,
			.queues = { 0 },
			.queues_n = 0,
		},
	};
	int err;

	err = priv_flow_validate(priv, attr, items, actions, error, &flow);
	if (err)
		goto exit;
	flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
	flow.offset = sizeof(struct ibv_flow_attr);
	if (!flow.ibv_attr) {
		rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
				   NULL, "cannot allocate ibv_attr memory");
		goto exit;
	}
	*flow.ibv_attr = (struct ibv_flow_attr){
		.type = IBV_FLOW_ATTR_NORMAL,
		.size = sizeof(struct ibv_flow_attr),
		.priority = attr->priority,
		.num_of_specs = 0,
		.port = 0,
		.flags = 0,
	};
	flow.inner = 0;
	flow.hash_fields = 0;
	claim_zero(priv_flow_validate(priv, attr, items, actions,
				      error, &flow));
	if (flow.actions.mark && !flow.actions.drop) {
		mlx5_flow_create_flag_mark(&flow, flow.actions.mark_id);
		flow.offset += sizeof(struct ibv_flow_spec_action_tag);
	}
	if (flow.actions.drop)
		rte_flow =
			priv_flow_create_action_queue_drop(priv, &flow, error);
	else
		rte_flow = priv_flow_create_action_queue(priv, &flow, error);
	if (!rte_flow)
		goto exit;
	return rte_flow;
exit:
	rte_free(flow.ibv_attr);
	return NULL;
}

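/*
 * Design note on priv_flow_create(): priv_flow_validate() runs twice. The
 * first pass sizes flow.offset for the rte_malloc() of flow.ibv_attr; the
 * second pass (wrapped in claim_zero() since it already succeeded once)
 * fills in the Verbs specifications. The mark/flag tag, when present, is
 * appended last so it follows every item specification in the buffer.
 */
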
/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
		 const struct rte_flow_attr *attr,
		 const struct rte_flow_item items[],
		 const struct rte_flow_action actions[],
		 struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;
	struct rte_flow *flow;

	priv_lock(priv);
	flow = priv_flow_create(priv, attr, items, actions, error);
	if (flow) {
		TAILQ_INSERT_TAIL(&priv->flows, flow, next);
		DEBUG("Flow created %p", (void *)flow);
	}
	priv_unlock(priv);
	return flow;
}

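/*
 * Usage sketch (illustrative; the port, pattern and queue index are
 * placeholders): creating a rule that steers UDP over IPv4 to queue 3
 * through the generic API, which lands in mlx5_flow_create():
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE,
 *		  .conf = &(struct rte_flow_action_queue){ .index = 3 } },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error error;
 *	struct rte_flow *f = rte_flow_create(port_id, &attr, pattern,
 *					     actions, &error);
 */
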
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
		  struct rte_flow *flow)
{
	unsigned int i;
	uint16_t *queues;
	uint16_t queues_n;

	if (flow->drop || !flow->mark)
		goto free;
	queues = flow->frxq.hrxq->ind_table->queues;
	queues_n = flow->frxq.hrxq->ind_table->queues_n;
	for (i = 0; i != queues_n; ++i) {
		struct rte_flow *tmp;
		struct mlx5_rxq_data *rxq_data = (*priv->rxqs)[queues[i]];
		int mark = 0;

		/*
		 * To remove the mark from the queue, the queue must not be
		 * present in any other marked flow (RSS or not).
		 */
		TAILQ_FOREACH(tmp, &priv->flows, next) {
			unsigned int j;

			if (!tmp->mark)
				continue;
			for (j = 0;
			     (j != tmp->frxq.hrxq->ind_table->queues_n) &&
			     !mark;
			     j++)
				if (tmp->frxq.hrxq->ind_table->queues[j] ==
				    queues[i])
					mark = 1;
		}
		rxq_data->mark = mark;
	}
free:
	if (flow->ibv_flow)
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
	if (!flow->drop)
		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
	TAILQ_REMOVE(&priv->flows, flow, next);
	rte_free(flow->ibv_attr);
	DEBUG("Flow destroyed %p", (void *)flow);
	rte_free(flow);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
		  struct rte_flow *flow,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_destroy(priv, flow);
	priv_unlock(priv);
	return 0;
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
	while (!TAILQ_EMPTY(&priv->flows)) {
		struct rte_flow *flow;

		flow = TAILQ_FIRST(&priv->flows);
		priv_flow_destroy(priv, flow);
	}
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
		struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	(void)error;
	priv_lock(priv);
	priv_flow_flush(priv);
	priv_unlock(priv);
	return 0;
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, -1 on error.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = NULL;

	assert(priv->pd);
	assert(priv->ctx);
	fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
	if (!fdq) {
		WARN("cannot allocate memory for drop queue");
		goto error;
	}
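	/*
	 * A CQ is required to create the WQ below even though no
	 * completion is ever polled on the drop queue.
	 */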
	fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
	if (!fdq->cq) {
		WARN("cannot allocate CQ for drop queue");
		goto error;
	}
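	/*
	 * A single-entry receive WQ; no Rx buffers are ever posted to
	 * it, so packets steered here are effectively dropped.
	 */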
	fdq->wq = ibv_create_wq(priv->ctx,
			&(struct ibv_wq_init_attr){
			.wq_type = IBV_WQT_RQ,
			.max_wr = 1,
			.max_sge = 1,
			.pd = priv->pd,
			.cq = fdq->cq,
			});
	if (!fdq->wq) {
		WARN("cannot allocate WQ for drop queue");
		goto error;
	}
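	/* An indirection table with a single entry pointing to the WQ. */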
	fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
			&(struct ibv_rwq_ind_table_init_attr){
			.log_ind_tbl_size = 0,
			.ind_tbl = &fdq->wq,
			.comp_mask = 0,
			});
	if (!fdq->ind_table) {
		WARN("cannot allocate indirection table for drop queue");
		goto error;
	}
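	/*
	 * A hash QP on top of the indirection table; with
	 * rx_hash_fields_mask set to 0 every packet maps to the single
	 * drop WQ regardless of its headers.
	 */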
	fdq->qp = ibv_create_qp_ex(priv->ctx,
		&(struct ibv_qp_init_attr_ex){
			.qp_type = IBV_QPT_RAW_PACKET,
			.comp_mask =
				IBV_QP_INIT_ATTR_PD |
				IBV_QP_INIT_ATTR_IND_TABLE |
				IBV_QP_INIT_ATTR_RX_HASH,
			.rx_hash_conf = (struct ibv_rx_hash_conf){
				.rx_hash_function =
					IBV_RX_HASH_FUNC_TOEPLITZ,
				.rx_hash_key_len = rss_hash_default_key_len,
				.rx_hash_key = rss_hash_default_key,
				.rx_hash_fields_mask = 0,
				},
			.rwq_ind_tbl = fdq->ind_table,
			.pd = priv->pd
		});
	if (!fdq->qp) {
		WARN("cannot allocate QP for drop queue");
		goto error;
	}
	priv->flow_drop_queue = fdq;
	return 0;
error:
	/* fdq is still NULL if the initial allocation failed. */
	if (fdq) {
		if (fdq->qp)
			claim_zero(ibv_destroy_qp(fdq->qp));
		if (fdq->ind_table)
			claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
		if (fdq->wq)
			claim_zero(ibv_destroy_wq(fdq->wq));
		if (fdq->cq)
			claim_zero(ibv_destroy_cq(fdq->cq));
		rte_free(fdq);
	}
	priv->flow_drop_queue = NULL;
	return -1;
}

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
	struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;

	if (!fdq)
		return;
	if (fdq->qp)
		claim_zero(ibv_destroy_qp(fdq->qp));
	if (fdq->ind_table)
		claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
	if (fdq->wq)
		claim_zero(ibv_destroy_wq(fdq->wq));
	if (fdq->cq)
		claim_zero(ibv_destroy_cq(fdq->cq));
	rte_free(fdq);
	priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
	struct rte_flow *flow;

	TAILQ_FOREACH_REVERSE(flow, &priv->flows, mlx5_flows, next) {
		claim_zero(ibv_destroy_flow(flow->ibv_flow));
		flow->ibv_flow = NULL;
		/*
		 * Clear the Rx queue marks before releasing the hash Rx
		 * queue, its indirection table is still needed here.
		 */
		if (flow->mark) {
			unsigned int n;
			struct mlx5_ind_table_ibv *ind_tbl =
				flow->frxq.hrxq->ind_table;

			for (n = 0; n < ind_tbl->queues_n; ++n)
				(*priv->rxqs)[ind_tbl->queues[n]]->mark = 0;
		}
		mlx5_priv_hrxq_release(priv, flow->frxq.hrxq);
		flow->frxq.hrxq = NULL;
		DEBUG("Flow %p removed", (void *)flow);
	}
	priv_flow_delete_drop_queue(priv);
}

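/*
 * Note: flows are not destroyed on dev_stop(), only their verbs objects
 * are. Every rte_flow is kept in priv->flows so that priv_flow_start()
 * below can re-apply it when the port is started again.
 */
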
/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
	int ret;
	struct rte_flow *flow;

	ret = priv_flow_create_drop_queue(priv);
	if (ret)
		return -1;
	TAILQ_FOREACH(flow, &priv->flows, next) {
		if (flow->frxq.hrxq)
			goto flow_create;
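		/*
		 * Reuse an existing hash Rx queue when one already
		 * matches, otherwise create a new one below.
		 */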
		flow->frxq.hrxq =
			mlx5_priv_hrxq_get(priv, rss_hash_default_key,
					   rss_hash_default_key_len,
					   flow->frxq.hash_fields,
					   (*flow->queues),
					   flow->queues_n);
		if (flow->frxq.hrxq)
			goto flow_create;
		flow->frxq.hrxq =
			mlx5_priv_hrxq_new(priv, rss_hash_default_key,
					   rss_hash_default_key_len,
					   flow->frxq.hash_fields,
					   (*flow->queues),
					   flow->queues_n);
		if (!flow->frxq.hrxq) {
			DEBUG("Flow %p cannot be applied",
			      (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
flow_create:
		flow->ibv_flow = ibv_create_flow(flow->frxq.hrxq->qp,
						 flow->ibv_attr);
		if (!flow->ibv_flow) {
			DEBUG("Flow %p cannot be applied", (void *)flow);
			rte_errno = EINVAL;
			return rte_errno;
		}
		DEBUG("Flow %p applied", (void *)flow);
		if (flow->mark) {
			unsigned int n;

			for (n = 0;
			     n < flow->frxq.hrxq->ind_table->queues_n;
			     ++n) {
				uint16_t idx =
					flow->frxq.hrxq->ind_table->queues[n];
				(*priv->rxqs)[idx]->mark = 1;
			}
		}
	}
	return 0;
}

/**
 * Enable or disable isolated mode.
 *
 * @see rte_flow_isolate()
 * @see rte_flow_ops
 */
int
mlx5_flow_isolate(struct rte_eth_dev *dev,
		  int enable,
		  struct rte_flow_error *error)
{
	struct priv *priv = dev->data->dev_private;

	priv_lock(priv);
	if (dev->data->dev_started) {
		rte_flow_error_set(error, EBUSY,
				   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
				   NULL,
				   "port must be stopped first");
		priv_unlock(priv);
		return -rte_errno;
	}
	priv->isolated = !!enable;
	priv_unlock(priv);
	return 0;
}

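/*
 * Usage sketch (application side, illustrative only): isolated mode can
 * only be toggled while the port is stopped, e.g.:
 *
 *	struct rte_flow_error err;
 *
 *	rte_eth_dev_stop(port_id);
 *	if (rte_flow_isolate(port_id, 1, &err))
 *		printf("cannot enable isolated mode: %s\n", err.message);
 */
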
/**
 * Verify the flow list is empty.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   The number of flows not released.
 */
int
priv_flow_verify(struct priv *priv)
{
	struct rte_flow *flow;
	int ret = 0;

	TAILQ_FOREACH(flow, &priv->flows, next) {
		DEBUG("%p: flow %p still referenced", (void *)priv,
		      (void *)flow);
		++ret;
	}
	return ret;
}
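
/*
 * Note: this is a leak check, expected to run late in the device
 * teardown path when every flow should already have been released.
 */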