/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

/* Number of Work Queues needed for the drop queue. */
#define MLX5_DROP_WQ_N 4

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct rte_flow {
        LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
        struct ibv_exp_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
        uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct rxq *rxqs[]; /**< Pointer to the queues array. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-mask size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
        },
};

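/*
 * Illustrative note (not part of the original source): priv_flow_validate()
 * walks this graph starting from the RTE_FLOW_ITEM_TYPE_END entry, so a
 * pattern must begin with ETH or VXLAN and every subsequent item must be
 * listed in the .items array of its predecessor. E.g. ETH / IPV4 / UDP /
 * VXLAN / ETH (an inner Ethernet header after the tunnel) is accepted,
 * while a pattern starting with UDP is rejected with "item not supported".
 */
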
/** Structure to pass to the conversion function. */
struct mlx5_flow {
        struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
};

/** Structure for the drop queue. */
struct rte_flow_drop {
        struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_exp_wq *wqs[MLX5_DROP_WQ_N]; /**< Verbs work queues. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, non-zero otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

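/*
 * Illustrative example (not part of the original source): with a supported
 * mask byte of 0x0f, a spec byte of 0x1f fails the checks above since
 * (0x1f | 0x0f) != 0x0f, i.e. the spec requests a match on bits the NIC
 * cannot filter. The final memcmp() rejects spec/last combinations that
 * still describe a range once the applied mask is taken into account, as
 * ranges are not supported.
 */
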
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 * @param[in, out] action
 *   Action structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow *flow,
                   struct mlx5_flow_action *action)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;

        (void)priv;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                } else if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (flow->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        flow->inner = 1;
                }
                flow->offset += cur_item->dst_sz;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        action->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index >= priv->rxqs_n))
                                goto exit_action_not_supported;
                        for (n = 0; n < action->queues_n; ++n) {
                                if (action->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (action->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                action->queue = 1;
                                action->queues_n = 1;
                                action->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (action->queues_n == 1) {
                                uint16_t found = 0;

                                assert(action->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (action->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        action->queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                action->queues[n] = rss->queue[n];
                        action->queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        action->mark = 1;
                        action->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        action->mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (action->mark && !flow->ibv_attr && !action->drop)
                flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
        if (!action->queue && !action->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
        struct mlx5_flow_action action = {
                .queue = 0,
                .drop = 0,
                .mark = 0,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
                .queues_n = 0,
        };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow,
                                 &action);
        priv_unlock(priv);
        return ret;
}

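/*
 * Illustrative usage sketch from the application side (not part of this
 * file); assumes an initialized port "port_id" with at least two Rx queues.
 * It validates then creates a rule steering TCP traffic to queue 1:
 *
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_TCP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	struct rte_flow_action_queue queue = { .index = 1 };
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow_error err;
 *
 *	if (!rte_flow_validate(port_id, &attr, pattern, actions, &err))
 *		rte_flow_create(port_id, &attr, pattern, actions, &err);
 */
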
/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_exp_flow_spec_eth) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
                             IBV_EXP_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv6_ext *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
                             IBV_EXP_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
                              IBV_EXP_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
                              IBV_EXP_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_exp_flow_spec_tunnel) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        flow->inner = IBV_EXP_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}

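/*
 * Worked example (illustrative): a 24-bit VNI of 0x123456 is copied into
 * bytes 1-3 of the union while byte 0 stays zero, so id.vni holds
 * { 0x00, 0x12, 0x34, 0x56 } and id.vlan_id reinterprets those four bytes
 * as the 32-bit tunnel_id expected by Verbs, preserving the byte order of
 * the wire format without any shifting.
 */
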
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
{
        struct ibv_exp_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_exp_flow_spec_action_tag){
                .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}

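/*
 * Illustrative note (the receive side is handled elsewhere, e.g. in
 * mlx5_rxtx.c, not in this file): packets matching a marked rule are
 * expected to carry the identifier back to the application, roughly:
 *
 *	if (mbuf->ol_flags & PKT_RX_FDIR_ID)
 *		mark = mbuf->hash.fdir.hi;
 */
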
/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        rte_flow->drop = 1;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->qp = priv->flow_drop_queue->qp;
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by mlx5_flow_validate()).
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow *flow,
                              struct mlx5_flow_action *action,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;
        unsigned int j;
        const unsigned int wqs_n = 1 << log2above(action->queues_n);
        struct ibv_exp_wq *wqs[wqs_n];

        assert(priv->pd);
        assert(priv->ctx);
        assert(!action->drop);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow) +
                              sizeof(*rte_flow->rxqs) * action->queues_n, 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        for (i = 0; i < action->queues_n; ++i) {
                struct rxq_ctrl *rxq;

                rxq = container_of((*priv->rxqs)[action->queues[i]],
                                   struct rxq_ctrl, rxq);
                wqs[i] = rxq->wq;
                rte_flow->rxqs[i] = &rxq->rxq;
                ++rte_flow->rxqs_n;
                rxq->rxq.mark |= action->mark;
        }
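        /*
         * Verbs requires the indirection table size to be a power of two;
         * the loop below wraps the remaining entries around the configured
         * queues. Illustrative example: with 3 queues and wqs_n == 4, the
         * table becomes { wq0, wq1, wq2, wq0 }.
         */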
        /* Finalise the indirection table. */
        for (j = 0; i < wqs_n; ++i, ++j) {
                wqs[i] = wqs[j];
                if (j == action->queues_n)
                        j = 0;
        }
        rte_flow->mark = action->mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->hash_fields = flow->hash_fields;
        rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
                priv->ctx,
                &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = log2above(action->queues_n),
                        .ind_tbl = wqs,
                        .comp_mask = 0,
                });
        if (!rte_flow->ind_table) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate indirection table");
                goto error;
        }
        rte_flow->qp = ibv_exp_create_qp(
                priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = rte_flow->hash_fields,
                                .rwq_ind_tbl = rte_flow->ind_table,
                        },
                        .port_num = priv->port,
                });
        if (!rte_flow->qp) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate QP");
                goto error;
        }
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->qp)
                ibv_destroy_qp(rte_flow->qp);
        if (rte_flow->ind_table)
                ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] pattern
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
        struct mlx5_flow_action action = {
                .queue = 0,
                .drop = 0,
                .mark = 0,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
                .queues_n = 0,
        };
        int err;

        err = priv_flow_validate(priv, attr, items, actions, error, &flow,
                                 &action);
        if (err)
                goto exit;
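        /*
         * The validation pass above ran with a NULL flow.ibv_attr and only
         * accumulated the required specification size in flow.offset;
         * allocate that buffer, then validate a second time to fill it.
         */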
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_exp_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_exp_flow_attr){
                .type = IBV_EXP_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_exp_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
                .reserved = 0,
        };
        flow.inner = 0;
        flow.hash_fields = 0;
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow, &action));
        if (action.mark && !action.drop) {
                mlx5_flow_create_flag_mark(&flow, action.mark_id);
                flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
        }
        if (action.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
                                                         error);
        if (!rte_flow)
                goto exit;
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, attr, items, actions, error);
        if (flow) {
                LIST_INSERT_HEAD(&priv->flows, flow, next);
                DEBUG("Flow created %p", (void *)flow);
        }
        priv_unlock(priv);
        return flow;
}

/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct rte_flow *flow)
{
        (void)priv;
        LIST_REMOVE(flow, next);
        if (flow->ibv_flow)
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
        if (flow->drop)
                goto free;
        if (flow->qp)
                claim_zero(ibv_destroy_qp(flow->qp));
        if (flow->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
        if (flow->drop && flow->wq)
                claim_zero(ibv_exp_destroy_wq(flow->wq));
        if (flow->drop && flow->cq)
                claim_zero(ibv_destroy_cq(flow->cq));
        if (flow->mark) {
                struct rte_flow *tmp;
                struct rxq *rxq;
                uint32_t mark_n;
                uint32_t queue_n;

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
                        mark_n = 0;
                        rxq = flow->rxqs[queue_n];
                        for (tmp = LIST_FIRST(&priv->flows);
                             tmp;
                             tmp = LIST_NEXT(tmp, next)) {
                                uint32_t tqueue_n;

                                if (tmp->drop || !tmp->mark)
                                        continue;
                                for (tqueue_n = 0;
                                     tqueue_n < tmp->rxqs_n;
                                     ++tqueue_n) {
                                        struct rxq *trxq;

                                        trxq = tmp->rxqs[tqueue_n];
                                        if (rxq == trxq)
                                                ++mark_n;
                                }
                        }
                        rxq->mark = !!mark_n;
                }
        }
free:
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_destroy(priv, flow);
        priv_unlock(priv);
        return 0;
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
        while (!LIST_EMPTY(&priv->flows)) {
                struct rte_flow *flow;

                flow = LIST_FIRST(&priv->flows);
                priv_flow_destroy(priv, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv);
        priv_unlock(priv);
        return 0;
}

/**
 * Create drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success.
 */
static int
priv_flow_create_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = NULL;
        unsigned int i;

        assert(priv->pd);
        assert(priv->ctx);
        fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1372         if (!fdq) {
1373                 WARN("cannot allocate memory for drop queue");
1374                 goto error;
1375         }
1376         fdq->cq = ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
1377                         &(struct ibv_exp_cq_init_attr){
1378                         .comp_mask = 0,
1379                         });
1380         if (!fdq->cq) {
1381                 WARN("cannot allocate CQ for drop queue");
1382                 goto error;
1383         }
1384         for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1385                 fdq->wqs[i] = ibv_exp_create_wq(priv->ctx,
1386                                 &(struct ibv_exp_wq_init_attr){
1387                                 .wq_type = IBV_EXP_WQT_RQ,
1388                                 .max_recv_wr = 1,
1389                                 .max_recv_sge = 1,
1390                                 .pd = priv->pd,
1391                                 .cq = fdq->cq,
1392                                 });
1393                 if (!fdq->wqs[i]) {
1394                         WARN("cannot allocate WQ for drop queue");
1395                         goto error;
1396                 }
1397         }
1398         fdq->ind_table = ibv_exp_create_rwq_ind_table(priv->ctx,
1399                         &(struct ibv_exp_rwq_ind_table_init_attr){
1400                         .pd = priv->pd,
1401                         .log_ind_tbl_size = 0,
1402                         .ind_tbl = fdq->wqs,
1403                         .comp_mask = 0,
1404                         });
1405         if (!fdq->ind_table) {
1406                 WARN("cannot allocate indirection table for drop queue");
1407                 goto error;
1408         }
1409         fdq->qp = ibv_exp_create_qp(priv->ctx,
1410                 &(struct ibv_exp_qp_init_attr){
1411                         .qp_type = IBV_QPT_RAW_PACKET,
1412                         .comp_mask =
1413                                 IBV_EXP_QP_INIT_ATTR_PD |
1414                                 IBV_EXP_QP_INIT_ATTR_PORT |
1415                                 IBV_EXP_QP_INIT_ATTR_RX_HASH,
1416                         .pd = priv->pd,
1417                         .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
1418                                 .rx_hash_function =
1419                                         IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
1420                                 .rx_hash_key_len = rss_hash_default_key_len,
1421                                 .rx_hash_key = rss_hash_default_key,
1422                                 .rx_hash_fields_mask = 0,
1423                                 .rwq_ind_tbl = fdq->ind_table,
1424                                 },
1425                         .port_num = priv->port,
1426                         });
1427         if (!fdq->qp) {
1428                 WARN("cannot allocate QP for drop queue");
1429                 goto error;
1430         }
1431         priv->flow_drop_queue = fdq;
1432         return 0;
1433 error:
1434         if (fdq->qp)
1435                 claim_zero(ibv_destroy_qp(fdq->qp));
1436         if (fdq->ind_table)
1437                 claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
1438         for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
1439                 if (fdq->wqs[i])
1440                         claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
1441         }
1442         if (fdq->cq)
1443                 claim_zero(ibv_destroy_cq(fdq->cq));
1444         if (fdq)
1445                 rte_free(fdq);
1446         priv->flow_drop_queue = NULL;
1447         return -1;
1448 }
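
/*
 * Note on the drop queue design: the work queues above are created with
 * room for a single receive WR and nothing in this file ever posts a
 * receive buffer on them, so packets steered to this QP have nowhere to
 * land and are dropped. The Toeplitz hash configuration with an empty
 * rx_hash_fields_mask only exists to satisfy the QP creation interface;
 * no actual RSS spreading takes place.
 */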

/**
 * Delete drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_delete_drop_queue(struct priv *priv)
{
        struct rte_flow_drop *fdq = priv->flow_drop_queue;
        unsigned int i;

        /* Nothing to do if the drop queue was never created. */
        if (!fdq)
                return;
        claim_zero(ibv_destroy_qp(fdq->qp));
        claim_zero(ibv_exp_destroy_rwq_ind_table(fdq->ind_table));
        for (i = 0; i != MLX5_DROP_WQ_N; ++i) {
                assert(fdq->wqs[i]);
                claim_zero(ibv_exp_destroy_wq(fdq->wqs[i]));
        }
        claim_zero(ibv_destroy_cq(fdq->cq));
        rte_free(fdq);
        priv->flow_drop_queue = NULL;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
        struct rte_flow *flow;

        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                flow->rxqs[n]->mark = 0;
                }
                DEBUG("Flow %p removed", (void *)flow);
        }
        priv_flow_delete_drop_queue(priv);
}
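
/*
 * Note: priv_flow_stop() only releases the Verbs flow objects; each
 * rte_flow stays in priv->flows with ibv_flow set to NULL so that
 * priv_flow_start() below can re-apply the saved attributes on the
 * next dev_start().
 */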

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
        int ret;
        struct rte_flow *flow;

        ret = priv_flow_create_drop_queue(priv);
        if (ret) {
                /* Drop queue resources could not be allocated. */
                rte_errno = ENOMEM;
                return rte_errno;
        }
        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                struct ibv_qp *qp;

                if (flow->drop)
                        qp = priv->flow_drop_queue->qp;
                else
                        qp = flow->qp;
                flow->ibv_flow = ibv_exp_create_flow(qp, flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                flow->rxqs[n]->mark = 1;
                }
        }
        return 0;
}
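
/*
 * Expected call sequence (driver-internal sketch): dev_stop() invokes
 * priv_flow_stop() to detach every flow while keeping its description,
 * and dev_start() invokes priv_flow_start() to re-apply them, e.g.:
 *
 *      priv_flow_stop(priv);
 *      (reconfigure queues here)
 *      if (priv_flow_start(priv))
 *              ERROR("cannot re-apply flow rules");
 */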

/**
 * Check whether an Rx queue is used by a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param rxq
 *   Pointer to the queue to search.
 *
 * @return
 *   Nonzero if the queue is used by a flow.
 */
int
priv_flow_rxq_in_use(struct priv *priv, struct rxq *rxq)
{
        struct rte_flow *flow;

        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                unsigned int n;

                /* Drop flows are not attached to any Rx queue. */
                if (flow->drop)
                        continue;
                for (n = 0; n < flow->rxqs_n; ++n) {
                        if (flow->rxqs[n] == rxq)
                                return 1;
                }
        }
        return 0;
}
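
/*
 * Usage sketch (hypothetical caller, e.g. a queue cleanup path): an Rx
 * queue must not be released while a flow still references it.
 *
 *      if (priv_flow_rxq_in_use(priv, rxq)) {
 *              WARN("Rx queue %p is still used by a flow", (void *)rxq);
 *              return EBUSY;
 *      }
 */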