net/mlx5: support RSS action flow rule
drivers/net/mlx5/mlx5_flow.c
/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h> /* assert() is used below; mlx5 headers may also provide it. */
#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_prm.h"

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct rte_flow {
        LIST_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        struct ibv_exp_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_exp_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_exp_flow *ibv_flow; /**< Verbs flow. */
        struct ibv_exp_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
        struct rxq *(*rxqs)[]; /**< Pointer to the queues array. */
        uint16_t rxqs_n; /**< Number of queues in this flow, 0 if drop queue. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-mask size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_RSS, /* Accepted by priv_flow_validate() below. */
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6_ext),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
        },
};

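/*
 * Illustration: a pattern is accepted when, starting from
 * RTE_FLOW_ITEM_TYPE_END, every item appears in the .items list of its
 * predecessor. For instance ETH -> IPV4 -> UDP -> VXLAN -> ETH (inner)
 * walks the graph above, while ETH -> VLAN -> UDP is rejected because
 * UDP does not follow VLAN directly.
 */
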
/** Structure to pass to the conversion function. */
struct mlx5_flow {
        struct ibv_exp_flow_attr *ibv_attr; /**< Verbs attribute. */
        unsigned int offset; /**< Offset in bytes in the ibv_attr buffer. */
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint64_t hash_fields; /**< Fields that participate in the hash. */
};

struct mlx5_flow_action {
        uint32_t queue:1; /**< Target is a receive queue. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
};

/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success, a non-zero value otherwise.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

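/*
 * Illustration (assumed values): ranges are effectively rejected above.
 * Given an IPv4 item whose applicable mask covers the whole dst_addr,
 * spec->hdr.dst_addr = 10.0.0.1 with last->hdr.dst_addr = 10.0.0.9
 * leaves different masked spec and last values, so memcmp() is non-zero
 * and the item is refused.
 */
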
/**
 * Validate a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] flow
 *   Flow structure to update.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_validate(struct priv *priv,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error,
                   struct mlx5_flow *flow)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        struct mlx5_flow_action action = {
                .queue = 0,
                .drop = 0,
                .mark = 0,
        };

        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int i;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (flow->ibv_attr && cur_item->convert) {
                        err = cur_item->convert(items,
                                                (cur_item->default_mask ?
                                                 cur_item->default_mask :
                                                 cur_item->mask),
                                                flow);
                        if (err)
                                goto exit_item_not_supported;
                }
                flow->offset += cur_item->dst_sz;
        }
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        action.drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index >= priv->rxqs_n))
                                goto exit_action_not_supported;
                        for (n = 0; n < action.queues_n; ++n) {
                                if (action.queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (action.queues_n && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        action.queue = 1;
                        action.queues_n = 1;
                        action.queues[0] = queue->index;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        /* Basic sanity checks on the RSS queue list. */
                        if (!rss || !rss->num ||
                            (rss->num > RTE_MAX_QUEUES_PER_PORT))
                                goto exit_action_not_supported;
                        if (action.queues_n == 1) {
                                uint16_t found = 0;

                                assert(action.queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (action.queues[0] == rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        action.queue = 1;
                        for (n = 0; n < rss->num; ++n)
                                action.queues[n] = rss->queue[n];
                        action.queues_n = rss->num;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        action.mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        action.mark = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (action.mark && !flow->ibv_attr && !action.drop)
                flow->offset += sizeof(struct ibv_exp_flow_spec_action_tag);
        if (!action.queue && !action.drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

/**
 * Validate a flow supported by the NIC.
 *
 * @see rte_flow_validate()
 * @see rte_flow_ops
 */
int
mlx5_flow_validate(struct rte_eth_dev *dev,
                   const struct rte_flow_attr *attr,
                   const struct rte_flow_item items[],
                   const struct rte_flow_action actions[],
                   struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        int ret;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };

        priv_lock(priv);
        ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
        priv_unlock(priv);
        return ret;
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
        unsigned int i;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 2;
        flow->hash_fields = 0;
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *eth = (struct ibv_exp_flow_spec_eth) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
                .size = eth_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
        eth->val.ether_type = spec->type;
        memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
        memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
        eth->mask.ether_type = mask->type;
        /* Remove unwanted bits from values. */
        for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
                eth->val.src_mac[i] &= eth->mask.src_mac[i];
        }
        eth->val.ether_type &= eth->mask.ether_type;
        return 0;
}

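/*
 * Note on priorities: each converter in this file sets
 * flow->ibv_attr->priority according to the deepest layer it matches,
 * 2 for L2 (Ethernet), 1 for L3 (IPv4/IPv6) and 0 for L4 (UDP/TCP) and
 * tunnels. Lower values bind tighter, so a rule matching up to L4 takes
 * precedence over one matching L2 only.
 */
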
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);

        /*
         * VLAN has no Verbs spec of its own (dst_sz is 0); the TCI is
         * stored in the Ethernet spec written by the previous item.
         */
        eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        eth->val.vlan_tag = spec->tci;
        eth->mask.vlan_tag = mask->tci;
        eth->val.vlan_tag &= eth->mask.vlan_tag;
        return 0;
}

/**
 * Convert IPv4 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv4 *spec = item->spec;
        const struct rte_flow_item_ipv4 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv4_ext *ipv4;
        unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV4 |
                             IBV_EXP_RX_HASH_DST_IPV4);
        ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv4 = (struct ibv_exp_flow_spec_ipv4_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4_EXT,
                .size = ipv4_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        ipv4->val = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = spec->hdr.src_addr,
                .dst_ip = spec->hdr.dst_addr,
                .proto = spec->hdr.next_proto_id,
                .tos = spec->hdr.type_of_service,
        };
        ipv4->mask = (struct ibv_exp_flow_ipv4_ext_filter){
                .src_ip = mask->hdr.src_addr,
                .dst_ip = mask->hdr.dst_addr,
                .proto = mask->hdr.next_proto_id,
                .tos = mask->hdr.type_of_service,
        };
        /* Remove unwanted bits from values. */
        ipv4->val.src_ip &= ipv4->mask.src_ip;
        ipv4->val.dst_ip &= ipv4->mask.dst_ip;
        ipv4->val.proto &= ipv4->mask.proto;
        ipv4->val.tos &= ipv4->mask.tos;
        return 0;
}

/**
 * Convert IPv6 item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_ipv6 *spec = item->spec;
        const struct rte_flow_item_ipv6 *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_ipv6_ext *ipv6;
        unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6_ext);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 1;
        flow->hash_fields = (IBV_EXP_RX_HASH_SRC_IPV6 |
                             IBV_EXP_RX_HASH_DST_IPV6);
        ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *ipv6 = (struct ibv_exp_flow_spec_ipv6_ext) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6_EXT,
                .size = ipv6_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
               RTE_DIM(ipv6->val.src_ip));
        memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
               RTE_DIM(ipv6->val.dst_ip));
        memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
               RTE_DIM(ipv6->mask.src_ip));
        memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
               RTE_DIM(ipv6->mask.dst_ip));
        /* Copy the remaining header fields before masking them. */
        ipv6->val.flow_label = spec->hdr.vtc_flow;
        ipv6->val.next_hdr = spec->hdr.proto;
        ipv6->val.hop_limit = spec->hdr.hop_limits;
        ipv6->mask.flow_label = mask->hdr.vtc_flow;
        ipv6->mask.next_hdr = mask->hdr.proto;
        ipv6->mask.hop_limit = mask->hdr.hop_limits;
        /* Remove unwanted bits from values. */
        ipv6->val.flow_label &= ipv6->mask.flow_label;
        ipv6->val.next_hdr &= ipv6->mask.next_hdr;
        ipv6->val.hop_limit &= ipv6->mask.hop_limit;
        return 0;
}

/**
 * Convert UDP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_udp *spec = item->spec;
        const struct rte_flow_item_udp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *udp;
        unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_UDP |
                              IBV_EXP_RX_HASH_DST_PORT_UDP);
        udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *udp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
                .size = udp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        udp->val.dst_port = spec->hdr.dst_port;
        udp->val.src_port = spec->hdr.src_port;
        udp->mask.dst_port = mask->hdr.dst_port;
        udp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        udp->val.src_port &= udp->mask.src_port;
        udp->val.dst_port &= udp->mask.dst_port;
        return 0;
}

/**
 * Convert TCP item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_tcp *spec = item->spec;
        const struct rte_flow_item_tcp *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tcp_udp *tcp;
        unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        flow->hash_fields |= (IBV_EXP_RX_HASH_SRC_PORT_TCP |
                              IBV_EXP_RX_HASH_DST_PORT_TCP);
        tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
                .size = tcp_size,
        };
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        tcp->val.dst_port = spec->hdr.dst_port;
        tcp->val.src_port = spec->hdr.src_port;
        tcp->mask.dst_port = mask->hdr.dst_port;
        tcp->mask.src_port = mask->hdr.src_port;
        /* Remove unwanted bits from values. */
        tcp->val.src_port &= tcp->mask.src_port;
        tcp->val.dst_port &= tcp->mask.dst_port;
        return 0;
}

/**
 * Convert VXLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data)
{
        const struct rte_flow_item_vxlan *spec = item->spec;
        const struct rte_flow_item_vxlan *mask = item->mask;
        struct mlx5_flow *flow = (struct mlx5_flow *)data;
        struct ibv_exp_flow_spec_tunnel *vxlan;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
        union vni {
                uint32_t vlan_id;
                uint8_t vni[4];
        } id;

        ++flow->ibv_attr->num_of_specs;
        flow->ibv_attr->priority = 0;
        id.vni[0] = 0;
        vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *vxlan = (struct ibv_exp_flow_spec_tunnel) {
                .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
                .size = size,
        };
        /* Items that follow this one match the inner (encapsulated) packet. */
        flow->inner = IBV_EXP_FLOW_SPEC_INNER;
        if (!spec)
                return 0;
        if (!mask)
                mask = default_mask;
        memcpy(&id.vni[1], spec->vni, 3);
        vxlan->val.tunnel_id = id.vlan_id;
        memcpy(&id.vni[1], mask->vni, 3);
        vxlan->mask.tunnel_id = id.vlan_id;
        /* Remove unwanted bits from values. */
        vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
        return 0;
}

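/*
 * Illustration (assumed values): the 24-bit VXLAN network identifier
 * occupies bytes 1-3 of the id union above. For VNI 0x123456, spec->vni
 * holds { 0x12, 0x34, 0x56 } and id.vni[] becomes
 * { 0x00, 0x12, 0x34, 0x56 }; that byte pattern, once masked, is what
 * val.tunnel_id is matched against.
 */
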
/**
 * Convert mark/flag action to Verbs specification.
 *
 * @param flow
 *   Pointer to MLX5 flow structure.
 * @param mark_id
 *   Mark identifier.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_create_flag_mark(struct mlx5_flow *flow, uint32_t mark_id)
{
        struct ibv_exp_flow_spec_action_tag *tag;
        unsigned int size = sizeof(struct ibv_exp_flow_spec_action_tag);

        tag = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
        *tag = (struct ibv_exp_flow_spec_action_tag){
                .type = IBV_EXP_FLOW_SPEC_ACTION_TAG,
                .size = size,
                .tag_id = mlx5_flow_mark_set(mark_id),
        };
        ++flow->ibv_attr->num_of_specs;
        return 0;
}

/**
 * Complete flow rule creation with a drop queue.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by priv_flow_validate()).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue_drop(struct priv *priv,
                                   struct mlx5_flow *flow,
                                   struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;

        assert(priv->pd);
        assert(priv->ctx);
        rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        /*
         * Matched packets are directed to a placeholder WQ/CQ pair on which
         * no receive buffer is ever posted, which effectively drops them.
         */
        rte_flow->cq =
                ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
                                  &(struct ibv_exp_cq_init_attr){
                                          .comp_mask = 0,
                                  });
        if (!rte_flow->cq) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate CQ");
                goto error;
        }
        rte_flow->wq = ibv_exp_create_wq(priv->ctx,
                                         &(struct ibv_exp_wq_init_attr){
                                         .wq_type = IBV_EXP_WQT_RQ,
                                         .max_recv_wr = 1,
                                         .max_recv_sge = 1,
                                         .pd = priv->pd,
                                         .cq = rte_flow->cq,
                                         });
        if (!rte_flow->wq) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate WQ");
                goto error;
        }
        rte_flow->drop = 1;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
                priv->ctx,
                &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = 0,
                        .ind_tbl = &rte_flow->wq,
                        .comp_mask = 0,
                });
        if (!rte_flow->ind_table) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate indirection table");
                goto error;
        }
        rte_flow->qp = ibv_exp_create_qp(
                priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = 0,
                                .rwq_ind_tbl = rte_flow->ind_table,
                        },
                        .port_num = priv->port,
                });
        if (!rte_flow->qp) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate QP");
                goto error;
        }
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->qp)
                ibv_destroy_qp(rte_flow->qp);
        if (rte_flow->ind_table)
                ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
        if (rte_flow->wq)
                ibv_exp_destroy_wq(rte_flow->wq);
        if (rte_flow->cq)
                ibv_destroy_cq(rte_flow->cq);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Complete flow rule creation.
 *
 * @param priv
 *   Pointer to private structure.
 * @param flow
 *   MLX5 flow attributes (filled by priv_flow_validate()).
 * @param action
 *   Target action structure.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow if the rule could be created, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create_action_queue(struct priv *priv,
                              struct mlx5_flow *flow,
                              struct mlx5_flow_action *action,
                              struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        unsigned int i;
        unsigned int j;
        const unsigned int wqs_n = 1 << log2above(action->queues_n);
        struct ibv_exp_wq *wqs[wqs_n];

        assert(priv->pd);
        assert(priv->ctx);
        assert(!action->drop);
        rte_flow = rte_calloc(__func__, 1,
                              sizeof(*rte_flow) + sizeof(struct rxq *) *
                              action->queues_n, 0);
        if (!rte_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate flow memory");
                return NULL;
        }
        /* The rxq pointer array is stored right after the structure. */
        rte_flow->rxqs = (struct rxq *(*)[])((uintptr_t)rte_flow +
                                             sizeof(*rte_flow));
        for (i = 0; i < action->queues_n; ++i) {
                struct rxq_ctrl *rxq;

                rxq = container_of((*priv->rxqs)[action->queues[i]],
                                   struct rxq_ctrl, rxq);
                wqs[i] = rxq->wq;
                (*rte_flow->rxqs)[i] = &rxq->rxq;
                ++rte_flow->rxqs_n;
                rxq->rxq.mark |= action->mark;
        }
        /* Finalise the indirection table. */
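        /*
         * Illustration (assumed values): Verbs indirection tables have a
         * power-of-two size. With queues {3, 4, 5}, wqs_n is
         * 1 << log2above(3) = 4, and the loop below pads the table by
         * wrapping around the original list, yielding WQs {3, 4, 5, 3}.
         */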
        for (j = 0; i < wqs_n; ++i, ++j) {
                wqs[i] = wqs[j];
                if (j == action->queues_n)
                        j = 0;
        }
        rte_flow->mark = action->mark;
        rte_flow->ibv_attr = flow->ibv_attr;
        rte_flow->hash_fields = flow->hash_fields;
        rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
                priv->ctx,
                &(struct ibv_exp_rwq_ind_table_init_attr){
                        .pd = priv->pd,
                        .log_ind_tbl_size = log2above(action->queues_n),
                        .ind_tbl = wqs,
                        .comp_mask = 0,
                });
        if (!rte_flow->ind_table) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate indirection table");
                goto error;
        }
        rte_flow->qp = ibv_exp_create_qp(
                priv->ctx,
                &(struct ibv_exp_qp_init_attr){
                        .qp_type = IBV_QPT_RAW_PACKET,
                        .comp_mask =
                                IBV_EXP_QP_INIT_ATTR_PD |
                                IBV_EXP_QP_INIT_ATTR_PORT |
                                IBV_EXP_QP_INIT_ATTR_RX_HASH,
                        .pd = priv->pd,
                        .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
                                .rx_hash_function =
                                        IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
                                .rx_hash_key_len = rss_hash_default_key_len,
                                .rx_hash_key = rss_hash_default_key,
                                .rx_hash_fields_mask = rte_flow->hash_fields,
                                .rwq_ind_tbl = rte_flow->ind_table,
                        },
                        .port_num = priv->port,
                });
        if (!rte_flow->qp) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate QP");
                goto error;
        }
        if (!priv->started)
                return rte_flow;
        rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
                                                 rte_flow->ibv_attr);
        if (!rte_flow->ibv_flow) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "flow rule creation failure");
                goto error;
        }
        return rte_flow;
error:
        assert(rte_flow);
        if (rte_flow->qp)
                ibv_destroy_qp(rte_flow->qp);
        if (rte_flow->ind_table)
                ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
        rte_free(rte_flow);
        return NULL;
}

/**
 * Convert a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A flow on success, NULL otherwise.
 */
static struct rte_flow *
priv_flow_create(struct priv *priv,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct rte_flow *rte_flow;
        struct mlx5_flow_action action;
        struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
        int err;

        /* First pass (ibv_attr is NULL): only compute the buffer size. */
        err = priv_flow_validate(priv, attr, items, actions, error, &flow);
        if (err)
                goto exit;
        flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
        flow.offset = sizeof(struct ibv_exp_flow_attr);
        if (!flow.ibv_attr) {
                rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "cannot allocate ibv_attr memory");
                goto exit;
        }
        *flow.ibv_attr = (struct ibv_exp_flow_attr){
                .type = IBV_EXP_FLOW_ATTR_NORMAL,
                .size = sizeof(struct ibv_exp_flow_attr),
                .priority = attr->priority,
                .num_of_specs = 0,
                .port = 0,
                .flags = 0,
                .reserved = 0,
        };
        flow.inner = 0;
        flow.hash_fields = 0;
        /* Second pass: convert items into the allocated Verbs specs. */
        claim_zero(priv_flow_validate(priv, attr, items, actions,
                                      error, &flow));
        action = (struct mlx5_flow_action){
                .queue = 0,
                .drop = 0,
                .mark = 0,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        action.queue = 1;
                        action.queues[action.queues_n++] =
                                ((const struct rte_flow_action_queue *)
                                 actions->conf)->index;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                 actions->conf;
                        uint16_t n;

                        action.queue = 1;
                        action.queues_n = rss->num;
                        for (n = 0; n < rss->num; ++n)
                                action.queues[n] = rss->queue[n];
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        action.drop = 1;
                        action.mark = 0;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (mark)
                                action.mark_id = mark->id;
                        action.mark = !action.drop;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        action.mark = 1;
                } else {
                        rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions, "unsupported action");
                        goto exit;
                }
        }
        if (action.mark) {
                mlx5_flow_create_flag_mark(&flow, action.mark_id);
                flow.offset += sizeof(struct ibv_exp_flow_spec_action_tag);
        }
        if (action.drop)
                rte_flow =
                        priv_flow_create_action_queue_drop(priv, &flow, error);
        else
                rte_flow = priv_flow_create_action_queue(priv, &flow, &action,
                                                         error);
        if (!rte_flow)
                goto exit;
        return rte_flow;
exit:
        rte_free(flow.ibv_attr);
        return NULL;
}

/**
 * Create a flow.
 *
 * @see rte_flow_create()
 * @see rte_flow_ops
 */
struct rte_flow *
mlx5_flow_create(struct rte_eth_dev *dev,
                 const struct rte_flow_attr *attr,
                 const struct rte_flow_item items[],
                 const struct rte_flow_action actions[],
                 struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;
        struct rte_flow *flow;

        priv_lock(priv);
        flow = priv_flow_create(priv, attr, items, actions, error);
        if (flow) {
                LIST_INSERT_HEAD(&priv->flows, flow, next);
                DEBUG("Flow created %p", (void *)flow);
        }
        priv_unlock(priv);
        return flow;
}

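/*
 * Usage sketch (illustration only, not part of this file): how an
 * application could exercise the RSS action handled above through the
 * public rte_flow API. The port id, queue list and pattern below are
 * assumptions for the example, and rte_flow_action_rss is assumed to
 * have the layout of this DPDK revision (rss_conf pointer, num and a
 * flexible queue[] array).
 *
 *	struct rte_flow_error err;
 *	struct rte_flow_attr attr = { .ingress = 1 };
 *	struct rte_flow_item pattern[] = {
 *		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
 *		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *		{ .type = RTE_FLOW_ITEM_TYPE_UDP },
 *		{ .type = RTE_FLOW_ITEM_TYPE_END },
 *	};
 *	uint8_t buf[sizeof(struct rte_flow_action_rss) +
 *		    2 * sizeof(uint16_t)];
 *	struct rte_flow_action_rss *rss = (void *)buf;
 *	struct rte_flow_action actions[] = {
 *		{ .type = RTE_FLOW_ACTION_TYPE_RSS, .conf = rss },
 *		{ .type = RTE_FLOW_ACTION_TYPE_END },
 *	};
 *	struct rte_flow *flow;
 *
 *	memset(buf, 0, sizeof(buf));
 *	rss->num = 2;
 *	rss->queue[0] = 0;
 *	rss->queue[1] = 1;
 *	flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
 *
 * Packets matching the ETH/IPv4/UDP pattern are then spread between Rx
 * queues 0 and 1 by the Toeplitz hash configured in
 * priv_flow_create_action_queue().
 */
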
/**
 * Destroy a flow.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] flow
 *   Flow to destroy.
 */
static void
priv_flow_destroy(struct priv *priv,
                  struct rte_flow *flow)
{
        LIST_REMOVE(flow, next);
        if (flow->ibv_flow)
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
        if (flow->qp)
                claim_zero(ibv_destroy_qp(flow->qp));
        if (flow->ind_table)
                claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
        if (flow->drop && flow->wq)
                claim_zero(ibv_exp_destroy_wq(flow->wq));
        if (flow->drop && flow->cq)
                claim_zero(ibv_destroy_cq(flow->cq));
        if (flow->mark) {
                struct rte_flow *tmp;
                struct rxq *rxq;
                uint32_t mark_n;
                uint32_t queue_n;

                /*
                 * To remove the mark from the queue, the queue must not be
                 * present in any other marked flow (RSS or not).
                 */
                for (queue_n = 0; queue_n < flow->rxqs_n; ++queue_n) {
                        mark_n = 0;
                        rxq = (*flow->rxqs)[queue_n];
                        for (tmp = LIST_FIRST(&priv->flows);
                             tmp;
                             tmp = LIST_NEXT(tmp, next)) {
                                uint32_t tqueue_n;

                                if (tmp->drop)
                                        continue;
                                for (tqueue_n = 0;
                                     tqueue_n < tmp->rxqs_n;
                                     ++tqueue_n) {
                                        struct rxq *trxq;

                                        trxq = (*tmp->rxqs)[tqueue_n];
                                        if (rxq == trxq)
                                                ++mark_n;
                                }
                        }
                        rxq->mark = !!mark_n;
                }
        }
        rte_free(flow->ibv_attr);
        DEBUG("Flow destroyed %p", (void *)flow);
        rte_free(flow);
}

/**
 * Destroy a flow.
 *
 * @see rte_flow_destroy()
 * @see rte_flow_ops
 */
int
mlx5_flow_destroy(struct rte_eth_dev *dev,
                  struct rte_flow *flow,
                  struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_destroy(priv, flow);
        priv_unlock(priv);
        return 0;
}

/**
 * Destroy all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
static void
priv_flow_flush(struct priv *priv)
{
        while (!LIST_EMPTY(&priv->flows)) {
                struct rte_flow *flow;

                flow = LIST_FIRST(&priv->flows);
                priv_flow_destroy(priv, flow);
        }
}

/**
 * Destroy all flows.
 *
 * @see rte_flow_flush()
 * @see rte_flow_ops
 */
int
mlx5_flow_flush(struct rte_eth_dev *dev,
                struct rte_flow_error *error)
{
        struct priv *priv = dev->data->dev_private;

        (void)error;
        priv_lock(priv);
        priv_flow_flush(priv);
        priv_unlock(priv);
        return 0;
}

/**
 * Remove all flows.
 *
 * Called by dev_stop() to remove all flows.
 *
 * @param priv
 *   Pointer to private structure.
 */
void
priv_flow_stop(struct priv *priv)
{
        struct rte_flow *flow;

        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
                flow->ibv_flow = NULL;
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                (*flow->rxqs)[n]->mark = 0;
                }
                DEBUG("Flow %p removed", (void *)flow);
        }
}

/**
 * Add all flows.
 *
 * @param priv
 *   Pointer to private structure.
 *
 * @return
 *   0 on success, an errno value otherwise and rte_errno is set.
 */
int
priv_flow_start(struct priv *priv)
{
        struct rte_flow *flow;

        for (flow = LIST_FIRST(&priv->flows);
             flow;
             flow = LIST_NEXT(flow, next)) {
                flow->ibv_flow = ibv_exp_create_flow(flow->qp,
                                                     flow->ibv_attr);
                if (!flow->ibv_flow) {
                        DEBUG("Flow %p cannot be applied", (void *)flow);
                        rte_errno = EINVAL;
                        return rte_errno;
                }
                DEBUG("Flow %p applied", (void *)flow);
                if (flow->mark) {
                        unsigned int n;

                        for (n = 0; n < flow->rxqs_n; ++n)
                                (*flow->rxqs)[n]->mark = 1;
                }
        }
        return 0;
}