net/mlx5: support VXLAN flow item
[dpdk.git] / drivers / net / mlx5 / mlx5_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35 #include <string.h>
36
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51
52 #include "mlx5.h"
53
/*
 * Forward declarations of the item conversion callbacks. They are referenced
 * by the mlx5_flow_items[] table before being defined, and all share the
 * prototype of the "convert" member of struct mlx5_flow_items.
 */
static int mlx5_flow_create_eth(const struct rte_flow_item *item,
                                const void *default_mask, void *data);
static int mlx5_flow_create_vlan(const struct rte_flow_item *item,
                                 const void *default_mask, void *data);
static int mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                                 const void *default_mask, void *data);
static int mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                                 const void *default_mask, void *data);
static int mlx5_flow_create_udp(const struct rte_flow_item *item,
                                const void *default_mask, void *data);
static int mlx5_flow_create_tcp(const struct rte_flow_item *item,
                                const void *default_mask, void *data);
static int mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                                  const void *default_mask, void *data);
88
/** Flow rule and the Verbs objects attached to it. */
struct rte_flow {
        /** Pointer to the next flow structure. */
        LIST_ENTRY(rte_flow) next;
        /** Pointer to Verbs attributes. */
        struct ibv_exp_flow_attr *ibv_attr;
        /** Indirection table. */
        struct ibv_exp_rwq_ind_table *ind_table;
        /** Verbs queue pair. */
        struct ibv_qp *qp;
        /** Verbs flow. */
        struct ibv_exp_flow *ibv_flow;
        /** Verbs work queue. */
        struct ibv_exp_wq *wq;
        /** Verbs completion queue. */
        struct ibv_cq *cq;
        /** Pointer to the queue, NULL if drop queue. */
        struct rxq *rxq;
};
99
/**
 * Static initializer for item lists.
 *
 * Expands to a compound literal array holding the listed item types followed
 * by RTE_FLOW_ITEM_TYPE_END.
 */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, \
                RTE_FLOW_ITEM_TYPE_END, \
        }
105
/**
 * Node of the graph describing the layers supported by the NIC.
 *
 * One node exists per supported item type; "items" links a node to the item
 * types allowed to come next in a pattern.
 */
struct mlx5_flow_items {
        /** Actions that may be attached to a pattern using this item. */
        const enum rte_flow_action_type *const actions;
        /** Bit-mask of the item fields that can be matched. */
        const void *mask;
        /** Size of the bit-mask above, in bytes. */
        const unsigned int mask_sz;
        /**
         * Callback translating the rte_flow item into its NIC specific
         * representation.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Bit-mask applied when item->mask is not provided.
         * @param data
         *   Internal structure receiving the result of the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Number of bytes taken by the destination structure. */
        const unsigned int dst_sz;
        /** Item types allowed to follow this one (END terminated). */
        const enum rte_flow_item_type *const items;
};
135
136 /** Valid action for this PMD. */
137 static const enum rte_flow_action_type valid_actions[] = {
138         RTE_FLOW_ACTION_TYPE_DROP,
139         RTE_FLOW_ACTION_TYPE_QUEUE,
140         RTE_FLOW_ACTION_TYPE_END,
141 };
142
143 /** Graph of supported items and associated actions. */
144 static const struct mlx5_flow_items mlx5_flow_items[] = {
145         [RTE_FLOW_ITEM_TYPE_END] = {
146                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
147                                RTE_FLOW_ITEM_TYPE_VXLAN),
148         },
149         [RTE_FLOW_ITEM_TYPE_ETH] = {
150                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
151                                RTE_FLOW_ITEM_TYPE_IPV4,
152                                RTE_FLOW_ITEM_TYPE_IPV6),
153                 .actions = valid_actions,
154                 .mask = &(const struct rte_flow_item_eth){
155                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
156                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
157                 },
158                 .mask_sz = sizeof(struct rte_flow_item_eth),
159                 .convert = mlx5_flow_create_eth,
160                 .dst_sz = sizeof(struct ibv_exp_flow_spec_eth),
161         },
162         [RTE_FLOW_ITEM_TYPE_VLAN] = {
163                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
164                                RTE_FLOW_ITEM_TYPE_IPV6),
165                 .actions = valid_actions,
166                 .mask = &(const struct rte_flow_item_vlan){
167                         .tci = -1,
168                 },
169                 .mask_sz = sizeof(struct rte_flow_item_vlan),
170                 .convert = mlx5_flow_create_vlan,
171                 .dst_sz = 0,
172         },
173         [RTE_FLOW_ITEM_TYPE_IPV4] = {
174                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
175                                RTE_FLOW_ITEM_TYPE_TCP),
176                 .actions = valid_actions,
177                 .mask = &(const struct rte_flow_item_ipv4){
178                         .hdr = {
179                                 .src_addr = -1,
180                                 .dst_addr = -1,
181                         },
182                 },
183                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
184                 .convert = mlx5_flow_create_ipv4,
185                 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv4),
186         },
187         [RTE_FLOW_ITEM_TYPE_IPV6] = {
188                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
189                                RTE_FLOW_ITEM_TYPE_TCP),
190                 .actions = valid_actions,
191                 .mask = &(const struct rte_flow_item_ipv6){
192                         .hdr = {
193                                 .src_addr = {
194                                         0xff, 0xff, 0xff, 0xff,
195                                         0xff, 0xff, 0xff, 0xff,
196                                         0xff, 0xff, 0xff, 0xff,
197                                         0xff, 0xff, 0xff, 0xff,
198                                 },
199                                 .dst_addr = {
200                                         0xff, 0xff, 0xff, 0xff,
201                                         0xff, 0xff, 0xff, 0xff,
202                                         0xff, 0xff, 0xff, 0xff,
203                                         0xff, 0xff, 0xff, 0xff,
204                                 },
205                         },
206                 },
207                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
208                 .convert = mlx5_flow_create_ipv6,
209                 .dst_sz = sizeof(struct ibv_exp_flow_spec_ipv6),
210         },
211         [RTE_FLOW_ITEM_TYPE_UDP] = {
212                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
213                 .actions = valid_actions,
214                 .mask = &(const struct rte_flow_item_udp){
215                         .hdr = {
216                                 .src_port = -1,
217                                 .dst_port = -1,
218                         },
219                 },
220                 .mask_sz = sizeof(struct rte_flow_item_udp),
221                 .convert = mlx5_flow_create_udp,
222                 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
223         },
224         [RTE_FLOW_ITEM_TYPE_TCP] = {
225                 .actions = valid_actions,
226                 .mask = &(const struct rte_flow_item_tcp){
227                         .hdr = {
228                                 .src_port = -1,
229                                 .dst_port = -1,
230                         },
231                 },
232                 .mask_sz = sizeof(struct rte_flow_item_tcp),
233                 .convert = mlx5_flow_create_tcp,
234                 .dst_sz = sizeof(struct ibv_exp_flow_spec_tcp_udp),
235         },
236         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
237                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
238                 .actions = valid_actions,
239                 .mask = &(const struct rte_flow_item_vxlan){
240                         .vni = "\xff\xff\xff",
241                 },
242                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
243                 .convert = mlx5_flow_create_vxlan,
244                 .dst_sz = sizeof(struct ibv_exp_flow_spec_tunnel),
245         },
246 };
247
/** Conversion context handed to the item conversion functions. */
struct mlx5_flow {
        /** Verbs attribute. */
        struct ibv_exp_flow_attr *ibv_attr;
        /** Offset in bytes in the ibv_attr buffer. */
        unsigned int offset;
        /** Set once VXLAN is encountered. */
        uint32_t inner;
};
254
/** Summary of the actions found while parsing a flow rule. */
struct mlx5_flow_action {
        /** Target is a receive queue. */
        uint32_t queue:1;
        /** Target is a drop queue. */
        uint32_t drop:1;
        /** Identifier of the queue. */
        uint32_t queue_id;
};
260
261 /**
262  * Check support for a given item.
263  *
264  * @param item[in]
265  *   Item specification.
266  * @param mask[in]
267  *   Bit-masks covering supported fields to compare with spec, last and mask in
268  *   \item.
269  * @param size
270  *   Bit-Mask size in bytes.
271  *
272  * @return
273  *   0 on success.
274  */
275 static int
276 mlx5_flow_item_validate(const struct rte_flow_item *item,
277                         const uint8_t *mask, unsigned int size)
278 {
279         int ret = 0;
280
281         if (!item->spec && (item->mask || item->last))
282                 return -1;
283         if (item->spec && !item->mask) {
284                 unsigned int i;
285                 const uint8_t *spec = item->spec;
286
287                 for (i = 0; i < size; ++i)
288                         if ((spec[i] | mask[i]) != mask[i])
289                                 return -1;
290         }
291         if (item->last && !item->mask) {
292                 unsigned int i;
293                 const uint8_t *spec = item->last;
294
295                 for (i = 0; i < size; ++i)
296                         if ((spec[i] | mask[i]) != mask[i])
297                                 return -1;
298         }
299         if (item->mask) {
300                 unsigned int i;
301                 const uint8_t *spec = item->mask;
302
303                 for (i = 0; i < size; ++i)
304                         if ((spec[i] | mask[i]) != mask[i])
305                                 return -1;
306         }
307         if (item->spec && item->last) {
308                 uint8_t spec[size];
309                 uint8_t last[size];
310                 const uint8_t *apply = mask;
311                 unsigned int i;
312
313                 if (item->mask)
314                         apply = item->mask;
315                 for (i = 0; i < size; ++i) {
316                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
317                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
318                 }
319                 ret = memcmp(spec, last, size);
320         }
321         return ret;
322 }
323
324 /**
325  * Validate a flow supported by the NIC.
326  *
327  * @param priv
328  *   Pointer to private structure.
329  * @param[in] attr
330  *   Flow rule attributes.
331  * @param[in] pattern
332  *   Pattern specification (list terminated by the END pattern item).
333  * @param[in] actions
334  *   Associated actions (list terminated by the END action).
335  * @param[out] error
336  *   Perform verbose error reporting if not NULL.
337  * @param[in, out] flow
338  *   Flow structure to update.
339  *
340  * @return
341  *   0 on success, a negative errno value otherwise and rte_errno is set.
342  */
343 static int
344 priv_flow_validate(struct priv *priv,
345                    const struct rte_flow_attr *attr,
346                    const struct rte_flow_item items[],
347                    const struct rte_flow_action actions[],
348                    struct rte_flow_error *error,
349                    struct mlx5_flow *flow)
350 {
351         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
352         struct mlx5_flow_action action = {
353                 .queue = 0,
354                 .drop = 0,
355         };
356
357         (void)priv;
358         if (attr->group) {
359                 rte_flow_error_set(error, ENOTSUP,
360                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
361                                    NULL,
362                                    "groups are not supported");
363                 return -rte_errno;
364         }
365         if (attr->priority) {
366                 rte_flow_error_set(error, ENOTSUP,
367                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
368                                    NULL,
369                                    "priorities are not supported");
370                 return -rte_errno;
371         }
372         if (attr->egress) {
373                 rte_flow_error_set(error, ENOTSUP,
374                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
375                                    NULL,
376                                    "egress is not supported");
377                 return -rte_errno;
378         }
379         if (!attr->ingress) {
380                 rte_flow_error_set(error, ENOTSUP,
381                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
382                                    NULL,
383                                    "only ingress is supported");
384                 return -rte_errno;
385         }
386         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
387                 const struct mlx5_flow_items *token = NULL;
388                 unsigned int i;
389                 int err;
390
391                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
392                         continue;
393                 /* Handle special situation for VLAN. */
394                 if (items->type == RTE_FLOW_ITEM_TYPE_VLAN) {
395                         if (((const struct rte_flow_item_vlan *)items)->tci >
396                             ETHER_MAX_VLAN_ID) {
397                                 rte_flow_error_set(error, ENOTSUP,
398                                                    RTE_FLOW_ERROR_TYPE_ITEM,
399                                                    items,
400                                                    "wrong VLAN id value");
401                                 return -rte_errno;
402                         }
403                 }
404                 for (i = 0;
405                      cur_item->items &&
406                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
407                      ++i) {
408                         if (cur_item->items[i] == items->type) {
409                                 token = &mlx5_flow_items[items->type];
410                                 break;
411                         }
412                 }
413                 if (!token)
414                         goto exit_item_not_supported;
415                 cur_item = token;
416                 err = mlx5_flow_item_validate(items,
417                                               (const uint8_t *)cur_item->mask,
418                                               sizeof(cur_item->mask_sz));
419                 if (err)
420                         goto exit_item_not_supported;
421                 if (flow->ibv_attr && cur_item->convert) {
422                         err = cur_item->convert(items, cur_item->mask, flow);
423                         if (err)
424                                 goto exit_item_not_supported;
425                 }
426                 flow->offset += cur_item->dst_sz;
427         }
428         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
429                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
430                         continue;
431                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
432                         action.drop = 1;
433                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
434                         const struct rte_flow_action_queue *queue =
435                                 (const struct rte_flow_action_queue *)
436                                 actions->conf;
437
438                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
439                                 goto exit_action_not_supported;
440                         action.queue = 1;
441                 } else {
442                         goto exit_action_not_supported;
443                 }
444         }
445         if (!action.queue && !action.drop) {
446                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
447                                    NULL, "no valid action");
448                 return -rte_errno;
449         }
450         return 0;
451 exit_item_not_supported:
452         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
453                            items, "item not supported");
454         return -rte_errno;
455 exit_action_not_supported:
456         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
457                            actions, "action not supported");
458         return -rte_errno;
459 }
460
461 /**
462  * Validate a flow supported by the NIC.
463  *
464  * @see rte_flow_validate()
465  * @see rte_flow_ops
466  */
467 int
468 mlx5_flow_validate(struct rte_eth_dev *dev,
469                    const struct rte_flow_attr *attr,
470                    const struct rte_flow_item items[],
471                    const struct rte_flow_action actions[],
472                    struct rte_flow_error *error)
473 {
474         struct priv *priv = dev->data->dev_private;
475         int ret;
476         struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr) };
477
478         priv_lock(priv);
479         ret = priv_flow_validate(priv, attr, items, actions, error, &flow);
480         priv_unlock(priv);
481         return ret;
482 }
483
484 /**
485  * Convert Ethernet item to Verbs specification.
486  *
487  * @param item[in]
488  *   Item specification.
489  * @param default_mask[in]
490  *   Default bit-masks to use when item->mask is not provided.
491  * @param data[in, out]
492  *   User structure.
493  */
494 static int
495 mlx5_flow_create_eth(const struct rte_flow_item *item,
496                      const void *default_mask,
497                      void *data)
498 {
499         const struct rte_flow_item_eth *spec = item->spec;
500         const struct rte_flow_item_eth *mask = item->mask;
501         struct mlx5_flow *flow = (struct mlx5_flow *)data;
502         struct ibv_exp_flow_spec_eth *eth;
503         const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
504         unsigned int i;
505
506         ++flow->ibv_attr->num_of_specs;
507         flow->ibv_attr->priority = 2;
508         eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
509         *eth = (struct ibv_exp_flow_spec_eth) {
510                 .type = flow->inner | IBV_EXP_FLOW_SPEC_ETH,
511                 .size = eth_size,
512         };
513         if (!spec)
514                 return 0;
515         if (!mask)
516                 mask = default_mask;
517         memcpy(eth->val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
518         memcpy(eth->val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
519         memcpy(eth->mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
520         memcpy(eth->mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
521         /* Remove unwanted bits from values. */
522         for (i = 0; i < ETHER_ADDR_LEN; ++i) {
523                 eth->val.dst_mac[i] &= eth->mask.dst_mac[i];
524                 eth->val.src_mac[i] &= eth->mask.src_mac[i];
525         }
526         return 0;
527 }
528
529 /**
530  * Convert VLAN item to Verbs specification.
531  *
532  * @param item[in]
533  *   Item specification.
534  * @param default_mask[in]
535  *   Default bit-masks to use when item->mask is not provided.
536  * @param data[in, out]
537  *   User structure.
538  */
539 static int
540 mlx5_flow_create_vlan(const struct rte_flow_item *item,
541                       const void *default_mask,
542                       void *data)
543 {
544         const struct rte_flow_item_vlan *spec = item->spec;
545         const struct rte_flow_item_vlan *mask = item->mask;
546         struct mlx5_flow *flow = (struct mlx5_flow *)data;
547         struct ibv_exp_flow_spec_eth *eth;
548         const unsigned int eth_size = sizeof(struct ibv_exp_flow_spec_eth);
549
550         eth = (void *)((uintptr_t)flow->ibv_attr + flow->offset - eth_size);
551         if (!spec)
552                 return 0;
553         if (!mask)
554                 mask = default_mask;
555         eth->val.vlan_tag = spec->tci;
556         eth->mask.vlan_tag = mask->tci;
557         eth->val.vlan_tag &= eth->mask.vlan_tag;
558         return 0;
559 }
560
561 /**
562  * Convert IPv4 item to Verbs specification.
563  *
564  * @param item[in]
565  *   Item specification.
566  * @param default_mask[in]
567  *   Default bit-masks to use when item->mask is not provided.
568  * @param data[in, out]
569  *   User structure.
570  */
571 static int
572 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
573                       const void *default_mask,
574                       void *data)
575 {
576         const struct rte_flow_item_ipv4 *spec = item->spec;
577         const struct rte_flow_item_ipv4 *mask = item->mask;
578         struct mlx5_flow *flow = (struct mlx5_flow *)data;
579         struct ibv_exp_flow_spec_ipv4 *ipv4;
580         unsigned int ipv4_size = sizeof(struct ibv_exp_flow_spec_ipv4);
581
582         ++flow->ibv_attr->num_of_specs;
583         flow->ibv_attr->priority = 1;
584         ipv4 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
585         *ipv4 = (struct ibv_exp_flow_spec_ipv4) {
586                 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV4,
587                 .size = ipv4_size,
588         };
589         if (!spec)
590                 return 0;
591         if (!mask)
592                 mask = default_mask;
593         ipv4->val = (struct ibv_exp_flow_ipv4_filter){
594                 .src_ip = spec->hdr.src_addr,
595                 .dst_ip = spec->hdr.dst_addr,
596         };
597         ipv4->mask = (struct ibv_exp_flow_ipv4_filter){
598                 .src_ip = mask->hdr.src_addr,
599                 .dst_ip = mask->hdr.dst_addr,
600         };
601         /* Remove unwanted bits from values. */
602         ipv4->val.src_ip &= ipv4->mask.src_ip;
603         ipv4->val.dst_ip &= ipv4->mask.dst_ip;
604         return 0;
605 }
606
607 /**
608  * Convert IPv6 item to Verbs specification.
609  *
610  * @param item[in]
611  *   Item specification.
612  * @param default_mask[in]
613  *   Default bit-masks to use when item->mask is not provided.
614  * @param data[in, out]
615  *   User structure.
616  */
617 static int
618 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
619                       const void *default_mask,
620                       void *data)
621 {
622         const struct rte_flow_item_ipv6 *spec = item->spec;
623         const struct rte_flow_item_ipv6 *mask = item->mask;
624         struct mlx5_flow *flow = (struct mlx5_flow *)data;
625         struct ibv_exp_flow_spec_ipv6 *ipv6;
626         unsigned int ipv6_size = sizeof(struct ibv_exp_flow_spec_ipv6);
627         unsigned int i;
628
629         ++flow->ibv_attr->num_of_specs;
630         flow->ibv_attr->priority = 1;
631         ipv6 = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
632         *ipv6 = (struct ibv_exp_flow_spec_ipv6) {
633                 .type = flow->inner | IBV_EXP_FLOW_SPEC_IPV6,
634                 .size = ipv6_size,
635         };
636         if (!spec)
637                 return 0;
638         if (!mask)
639                 mask = default_mask;
640         memcpy(ipv6->val.src_ip, spec->hdr.src_addr,
641                RTE_DIM(ipv6->val.src_ip));
642         memcpy(ipv6->val.dst_ip, spec->hdr.dst_addr,
643                RTE_DIM(ipv6->val.dst_ip));
644         memcpy(ipv6->mask.src_ip, mask->hdr.src_addr,
645                RTE_DIM(ipv6->mask.src_ip));
646         memcpy(ipv6->mask.dst_ip, mask->hdr.dst_addr,
647                RTE_DIM(ipv6->mask.dst_ip));
648         /* Remove unwanted bits from values. */
649         for (i = 0; i < RTE_DIM(ipv6->val.src_ip); ++i) {
650                 ipv6->val.src_ip[i] &= ipv6->mask.src_ip[i];
651                 ipv6->val.dst_ip[i] &= ipv6->mask.dst_ip[i];
652         }
653         return 0;
654 }
655
656 /**
657  * Convert UDP item to Verbs specification.
658  *
659  * @param item[in]
660  *   Item specification.
661  * @param default_mask[in]
662  *   Default bit-masks to use when item->mask is not provided.
663  * @param data[in, out]
664  *   User structure.
665  */
666 static int
667 mlx5_flow_create_udp(const struct rte_flow_item *item,
668                      const void *default_mask,
669                      void *data)
670 {
671         const struct rte_flow_item_udp *spec = item->spec;
672         const struct rte_flow_item_udp *mask = item->mask;
673         struct mlx5_flow *flow = (struct mlx5_flow *)data;
674         struct ibv_exp_flow_spec_tcp_udp *udp;
675         unsigned int udp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
676
677         ++flow->ibv_attr->num_of_specs;
678         flow->ibv_attr->priority = 0;
679         udp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
680         *udp = (struct ibv_exp_flow_spec_tcp_udp) {
681                 .type = flow->inner | IBV_EXP_FLOW_SPEC_UDP,
682                 .size = udp_size,
683         };
684         if (!spec)
685                 return 0;
686         if (!mask)
687                 mask = default_mask;
688         udp->val.dst_port = spec->hdr.dst_port;
689         udp->val.src_port = spec->hdr.src_port;
690         udp->mask.dst_port = mask->hdr.dst_port;
691         udp->mask.src_port = mask->hdr.src_port;
692         /* Remove unwanted bits from values. */
693         udp->val.src_port &= udp->mask.src_port;
694         udp->val.dst_port &= udp->mask.dst_port;
695         return 0;
696 }
697
698 /**
699  * Convert TCP item to Verbs specification.
700  *
701  * @param item[in]
702  *   Item specification.
703  * @param default_mask[in]
704  *   Default bit-masks to use when item->mask is not provided.
705  * @param data[in, out]
706  *   User structure.
707  */
708 static int
709 mlx5_flow_create_tcp(const struct rte_flow_item *item,
710                      const void *default_mask,
711                      void *data)
712 {
713         const struct rte_flow_item_tcp *spec = item->spec;
714         const struct rte_flow_item_tcp *mask = item->mask;
715         struct mlx5_flow *flow = (struct mlx5_flow *)data;
716         struct ibv_exp_flow_spec_tcp_udp *tcp;
717         unsigned int tcp_size = sizeof(struct ibv_exp_flow_spec_tcp_udp);
718
719         ++flow->ibv_attr->num_of_specs;
720         flow->ibv_attr->priority = 0;
721         tcp = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
722         *tcp = (struct ibv_exp_flow_spec_tcp_udp) {
723                 .type = flow->inner | IBV_EXP_FLOW_SPEC_TCP,
724                 .size = tcp_size,
725         };
726         if (!spec)
727                 return 0;
728         if (!mask)
729                 mask = default_mask;
730         tcp->val.dst_port = spec->hdr.dst_port;
731         tcp->val.src_port = spec->hdr.src_port;
732         tcp->mask.dst_port = mask->hdr.dst_port;
733         tcp->mask.src_port = mask->hdr.src_port;
734         /* Remove unwanted bits from values. */
735         tcp->val.src_port &= tcp->mask.src_port;
736         tcp->val.dst_port &= tcp->mask.dst_port;
737         return 0;
738 }
739
740 /**
741  * Convert VXLAN item to Verbs specification.
742  *
743  * @param item[in]
744  *   Item specification.
745  * @param default_mask[in]
746  *   Default bit-masks to use when item->mask is not provided.
747  * @param data[in, out]
748  *   User structure.
749  */
750 static int
751 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
752                        const void *default_mask,
753                        void *data)
754 {
755         const struct rte_flow_item_vxlan *spec = item->spec;
756         const struct rte_flow_item_vxlan *mask = item->mask;
757         struct mlx5_flow *flow = (struct mlx5_flow *)data;
758         struct ibv_exp_flow_spec_tunnel *vxlan;
759         unsigned int size = sizeof(struct ibv_exp_flow_spec_tunnel);
760         union vni {
761                 uint32_t vlan_id;
762                 uint8_t vni[4];
763         } id;
764
765         ++flow->ibv_attr->num_of_specs;
766         flow->ibv_attr->priority = 0;
767         id.vni[0] = 0;
768         vxlan = (void *)((uintptr_t)flow->ibv_attr + flow->offset);
769         *vxlan = (struct ibv_exp_flow_spec_tunnel) {
770                 .type = flow->inner | IBV_EXP_FLOW_SPEC_VXLAN_TUNNEL,
771                 .size = size,
772         };
773         flow->inner = IBV_EXP_FLOW_SPEC_INNER;
774         if (!spec)
775                 return 0;
776         if (!mask)
777                 mask = default_mask;
778         memcpy(&id.vni[1], spec->vni, 3);
779         vxlan->val.tunnel_id = id.vlan_id;
780         memcpy(&id.vni[1], mask->vni, 3);
781         vxlan->mask.tunnel_id = id.vlan_id;
782         /* Remove unwanted bits from values. */
783         vxlan->val.tunnel_id &= vxlan->mask.tunnel_id;
784         return 0;
785 }
786
787 /**
788  * Complete flow rule creation.
789  *
790  * @param priv
791  *   Pointer to private structure.
792  * @param ibv_attr
793  *   Verbs flow attributes.
794  * @param action
795  *   Target action structure.
796  * @param[out] error
797  *   Perform verbose error reporting if not NULL.
798  *
799  * @return
800  *   A flow if the rule could be created.
801  */
802 static struct rte_flow *
803 priv_flow_create_action_queue(struct priv *priv,
804                               struct ibv_exp_flow_attr *ibv_attr,
805                               struct mlx5_flow_action *action,
806                               struct rte_flow_error *error)
807 {
808         struct rxq_ctrl *rxq;
809         struct rte_flow *rte_flow;
810
811         assert(priv->pd);
812         assert(priv->ctx);
813         rte_flow = rte_calloc(__func__, 1, sizeof(*rte_flow), 0);
814         if (!rte_flow) {
815                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
816                                    NULL, "cannot allocate flow memory");
817                 return NULL;
818         }
819         if (action->drop) {
820                 rte_flow->cq =
821                         ibv_exp_create_cq(priv->ctx, 1, NULL, NULL, 0,
822                                           &(struct ibv_exp_cq_init_attr){
823                                                   .comp_mask = 0,
824                                           });
825                 if (!rte_flow->cq) {
826                         rte_flow_error_set(error, ENOMEM,
827                                            RTE_FLOW_ERROR_TYPE_HANDLE,
828                                            NULL, "cannot allocate CQ");
829                         goto error;
830                 }
831                 rte_flow->wq = ibv_exp_create_wq(priv->ctx,
832                                                  &(struct ibv_exp_wq_init_attr){
833                                                  .wq_type = IBV_EXP_WQT_RQ,
834                                                  .max_recv_wr = 1,
835                                                  .max_recv_sge = 1,
836                                                  .pd = priv->pd,
837                                                  .cq = rte_flow->cq,
838                                                  });
839         } else {
840                 rxq = container_of((*priv->rxqs)[action->queue_id],
841                                    struct rxq_ctrl, rxq);
842                 rte_flow->rxq = &rxq->rxq;
843                 rte_flow->wq = rxq->wq;
844         }
845         rte_flow->ibv_attr = ibv_attr;
846         rte_flow->ind_table = ibv_exp_create_rwq_ind_table(
847                 priv->ctx,
848                 &(struct ibv_exp_rwq_ind_table_init_attr){
849                         .pd = priv->pd,
850                         .log_ind_tbl_size = 0,
851                         .ind_tbl = &rte_flow->wq,
852                         .comp_mask = 0,
853                 });
854         if (!rte_flow->ind_table) {
855                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
856                                    NULL, "cannot allocate indirection table");
857                 goto error;
858         }
859         rte_flow->qp = ibv_exp_create_qp(
860                 priv->ctx,
861                 &(struct ibv_exp_qp_init_attr){
862                         .qp_type = IBV_QPT_RAW_PACKET,
863                         .comp_mask =
864                                 IBV_EXP_QP_INIT_ATTR_PD |
865                                 IBV_EXP_QP_INIT_ATTR_PORT |
866                                 IBV_EXP_QP_INIT_ATTR_RX_HASH,
867                         .pd = priv->pd,
868                         .rx_hash_conf = &(struct ibv_exp_rx_hash_conf){
869                                 .rx_hash_function =
870                                         IBV_EXP_RX_HASH_FUNC_TOEPLITZ,
871                                 .rx_hash_key_len = rss_hash_default_key_len,
872                                 .rx_hash_key = rss_hash_default_key,
873                                 .rx_hash_fields_mask = 0,
874                                 .rwq_ind_tbl = rte_flow->ind_table,
875                         },
876                         .port_num = priv->port,
877                 });
878         if (!rte_flow->qp) {
879                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
880                                    NULL, "cannot allocate QP");
881                 goto error;
882         }
883         rte_flow->ibv_flow = ibv_exp_create_flow(rte_flow->qp,
884                                                  rte_flow->ibv_attr);
885         if (!rte_flow->ibv_flow) {
886                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
887                                    NULL, "flow rule creation failure");
888                 goto error;
889         }
890         return rte_flow;
891 error:
892         assert(rte_flow);
893         if (rte_flow->qp)
894                 ibv_destroy_qp(rte_flow->qp);
895         if (rte_flow->ind_table)
896                 ibv_exp_destroy_rwq_ind_table(rte_flow->ind_table);
897         if (!rte_flow->rxq && rte_flow->wq)
898                 ibv_exp_destroy_wq(rte_flow->wq);
899         if (!rte_flow->rxq && rte_flow->cq)
900                 ibv_destroy_cq(rte_flow->cq);
901         rte_free(rte_flow->ibv_attr);
902         rte_free(rte_flow);
903         return NULL;
904 }
905
906 /**
907  * Convert a flow.
908  *
909  * @param priv
910  *   Pointer to private structure.
911  * @param[in] attr
912  *   Flow rule attributes.
913  * @param[in] pattern
914  *   Pattern specification (list terminated by the END pattern item).
915  * @param[in] actions
916  *   Associated actions (list terminated by the END action).
917  * @param[out] error
918  *   Perform verbose error reporting if not NULL.
919  *
920  * @return
921  *   A flow on success, NULL otherwise.
922  */
923 static struct rte_flow *
924 priv_flow_create(struct priv *priv,
925                  const struct rte_flow_attr *attr,
926                  const struct rte_flow_item items[],
927                  const struct rte_flow_action actions[],
928                  struct rte_flow_error *error)
929 {
930         struct rte_flow *rte_flow;
931         struct mlx5_flow_action action;
932         struct mlx5_flow flow = { .offset = sizeof(struct ibv_exp_flow_attr), };
933         int err;
934
935         err = priv_flow_validate(priv, attr, items, actions, error, &flow);
936         if (err)
937                 goto exit;
938         flow.ibv_attr = rte_malloc(__func__, flow.offset, 0);
939         flow.offset = sizeof(struct ibv_exp_flow_attr);
940         if (!flow.ibv_attr) {
941                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
942                                    NULL, "cannot allocate ibv_attr memory");
943                 goto exit;
944         }
945         *flow.ibv_attr = (struct ibv_exp_flow_attr){
946                 .type = IBV_EXP_FLOW_ATTR_NORMAL,
947                 .size = sizeof(struct ibv_exp_flow_attr),
948                 .priority = attr->priority,
949                 .num_of_specs = 0,
950                 .port = 0,
951                 .flags = 0,
952                 .reserved = 0,
953         };
954         flow.inner = 0;
955         claim_zero(priv_flow_validate(priv, attr, items, actions,
956                                       error, &flow));
957         action = (struct mlx5_flow_action){
958                 .queue = 0,
959                 .drop = 0,
960         };
961         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
962                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
963                         continue;
964                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
965                         action.queue = 1;
966                         action.queue_id =
967                                 ((const struct rte_flow_action_queue *)
968                                  actions->conf)->index;
969                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
970                         action.drop = 1;
971                 } else {
972                         rte_flow_error_set(error, ENOTSUP,
973                                            RTE_FLOW_ERROR_TYPE_ACTION,
974                                            actions, "unsupported action");
975                         goto exit;
976                 }
977         }
978         rte_flow = priv_flow_create_action_queue(priv, flow.ibv_attr,
979                                                  &action, error);
980         return rte_flow;
981 exit:
982         rte_free(flow.ibv_attr);
983         return NULL;
984 }
985
986 /**
987  * Create a flow.
988  *
989  * @see rte_flow_create()
990  * @see rte_flow_ops
991  */
992 struct rte_flow *
993 mlx5_flow_create(struct rte_eth_dev *dev,
994                  const struct rte_flow_attr *attr,
995                  const struct rte_flow_item items[],
996                  const struct rte_flow_action actions[],
997                  struct rte_flow_error *error)
998 {
999         struct priv *priv = dev->data->dev_private;
1000         struct rte_flow *flow;
1001
1002         priv_lock(priv);
1003         flow = priv_flow_create(priv, attr, items, actions, error);
1004         if (flow) {
1005                 LIST_INSERT_HEAD(&priv->flows, flow, next);
1006                 DEBUG("Flow created %p", (void *)flow);
1007         }
1008         priv_unlock(priv);
1009         return flow;
1010 }
1011
1012 /**
1013  * Destroy a flow.
1014  *
1015  * @param priv
1016  *   Pointer to private structure.
1017  * @param[in] flow
1018  *   Flow to destroy.
1019  */
1020 static void
1021 priv_flow_destroy(struct priv *priv,
1022                   struct rte_flow *flow)
1023 {
1024         (void)priv;
1025         LIST_REMOVE(flow, next);
1026         if (flow->ibv_flow)
1027                 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1028         if (flow->qp)
1029                 claim_zero(ibv_destroy_qp(flow->qp));
1030         if (flow->ind_table)
1031                 claim_zero(ibv_exp_destroy_rwq_ind_table(flow->ind_table));
1032         if (!flow->rxq && flow->wq)
1033                 claim_zero(ibv_exp_destroy_wq(flow->wq));
1034         if (!flow->rxq && flow->cq)
1035                 claim_zero(ibv_destroy_cq(flow->cq));
1036         rte_free(flow->ibv_attr);
1037         DEBUG("Flow destroyed %p", (void *)flow);
1038         rte_free(flow);
1039 }
1040
1041 /**
1042  * Destroy a flow.
1043  *
1044  * @see rte_flow_destroy()
1045  * @see rte_flow_ops
1046  */
1047 int
1048 mlx5_flow_destroy(struct rte_eth_dev *dev,
1049                   struct rte_flow *flow,
1050                   struct rte_flow_error *error)
1051 {
1052         struct priv *priv = dev->data->dev_private;
1053
1054         (void)error;
1055         priv_lock(priv);
1056         priv_flow_destroy(priv, flow);
1057         priv_unlock(priv);
1058         return 0;
1059 }
1060
1061 /**
1062  * Destroy all flows.
1063  *
1064  * @param priv
1065  *   Pointer to private structure.
1066  */
1067 static void
1068 priv_flow_flush(struct priv *priv)
1069 {
1070         while (!LIST_EMPTY(&priv->flows)) {
1071                 struct rte_flow *flow;
1072
1073                 flow = LIST_FIRST(&priv->flows);
1074                 priv_flow_destroy(priv, flow);
1075         }
1076 }
1077
1078 /**
1079  * Destroy all flows.
1080  *
1081  * @see rte_flow_flush()
1082  * @see rte_flow_ops
1083  */
1084 int
1085 mlx5_flow_flush(struct rte_eth_dev *dev,
1086                 struct rte_flow_error *error)
1087 {
1088         struct priv *priv = dev->data->dev_private;
1089
1090         (void)error;
1091         priv_lock(priv);
1092         priv_flow_flush(priv);
1093         priv_unlock(priv);
1094         return 0;
1095 }
1096
1097 /**
1098  * Remove all flows.
1099  *
1100  * Called by dev_stop() to remove all flows.
1101  *
1102  * @param priv
1103  *   Pointer to private structure.
1104  */
1105 void
1106 priv_flow_stop(struct priv *priv)
1107 {
1108         struct rte_flow *flow;
1109
1110         for (flow = LIST_FIRST(&priv->flows);
1111              flow;
1112              flow = LIST_NEXT(flow, next)) {
1113                 claim_zero(ibv_exp_destroy_flow(flow->ibv_flow));
1114                 flow->ibv_flow = NULL;
1115                 DEBUG("Flow %p removed", (void *)flow);
1116         }
1117 }
1118
1119 /**
1120  * Add all flows.
1121  *
1122  * @param priv
1123  *   Pointer to private structure.
1124  *
1125  * @return
1126  *   0 on success, a errno value otherwise and rte_errno is set.
1127  */
1128 int
1129 priv_flow_start(struct priv *priv)
1130 {
1131         struct rte_flow *flow;
1132
1133         for (flow = LIST_FIRST(&priv->flows);
1134              flow;
1135              flow = LIST_NEXT(flow, next)) {
1136                 flow->ibv_flow = ibv_exp_create_flow(flow->qp,
1137                                                      flow->ibv_attr);
1138                 if (!flow->ibv_flow) {
1139                         DEBUG("Flow %p cannot be applied", (void *)flow);
1140                         rte_errno = EINVAL;
1141                         return rte_errno;
1142                 }
1143                 DEBUG("Flow %p applied", (void *)flow);
1144         }
1145         return 0;
1146 }