net/mlx5: add new operations for isolated mode
drivers/net/mlx5/mlx5_flow.c
1 /*-
2  *   BSD LICENSE
3  *
4  *   Copyright 2016 6WIND S.A.
5  *   Copyright 2016 Mellanox.
6  *
7  *   Redistribution and use in source and binary forms, with or without
8  *   modification, are permitted provided that the following conditions
9  *   are met:
10  *
11  *     * Redistributions of source code must retain the above copyright
12  *       notice, this list of conditions and the following disclaimer.
13  *     * Redistributions in binary form must reproduce the above copyright
14  *       notice, this list of conditions and the following disclaimer in
15  *       the documentation and/or other materials provided with the
16  *       distribution.
17  *     * Neither the name of 6WIND S.A. nor the names of its
18  *       contributors may be used to endorse or promote products derived
19  *       from this software without specific prior written permission.
20  *
21  *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24  *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25  *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26  *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27  *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28  *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29  *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31  *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  */
33
34 #include <sys/queue.h>
35 #include <string.h>
36
37 /* Verbs header. */
38 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
39 #ifdef PEDANTIC
40 #pragma GCC diagnostic ignored "-Wpedantic"
41 #endif
42 #include <infiniband/verbs.h>
43 #ifdef PEDANTIC
44 #pragma GCC diagnostic error "-Wpedantic"
45 #endif
46
47 #include <rte_ethdev.h>
48 #include <rte_flow.h>
49 #include <rte_flow_driver.h>
50 #include <rte_malloc.h>
51
52 #include "mlx5.h"
53 #include "mlx5_prm.h"
54
55 /* Define minimal priority for control plane flows. */
56 #define MLX5_CTRL_FLOW_PRIORITY 4
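/*
 * In Verbs, numerically lower priorities match first. Since 4 is above
 * any hash Rx queue flow priority used in this file (0 to 2, added to
 * the rule's own attribute priority), control plane flows only match
 * when no user rule does.
 */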
57
58 /* Internet Protocol versions. */
59 #define MLX5_IPV4 4
60 #define MLX5_IPV6 6
61
62 /* Dev ops structures defined in mlx5.c. */
63 extern const struct eth_dev_ops mlx5_dev_ops;
64 extern const struct eth_dev_ops mlx5_dev_ops_isolate;
65
66 static int
67 mlx5_flow_create_eth(const struct rte_flow_item *item,
68                      const void *default_mask,
69                      void *data);
70
71 static int
72 mlx5_flow_create_vlan(const struct rte_flow_item *item,
73                       const void *default_mask,
74                       void *data);
75
76 static int
77 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
78                       const void *default_mask,
79                       void *data);
80
81 static int
82 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
83                       const void *default_mask,
84                       void *data);
85
86 static int
87 mlx5_flow_create_udp(const struct rte_flow_item *item,
88                      const void *default_mask,
89                      void *data);
90
91 static int
92 mlx5_flow_create_tcp(const struct rte_flow_item *item,
93                      const void *default_mask,
94                      void *data);
95
96 static int
97 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
98                        const void *default_mask,
99                        void *data);
100
101 struct mlx5_flow_parse;
102
103 static void
104 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
105                       unsigned int size);
106
107 static int
108 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);
109
110 /* Hash RX queue types. */
111 enum hash_rxq_type {
112         HASH_RXQ_TCPV4,
113         HASH_RXQ_UDPV4,
114         HASH_RXQ_IPV4,
115         HASH_RXQ_TCPV6,
116         HASH_RXQ_UDPV6,
117         HASH_RXQ_IPV6,
118         HASH_RXQ_ETH,
119 };
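/*
 * The ordering above matters: priv_flow_convert_finalise() iterates
 * over the contiguous [TCP, UDP, IP] range of each IP version to drop
 * unneeded hash Rx queue types, and expects HASH_RXQ_ETH to be last.
 */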
120
121 /* Initialization data for hash RX queue. */
122 struct hash_rxq_init {
123         uint64_t hash_fields; /* Fields that participate in the hash. */
124         uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
125         unsigned int flow_priority; /* Flow priority to use. */
126         unsigned int ip_version; /* Internet protocol. */
127 };
128
129 /* Initialization data for hash RX queues. */
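/*
 * More specific layers get numerically lower (higher precedence) flow
 * priorities: L4 = 0, L3 = 1, L2 = 2. The rule's attribute priority is
 * added on top of these values when the Verbs attribute is allocated.
 */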
130 const struct hash_rxq_init hash_rxq_init[] = {
131         [HASH_RXQ_TCPV4] = {
132                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
133                                 IBV_RX_HASH_DST_IPV4 |
134                                 IBV_RX_HASH_SRC_PORT_TCP |
135                                 IBV_RX_HASH_DST_PORT_TCP),
136                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
137                 .flow_priority = 0,
138                 .ip_version = MLX5_IPV4,
139         },
140         [HASH_RXQ_UDPV4] = {
141                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
142                                 IBV_RX_HASH_DST_IPV4 |
143                                 IBV_RX_HASH_SRC_PORT_UDP |
144                                 IBV_RX_HASH_DST_PORT_UDP),
145                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
146                 .flow_priority = 0,
147                 .ip_version = MLX5_IPV4,
148         },
149         [HASH_RXQ_IPV4] = {
150                 .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
151                                 IBV_RX_HASH_DST_IPV4),
152                 .dpdk_rss_hf = (ETH_RSS_IPV4 |
153                                 ETH_RSS_FRAG_IPV4),
154                 .flow_priority = 1,
155                 .ip_version = MLX5_IPV4,
156         },
157         [HASH_RXQ_TCPV6] = {
158                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
159                                 IBV_RX_HASH_DST_IPV6 |
160                                 IBV_RX_HASH_SRC_PORT_TCP |
161                                 IBV_RX_HASH_DST_PORT_TCP),
162                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
163                 .flow_priority = 0,
164                 .ip_version = MLX5_IPV6,
165         },
166         [HASH_RXQ_UDPV6] = {
167                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
168                                 IBV_RX_HASH_DST_IPV6 |
169                                 IBV_RX_HASH_SRC_PORT_UDP |
170                                 IBV_RX_HASH_DST_PORT_UDP),
171                 .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
172                 .flow_priority = 0,
173                 .ip_version = MLX5_IPV6,
174         },
175         [HASH_RXQ_IPV6] = {
176                 .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
177                                 IBV_RX_HASH_DST_IPV6),
178                 .dpdk_rss_hf = (ETH_RSS_IPV6 |
179                                 ETH_RSS_FRAG_IPV6),
180                 .flow_priority = 1,
181                 .ip_version = MLX5_IPV6,
182         },
183         [HASH_RXQ_ETH] = {
184                 .hash_fields = 0,
185                 .dpdk_rss_hf = 0,
186                 .flow_priority = 2,
187         },
188 };
189
190 /* Number of entries in hash_rxq_init[]. */
191 const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);
192
193 /** Structure for Drop queue. */
194 struct mlx5_hrxq_drop {
195         struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
196         struct ibv_qp *qp; /**< Verbs queue pair. */
197         struct ibv_wq *wq; /**< Verbs work queue. */
198         struct ibv_cq *cq; /**< Verbs completion queue. */
199 };
200
201 /* Flows structures. */
202 struct mlx5_flow {
203         uint64_t hash_fields; /**< Fields that participate in the hash. */
204         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
205         struct ibv_flow *ibv_flow; /**< Verbs flow. */
206         struct mlx5_hrxq *hrxq; /**< Hash Rx queue. */
207 };
208
209 /* Drop flows structures. */
210 struct mlx5_flow_drop {
211         struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
212         struct ibv_flow *ibv_flow; /**< Verbs flow. */
213 };
214
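/*
 * Generic flow handle. The drop bit below tells which union member is
 * valid: drxq for flows targeting the drop queue, frxq[] otherwise.
 */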
215 struct rte_flow {
216         TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
217         uint32_t mark:1; /**< Set if the flow is marked. */
218         uint32_t drop:1; /**< Drop queue. */
219         uint16_t queues_n; /**< Number of entries in queues[]. */
220         uint16_t (*queues)[]; /**< Queue indexes to use. */
221         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
222         uint8_t rss_key[40]; /**< Copy of the RSS key. */
223         union {
224                 struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
225                 /**< Flow with Rx queue. */
226                 struct mlx5_flow_drop drxq; /**< Flow with drop Rx queue. */
227         };
228 };
229
230 /** Static initializer for items. */
231 #define ITEMS(...) \
232         (const enum rte_flow_item_type []){ \
233                 __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
234         }
235
236 /** Structure to generate a simple graph of layers supported by the NIC. */
237 struct mlx5_flow_items {
238         /** List of possible actions for these items. */
239         const enum rte_flow_action_type *const actions;
240         /** Bit-masks corresponding to the possibilities for the item. */
241         const void *mask;
242         /**
243          * Default bit-masks to use when item->mask is not provided. When
244          * \default_mask is also NULL, the full supported bit-mask (\mask) is
245          * used instead.
246          */
247         const void *default_mask;
248         /** Bit-masks size in bytes. */
249         const unsigned int mask_sz;
250         /**
251          * Conversion function from rte_flow to NIC specific flow.
252          *
253          * @param item
254          *   rte_flow item to convert.
255          * @param default_mask
256          *   Default bit-masks to use when item->mask is not provided.
257          * @param data
258          *   Internal structure to store the conversion.
259          *
260          * @return
261          *   0 on success, negative value otherwise.
262          */
263         int (*convert)(const struct rte_flow_item *item,
264                        const void *default_mask,
265                        void *data);
266         /** Size in bytes of the destination structure. */
267         const unsigned int dst_sz;
268         /** List of possible following items.  */
269         const enum rte_flow_item_type *const items;
270 };
271
272 /** Valid actions for this PMD. */
273 static const enum rte_flow_action_type valid_actions[] = {
274         RTE_FLOW_ACTION_TYPE_DROP,
275         RTE_FLOW_ACTION_TYPE_QUEUE,
276         RTE_FLOW_ACTION_TYPE_MARK,
277         RTE_FLOW_ACTION_TYPE_FLAG,
278         RTE_FLOW_ACTION_TYPE_END,
279 };
280
281 /** Graph of supported items and associated actions. */
282 static const struct mlx5_flow_items mlx5_flow_items[] = {
283         [RTE_FLOW_ITEM_TYPE_END] = {
284                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
285                                RTE_FLOW_ITEM_TYPE_VXLAN),
286         },
287         [RTE_FLOW_ITEM_TYPE_ETH] = {
288                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
289                                RTE_FLOW_ITEM_TYPE_IPV4,
290                                RTE_FLOW_ITEM_TYPE_IPV6),
291                 .actions = valid_actions,
292                 .mask = &(const struct rte_flow_item_eth){
293                         .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
294                         .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
295                         .type = -1,
296                 },
297                 .default_mask = &rte_flow_item_eth_mask,
298                 .mask_sz = sizeof(struct rte_flow_item_eth),
299                 .convert = mlx5_flow_create_eth,
300                 .dst_sz = sizeof(struct ibv_flow_spec_eth),
301         },
302         [RTE_FLOW_ITEM_TYPE_VLAN] = {
303                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
304                                RTE_FLOW_ITEM_TYPE_IPV6),
305                 .actions = valid_actions,
306                 .mask = &(const struct rte_flow_item_vlan){
307                         .tci = -1,
308                 },
309                 .default_mask = &rte_flow_item_vlan_mask,
310                 .mask_sz = sizeof(struct rte_flow_item_vlan),
311                 .convert = mlx5_flow_create_vlan,
312                 .dst_sz = 0,
313         },
314         [RTE_FLOW_ITEM_TYPE_IPV4] = {
315                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
316                                RTE_FLOW_ITEM_TYPE_TCP),
317                 .actions = valid_actions,
318                 .mask = &(const struct rte_flow_item_ipv4){
319                         .hdr = {
320                                 .src_addr = -1,
321                                 .dst_addr = -1,
322                                 .type_of_service = -1,
323                                 .next_proto_id = -1,
324                         },
325                 },
326                 .default_mask = &rte_flow_item_ipv4_mask,
327                 .mask_sz = sizeof(struct rte_flow_item_ipv4),
328                 .convert = mlx5_flow_create_ipv4,
329                 .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
330         },
331         [RTE_FLOW_ITEM_TYPE_IPV6] = {
332                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
333                                RTE_FLOW_ITEM_TYPE_TCP),
334                 .actions = valid_actions,
335                 .mask = &(const struct rte_flow_item_ipv6){
336                         .hdr = {
337                                 .src_addr = {
338                                         0xff, 0xff, 0xff, 0xff,
339                                         0xff, 0xff, 0xff, 0xff,
340                                         0xff, 0xff, 0xff, 0xff,
341                                         0xff, 0xff, 0xff, 0xff,
342                                 },
343                                 .dst_addr = {
344                                         0xff, 0xff, 0xff, 0xff,
345                                         0xff, 0xff, 0xff, 0xff,
346                                         0xff, 0xff, 0xff, 0xff,
347                                         0xff, 0xff, 0xff, 0xff,
348                                 },
349                                 .vtc_flow = -1,
350                                 .proto = -1,
351                                 .hop_limits = -1,
352                         },
353                 },
354                 .default_mask = &rte_flow_item_ipv6_mask,
355                 .mask_sz = sizeof(struct rte_flow_item_ipv6),
356                 .convert = mlx5_flow_create_ipv6,
357                 .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
358         },
359         [RTE_FLOW_ITEM_TYPE_UDP] = {
360                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
361                 .actions = valid_actions,
362                 .mask = &(const struct rte_flow_item_udp){
363                         .hdr = {
364                                 .src_port = -1,
365                                 .dst_port = -1,
366                         },
367                 },
368                 .default_mask = &rte_flow_item_udp_mask,
369                 .mask_sz = sizeof(struct rte_flow_item_udp),
370                 .convert = mlx5_flow_create_udp,
371                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
372         },
373         [RTE_FLOW_ITEM_TYPE_TCP] = {
374                 .actions = valid_actions,
375                 .mask = &(const struct rte_flow_item_tcp){
376                         .hdr = {
377                                 .src_port = -1,
378                                 .dst_port = -1,
379                         },
380                 },
381                 .default_mask = &rte_flow_item_tcp_mask,
382                 .mask_sz = sizeof(struct rte_flow_item_tcp),
383                 .convert = mlx5_flow_create_tcp,
384                 .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
385         },
386         [RTE_FLOW_ITEM_TYPE_VXLAN] = {
387                 .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
388                 .actions = valid_actions,
389                 .mask = &(const struct rte_flow_item_vxlan){
390                         .vni = "\xff\xff\xff",
391                 },
392                 .default_mask = &rte_flow_item_vxlan_mask,
393                 .mask_sz = sizeof(struct rte_flow_item_vxlan),
394                 .convert = mlx5_flow_create_vxlan,
395                 .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
396         },
397 };
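/*
 * Pattern validation walks this graph: starting from the END entry, an
 * item is accepted only if its type appears in the .items list of the
 * previous item, so e.g. eth / ipv4 / udp / vxlan is reachable while
 * udp / eth is not (see priv_flow_convert_items_validate()).
 */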
398
399 /** Structure to pass to the conversion function. */
400 struct mlx5_flow_parse {
401         uint32_t inner; /**< Set once VXLAN is encountered. */
402         uint32_t create:1;
403         /**< Whether resources should remain after a validate. */
404         uint32_t drop:1; /**< Target is a drop queue. */
405         uint32_t mark:1; /**< Mark is present in the flow. */
406         uint32_t mark_id; /**< Mark identifier. */
407         uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
408         uint16_t queues_n; /**< Number of entries in queues[]. */
409         struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
410         uint8_t rss_key[40]; /**< Copy of the RSS key. */
411         enum hash_rxq_type layer; /**< Last pattern layer detected. */
412         union {
413                 struct {
414                         struct ibv_flow_attr *ibv_attr;
415                         /**< Pointer to Verbs attributes. */
416                         unsigned int offset;
417                         /**< Current position or total size of the attribute. */
418                 } queue[RTE_DIM(hash_rxq_init)];
419                 struct {
420                         struct ibv_flow_attr *ibv_attr;
421                         /**< Pointer to Verbs attributes. */
422                         unsigned int offset;
423                         /**< Current position or total size of the attribute. */
424                 } drop_q;
425         };
426 };
427
428 static const struct rte_flow_ops mlx5_flow_ops = {
429         .validate = mlx5_flow_validate,
430         .create = mlx5_flow_create,
431         .destroy = mlx5_flow_destroy,
432         .flush = mlx5_flow_flush,
433         .query = NULL,
434         .isolate = mlx5_flow_isolate,
435 };
436
437 /* Convert FDIR request to Generic flow. */
438 struct mlx5_fdir {
439         struct rte_flow_attr attr;
440         struct rte_flow_action actions[2];
441         struct rte_flow_item items[4];
442         struct rte_flow_item_eth l2;
443         union {
444                 struct rte_flow_item_ipv4 ipv4;
445                 struct rte_flow_item_ipv6 ipv6;
446         } l3;
447         union {
448                 struct rte_flow_item_udp udp;
449                 struct rte_flow_item_tcp tcp;
450         } l4;
451         struct rte_flow_action_queue queue;
452 };
453
454 /* Verbs specification header. */
455 struct ibv_spec_header {
456         enum ibv_flow_spec_type type;
457         uint16_t size;
458 };
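/*
 * The two fields above mirror the common leading fields of every
 * ibv_flow_spec_* structure, which allows walking the variable-size
 * specifications laid out after an ibv_flow_attr.
 */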
459
460 /**
461  * Check support for a given item.
462  *
463  * @param[in] item
464  *   Item specification.
465  * @param[in] mask
466  *   Bit-masks covering supported fields to compare with spec, last and mask in
467  *   \item.
468  * @param size
469  *   Bit-mask size in bytes.
470  *
471  * @return
472  *   0 on success, a non-zero value otherwise.
473  */
474 static int
475 mlx5_flow_item_validate(const struct rte_flow_item *item,
476                         const uint8_t *mask, unsigned int size)
477 {
478         int ret = 0;
479
480         if (!item->spec && (item->mask || item->last))
481                 return -1;
482         if (item->spec && !item->mask) {
483                 unsigned int i;
484                 const uint8_t *spec = item->spec;
485
486                 for (i = 0; i < size; ++i)
487                         if ((spec[i] | mask[i]) != mask[i])
488                                 return -1;
489         }
490         if (item->last && !item->mask) {
491                 unsigned int i;
492                 const uint8_t *spec = item->last;
493
494                 for (i = 0; i < size; ++i)
495                         if ((spec[i] | mask[i]) != mask[i])
496                                 return -1;
497         }
498         if (item->mask) {
499                 unsigned int i;
500                 const uint8_t *spec = item->mask;
501
502                 for (i = 0; i < size; ++i)
503                         if ((spec[i] | mask[i]) != mask[i])
504                                 return -1;
505         }
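        /*
         * A spec/last range is only accepted when both bounds are equal
         * once the mask is applied, i.e. true ranges spanning several
         * values are rejected.
         */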
506         if (item->spec && item->last) {
507                 uint8_t spec[size];
508                 uint8_t last[size];
509                 const uint8_t *apply = mask;
510                 unsigned int i;
511
512                 if (item->mask)
513                         apply = item->mask;
514                 for (i = 0; i < size; ++i) {
515                         spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
516                         last[i] = ((const uint8_t *)item->last)[i] & apply[i];
517                 }
518                 ret = memcmp(spec, last, size);
519         }
520         return ret;
521 }
522
523 /**
524  * Copy the user RSS configuration into the parser.
525  *
526  * @param priv
527  *   Pointer to private structure.
528  * @param parser
529  *   Internal parser structure.
530  * @param rss_conf
531  *   User RSS configuration to save.
532  *
533  * @return
534  *   0 on success, errno value on failure.
535  */
536 static int
537 priv_flow_convert_rss_conf(struct priv *priv,
538                            struct mlx5_flow_parse *parser,
539                            const struct rte_eth_rss_conf *rss_conf)
540 {
541         const struct rte_eth_rss_conf *rss =
542                 rss_conf ? rss_conf : &priv->rss_conf;
543
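        /*
         * 40 bytes is the maximum RSS key length supported here,
         * matching the rss_key[40] arrays in the flow and parser
         * structures.
         */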
544         if (rss->rss_key_len > 40)
545                 return EINVAL;
546         parser->rss_conf.rss_key_len = rss->rss_key_len;
547         parser->rss_conf.rss_hf = rss->rss_hf;
548         memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
549         parser->rss_conf.rss_key = parser->rss_key;
550         return 0;
551 }
552
553 /**
554  * Validate flow rule attributes.
555  *
556  * @param priv
557  *   Pointer to private structure.
558  * @param[in] attr
559  *   Flow rule attributes.
560  * @param[out] error
561  *   Perform verbose error reporting if not NULL.
562  * @param[in, out] parser
563  *   Internal parser structure.
564  *
565  * @return
566  *   0 on success, a negative errno value otherwise and rte_errno is set.
567  */
568 static int
569 priv_flow_convert_attributes(struct priv *priv,
570                              const struct rte_flow_attr *attr,
571                              struct rte_flow_error *error,
572                              struct mlx5_flow_parse *parser)
573 {
574         (void)priv;
575         (void)parser;
576         if (attr->group) {
577                 rte_flow_error_set(error, ENOTSUP,
578                                    RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
579                                    NULL,
580                                    "groups are not supported");
581                 return -rte_errno;
582         }
583         if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
584                 rte_flow_error_set(error, ENOTSUP,
585                                    RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
586                                    NULL,
587                                    "priorities are not supported");
588                 return -rte_errno;
589         }
590         if (attr->egress) {
591                 rte_flow_error_set(error, ENOTSUP,
592                                    RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
593                                    NULL,
594                                    "egress is not supported");
595                 return -rte_errno;
596         }
597         if (!attr->ingress) {
598                 rte_flow_error_set(error, ENOTSUP,
599                                    RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
600                                    NULL,
601                                    "only ingress is supported");
602                 return -rte_errno;
603         }
604         return 0;
605 }
606
607 /**
608  * Extract actions from the flow rule into the parser.
609  *
610  * @param priv
611  *   Pointer to private structure.
612  * @param[in] actions
613  *   Associated actions (list terminated by the END action).
614  * @param[out] error
615  *   Perform verbose error reporting if not NULL.
616  * @param[in, out] parser
617  *   Internal parser structure.
618  *
619  * @return
620  *   0 on success, a negative errno value otherwise and rte_errno is set.
621  */
622 static int
623 priv_flow_convert_actions(struct priv *priv,
624                           const struct rte_flow_action actions[],
625                           struct rte_flow_error *error,
626                           struct mlx5_flow_parse *parser)
627 {
628         /*
629          * Add the default RSS configuration; Verbs needs it to create the
630          * QP even when no RSS is requested.
631          */
632         priv_flow_convert_rss_conf(priv, parser,
633                                    (const struct rte_eth_rss_conf *)
634                                    &priv->rss_conf);
635         for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
636                 if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
637                         continue;
638                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
639                         parser->drop = 1;
640                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
641                         const struct rte_flow_action_queue *queue =
642                                 (const struct rte_flow_action_queue *)
643                                 actions->conf;
644                         uint16_t n;
645                         uint16_t found = 0;
646
647                         if (!queue || (queue->index > (priv->rxqs_n - 1)))
648                                 goto exit_action_not_supported;
649                         for (n = 0; n < parser->queues_n; ++n) {
650                                 if (parser->queues[n] == queue->index) {
651                                         found = 1;
652                                         break;
653                                 }
654                         }
655                         if (parser->queues_n > 1 && !found) {
656                                 rte_flow_error_set(error, ENOTSUP,
657                                            RTE_FLOW_ERROR_TYPE_ACTION,
658                                            actions,
659                                            "queue action not in RSS queues");
660                                 return -rte_errno;
661                         }
662                         if (!found) {
663                                 parser->queues_n = 1;
664                                 parser->queues[0] = queue->index;
665                         }
666                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
667                         const struct rte_flow_action_rss *rss =
668                                 (const struct rte_flow_action_rss *)
669                                 actions->conf;
670                         uint16_t n;
671
672                         if (!rss || !rss->num) {
673                                 rte_flow_error_set(error, EINVAL,
674                                                    RTE_FLOW_ERROR_TYPE_ACTION,
675                                                    actions,
676                                                    "no valid queues");
677                                 return -rte_errno;
678                         }
679                         if (parser->queues_n == 1) {
680                                 uint16_t found = 0;
681
682                                 assert(parser->queues_n);
683                                 for (n = 0; n < rss->num; ++n) {
684                                         if (parser->queues[0] ==
685                                             rss->queue[n]) {
686                                                 found = 1;
687                                                 break;
688                                         }
689                                 }
690                                 if (!found) {
691                                         rte_flow_error_set(error, ENOTSUP,
692                                                    RTE_FLOW_ERROR_TYPE_ACTION,
693                                                    actions,
694                                                    "queue action not in RSS"
695                                                    " queues");
696                                         return -rte_errno;
697                                 }
698                         }
699                         for (n = 0; n < rss->num; ++n) {
700                                 if (rss->queue[n] >= priv->rxqs_n) {
701                                         rte_flow_error_set(error, EINVAL,
702                                                    RTE_FLOW_ERROR_TYPE_ACTION,
703                                                    actions,
704                                                    "queue id > number of"
705                                                    " queues");
706                                         return -rte_errno;
707                                 }
708                         }
709                         for (n = 0; n < rss->num; ++n)
710                                 parser->queues[n] = rss->queue[n];
711                         parser->queues_n = rss->num;
712                         if (priv_flow_convert_rss_conf(priv, parser,
713                                                        rss->rss_conf)) {
714                                 rte_flow_error_set(error, EINVAL,
715                                                    RTE_FLOW_ERROR_TYPE_ACTION,
716                                                    actions,
717                                                    "wrong RSS configuration");
718                                 return -rte_errno;
719                         }
720                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
721                         const struct rte_flow_action_mark *mark =
722                                 (const struct rte_flow_action_mark *)
723                                 actions->conf;
724
725                         if (!mark) {
726                                 rte_flow_error_set(error, EINVAL,
727                                                    RTE_FLOW_ERROR_TYPE_ACTION,
728                                                    actions,
729                                                    "mark must be defined");
730                                 return -rte_errno;
731                         } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
732                                 rte_flow_error_set(error, ENOTSUP,
733                                                    RTE_FLOW_ERROR_TYPE_ACTION,
734                                                    actions,
735                                                    "mark must be between 0"
736                                                    " and 16777199");
737                                 return -rte_errno;
738                         }
739                         parser->mark = 1;
740                         parser->mark_id = mark->id;
741                 } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
742                         parser->mark = 1;
743                 } else {
744                         goto exit_action_not_supported;
745                 }
746         }
747         if (!parser->queues_n && !parser->drop) {
748                 rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
749                                    NULL, "no valid action");
750                 return -rte_errno;
751         }
752         return 0;
753 exit_action_not_supported:
754         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
755                            actions, "action not supported");
756         return -rte_errno;
757 }
758
759 /**
760  * Validate items.
761  *
762  * @param priv
763  *   Pointer to private structure.
764  * @param[in] items
765  *   Pattern specification (list terminated by the END pattern item).
766  * @param[out] error
767  *   Perform verbose error reporting if not NULL.
768  * @param[in, out] parser
769  *   Internal parser structure.
770  *
771  * @return
772  *   0 on success, a negative errno value otherwise and rte_errno is set.
773  */
774 static int
775 priv_flow_convert_items_validate(struct priv *priv,
776                                  const struct rte_flow_item items[],
777                                  struct rte_flow_error *error,
778                                  struct mlx5_flow_parse *parser)
779 {
780         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
781         unsigned int i;
782
783         (void)priv;
784         /* Initialise the offsets to start after the verbs attribute. */
785         if (parser->drop) {
786                 parser->drop_q.offset = sizeof(struct ibv_flow_attr);
787         } else {
788                 for (i = 0; i != hash_rxq_init_n; ++i)
789                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
790         }
791         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
792                 const struct mlx5_flow_items *token = NULL;
793                 unsigned int n;
794                 int err;
795
796                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
797                         continue;
798                 for (i = 0;
799                      cur_item->items &&
800                      cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
801                      ++i) {
802                         if (cur_item->items[i] == items->type) {
803                                 token = &mlx5_flow_items[items->type];
804                                 break;
805                         }
806                 }
807                 if (!token)
808                         goto exit_item_not_supported;
809                 cur_item = token;
810                 err = mlx5_flow_item_validate(items,
811                                               (const uint8_t *)cur_item->mask,
812                                               cur_item->mask_sz);
813                 if (err)
814                         goto exit_item_not_supported;
815                 if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
816                         if (parser->inner) {
817                                 rte_flow_error_set(error, ENOTSUP,
818                                                    RTE_FLOW_ERROR_TYPE_ITEM,
819                                                    items,
820                                                    "cannot recognize multiple"
821                                                    " VXLAN encapsulations");
822                                 return -rte_errno;
823                         }
824                         parser->inner = 1;
825                 }
826                 if (parser->drop) {
827                         parser->drop_q.offset += cur_item->dst_sz;
828                 } else if (parser->queues_n == 1) {
829                         parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
830                 } else {
831                         for (n = 0; n != hash_rxq_init_n; ++n)
832                                 parser->queue[n].offset += cur_item->dst_sz;
833                 }
834         }
835         if (parser->mark) {
836                 for (i = 0; i != hash_rxq_init_n; ++i)
837                         parser->queue[i].offset +=
838                                 sizeof(struct ibv_flow_spec_action_tag);
839         }
840         return 0;
841 exit_item_not_supported:
842         rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
843                            items, "item not supported");
844         return -rte_errno;
845 }
846
847 /**
848  * Allocate memory space to store verbs flow attributes.
849  *
850  * @param priv
851  *   Pointer to private structure.
852  * @param[in] priority
853  *   Flow priority.
854  * @param[in] size
855  *   Amount of bytes to allocate.
856  * @param[out] error
857  *   Perform verbose error reporting if not NULL.
858  *
859  * @return
860  *   A verbs flow attribute on success, NULL otherwise.
861  */
862 static struct ibv_flow_attr*
863 priv_flow_convert_allocate(struct priv *priv,
864                            unsigned int priority,
865                            unsigned int size,
866                            struct rte_flow_error *error)
867 {
868         struct ibv_flow_attr *ibv_attr;
869
870         (void)priv;
871         ibv_attr = rte_calloc(__func__, 1, size, 0);
872         if (!ibv_attr) {
873                 rte_flow_error_set(error, ENOMEM,
874                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
875                                    NULL,
876                                    "cannot allocate verbs spec attributes.");
877                 return NULL;
878         }
879         ibv_attr->priority = priority;
880         return ibv_attr;
881 }
882
883 /**
884  * Finalise verbs flow attributes.
885  *
886  * @param priv
887  *   Pointer to private structure.
888  * @param[in, out] parser
889  *   Internal parser structure.
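 *
 * Note: priv_flow_convert() calls this function a first time before the
 * Verbs attributes are allocated, where only the offsets can be updated
 * to size the specifications, and a second time after the conversion
 * callbacks when the flow spans several queues, where the missing layer
 * specifications are actually written.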
890  */
891 static void
892 priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
893 {
894         const unsigned int ipv4 =
895                 hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
896         const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
897         const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
898         const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
899         const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
900         const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
901         unsigned int i;
902
903         (void)priv;
904         if (parser->layer == HASH_RXQ_ETH) {
905                 goto fill;
906         } else {
907                 /*
908                  * This layer becomes useless as the pattern is defined at
909                  * deeper layers.
910                  */
911                 rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
912                 parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
913         }
914         /* Remove opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
915         for (i = ohmin; i != (ohmax + 1); ++i) {
916                 if (!parser->queue[i].ibv_attr)
917                         continue;
918                 rte_free(parser->queue[i].ibv_attr);
919                 parser->queue[i].ibv_attr = NULL;
920         }
921         /* Remove impossible flows according to the RSS configuration. */
922         if (hash_rxq_init[parser->layer].dpdk_rss_hf &
923             parser->rss_conf.rss_hf) {
924                 /* Remove any other flow. */
925                 for (i = hmin; i != (hmax + 1); ++i) {
926                         if ((i == parser->layer) ||
927                              (!parser->queue[i].ibv_attr))
928                                 continue;
929                         rte_free(parser->queue[i].ibv_attr);
930                         parser->queue[i].ibv_attr = NULL;
931                 }
932         } else if (!parser->queue[ip].ibv_attr) {
933                 /* No RSS possible with the current configuration. */
934                 parser->queues_n = 1;
935                 return;
936         }
937 fill:
938         /*
939          * Fill missing layers in verbs specifications, or compute the correct
940          * offset to allocate the memory space for the attributes and
941          * specifications.
942          */
943         for (i = 0; i != hash_rxq_init_n - 1; ++i) {
944                 union {
945                         struct ibv_flow_spec_ipv4_ext ipv4;
946                         struct ibv_flow_spec_ipv6 ipv6;
947                         struct ibv_flow_spec_tcp_udp udp_tcp;
948                 } specs;
949                 void *dst;
950                 uint16_t size;
951
952                 if (i == parser->layer)
953                         continue;
954                 if (parser->layer == HASH_RXQ_ETH) {
955                         if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
956                                 size = sizeof(struct ibv_flow_spec_ipv4_ext);
957                                 specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
958                                         .type = IBV_FLOW_SPEC_IPV4_EXT |
959                                                 parser->inner,
960                                         .size = size,
961                                 };
962                         } else {
963                                 size = sizeof(struct ibv_flow_spec_ipv6);
964                                 specs.ipv6 = (struct ibv_flow_spec_ipv6){
965                                         .type = IBV_FLOW_SPEC_IPV6 |
966                                                 parser->inner,
967                                         .size = size,
968                                 };
969                         }
970                         if (parser->queue[i].ibv_attr) {
971                                 dst = (void *)((uintptr_t)
972                                                parser->queue[i].ibv_attr +
973                                                parser->queue[i].offset);
974                                 memcpy(dst, &specs, size);
975                                 ++parser->queue[i].ibv_attr->num_of_specs;
976                         }
977                         parser->queue[i].offset += size;
978                 }
979                 if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
980                     (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
981                         size = sizeof(struct ibv_flow_spec_tcp_udp);
982                         specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
983                                 .type = ((i == HASH_RXQ_UDPV4 ||
984                                           i == HASH_RXQ_UDPV6) ?
985                                          IBV_FLOW_SPEC_UDP :
986                                          IBV_FLOW_SPEC_TCP) |
987                                         parser->inner,
988                                 .size = size,
989                         };
990                         if (parser->queue[i].ibv_attr) {
991                                 dst = (void *)((uintptr_t)
992                                                parser->queue[i].ibv_attr +
993                                                parser->queue[i].offset);
994                                 memcpy(dst, &specs, size);
995                                 ++parser->queue[i].ibv_attr->num_of_specs;
996                         }
997                         parser->queue[i].offset += size;
998                 }
999         }
1000 }
1001
1002 /**
1003  * Validate and convert a flow supported by the NIC.
1004  *
1005  * @param priv
1006  *   Pointer to private structure.
1007  * @param[in] attr
1008  *   Flow rule attributes.
1009  * @param[in] pattern
1010  *   Pattern specification (list terminated by the END pattern item).
1011  * @param[in] actions
1012  *   Associated actions (list terminated by the END action).
1013  * @param[out] error
1014  *   Perform verbose error reporting if not NULL.
1015  * @param[in, out] parser
1016  *   Internal parser structure.
1017  *
1018  * @return
1019  *   0 on success, a negative errno value otherwise and rte_errno is set.
1020  */
1021 static int
1022 priv_flow_convert(struct priv *priv,
1023                   const struct rte_flow_attr *attr,
1024                   const struct rte_flow_item items[],
1025                   const struct rte_flow_action actions[],
1026                   struct rte_flow_error *error,
1027                   struct mlx5_flow_parse *parser)
1028 {
1029         const struct mlx5_flow_items *cur_item = mlx5_flow_items;
1030         unsigned int i;
1031         int ret;
1032
1033         /* First step. Validate the attributes, items and actions. */
1034         *parser = (struct mlx5_flow_parse){
1035                 .create = parser->create,
1036                 .layer = HASH_RXQ_ETH,
1037                 .mark_id = MLX5_FLOW_MARK_DEFAULT,
1038         };
1039         ret = priv_flow_convert_attributes(priv, attr, error, parser);
1040         if (ret)
1041                 return ret;
1042         ret = priv_flow_convert_actions(priv, actions, error, parser);
1043         if (ret)
1044                 return ret;
1045         ret = priv_flow_convert_items_validate(priv, items, error, parser);
1046         if (ret)
1047                 return ret;
1048         priv_flow_convert_finalise(priv, parser);
1049         /*
1050          * Second step.
1051          * Allocate the memory space to store verbs specifications.
1052          */
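        /*
         * Up to now, offset accumulated the total size required for each
         * attribute; once the buffer is allocated it is reset to point
         * just past the ibv_flow_attr header and becomes the write
         * cursor advanced by mlx5_flow_create_copy().
         */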
1053         if (parser->drop) {
1054                 parser->drop_q.ibv_attr =
1055                         priv_flow_convert_allocate(priv, attr->priority,
1056                                                    parser->drop_q.offset,
1057                                                    error);
1058                 if (!parser->drop_q.ibv_attr)
1059                         return ENOMEM;
1060                 parser->drop_q.offset = sizeof(struct ibv_flow_attr);
1061         } else if (parser->queues_n == 1) {
1062                 unsigned int priority =
1063                         attr->priority +
1064                         hash_rxq_init[HASH_RXQ_ETH].flow_priority;
1065                 unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;
1066
1067                 parser->queue[HASH_RXQ_ETH].ibv_attr =
1068                         priv_flow_convert_allocate(priv, priority,
1069                                                    offset, error);
1070                 if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
1071                         return ENOMEM;
1072                 parser->queue[HASH_RXQ_ETH].offset =
1073                         sizeof(struct ibv_flow_attr);
1074         } else {
1075                 for (i = 0; i != hash_rxq_init_n; ++i) {
1076                         unsigned int priority =
1077                                 attr->priority +
1078                                 hash_rxq_init[i].flow_priority;
1079                         unsigned int offset;
1080
1081                         if (!(parser->rss_conf.rss_hf &
1082                               hash_rxq_init[i].dpdk_rss_hf) &&
1083                             (i != HASH_RXQ_ETH))
1084                                 continue;
1085                         offset = parser->queue[i].offset;
1086                         parser->queue[i].ibv_attr =
1087                                 priv_flow_convert_allocate(priv, priority,
1088                                                            offset, error);
1089                         if (!parser->queue[i].ibv_attr)
1090                                 goto exit_enomem;
1091                         parser->queue[i].offset = sizeof(struct ibv_flow_attr);
1092                 }
1093         }
1094         /* Third step. Parse the items and fill the specifications. */
1095         parser->inner = 0;
1096         for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
1097                 if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
1098                         continue;
1099                 cur_item = &mlx5_flow_items[items->type];
1100                 ret = cur_item->convert(items,
1101                                         (cur_item->default_mask ?
1102                                          cur_item->default_mask :
1103                                          cur_item->mask),
1104                                         parser);
1105                 if (ret) {
1106                         rte_flow_error_set(error, ENOTSUP,
1107                                            RTE_FLOW_ERROR_TYPE_ITEM,
1108                                            items, "item not supported");
1109                         goto exit_free;
1110                 }
1111         }
1112         if (parser->mark)
1113                 mlx5_flow_create_flag_mark(parser, parser->mark_id);
1114         /*
1115          * Last step. Complete the missing specifications to match the RSS
1116          * configuration.
1117          */
1118         if (parser->queues_n > 1)
1119                 priv_flow_convert_finalise(priv, parser);
1120 exit_free:
1121         /* Only verification is expected; all resources should be released. */
1122         if (!parser->create) {
1123                 if (parser->drop) {
1124                         rte_free(parser->drop_q.ibv_attr);
1125                         parser->drop_q.ibv_attr = NULL;
1126                 }
1127                 for (i = 0; i != hash_rxq_init_n; ++i) {
1128                         if (parser->queue[i].ibv_attr) {
1129                                 rte_free(parser->queue[i].ibv_attr);
1130                                 parser->queue[i].ibv_attr = NULL;
1131                         }
1132                 }
1133         }
1134         return ret;
1135 exit_enomem:
1136         for (i = 0; i != hash_rxq_init_n; ++i) {
1137                 if (parser->queue[i].ibv_attr) {
1138                         rte_free(parser->queue[i].ibv_attr);
1139                         parser->queue[i].ibv_attr = NULL;
1140                 }
1141         }
1142         rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1143                            NULL, "cannot allocate verbs spec attributes.");
1144         return ENOMEM;
1145 }
1146
1147 /**
1148  * Copy the created specification into each active Verbs flow attribute.
1149  *
1150  * @param parser
1151  *   Internal parser structure.
1152  * @param src
1153  *   Created specification.
1154  * @param size
1155  *   Size in bytes of the specification to copy.
1156  */
1157 static void
1158 mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
1159                       unsigned int size)
1160 {
1161         unsigned int i;
1162         void *dst;
1163
1164         if (parser->drop) {
1165                 dst = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1166                                 parser->drop_q.offset);
1167                 memcpy(dst, src, size);
1168                 ++parser->drop_q.ibv_attr->num_of_specs;
1169                 parser->drop_q.offset += size;
1170                 return;
1171         }
1172         for (i = 0; i != hash_rxq_init_n; ++i) {
1173                 if (!parser->queue[i].ibv_attr)
1174                         continue;
1175                 /* Specification must be the same L3 type or none. */
1176                 if (parser->layer == HASH_RXQ_ETH ||
1177                     (hash_rxq_init[parser->layer].ip_version ==
1178                      hash_rxq_init[i].ip_version) ||
1179                     (hash_rxq_init[i].ip_version == 0)) {
1180                         dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1181                                         parser->queue[i].offset);
1182                         memcpy(dst, src, size);
1183                         ++parser->queue[i].ibv_attr->num_of_specs;
1184                         parser->queue[i].offset += size;
1185                 }
1186         }
1187 }
1188
1189 /**
1190  * Convert Ethernet item to Verbs specification.
1191  *
1192  * @param[in] item
1193  *   Item specification.
1194  * @param[in] default_mask
1195  *   Default bit-masks to use when item->mask is not provided.
1196  * @param[in, out] data
1197  *   User structure.
1198  */
1199 static int
1200 mlx5_flow_create_eth(const struct rte_flow_item *item,
1201                      const void *default_mask,
1202                      void *data)
1203 {
1204         const struct rte_flow_item_eth *spec = item->spec;
1205         const struct rte_flow_item_eth *mask = item->mask;
1206         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1207         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1208         struct ibv_flow_spec_eth eth = {
1209                 .type = parser->inner | IBV_FLOW_SPEC_ETH,
1210                 .size = eth_size,
1211         };
1212
1213         parser->layer = HASH_RXQ_ETH;
1214         if (spec) {
1215                 unsigned int i;
1216
1217                 if (!mask)
1218                         mask = default_mask;
1219                 memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
1220                 memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
1221                 eth.val.ether_type = spec->type;
1222                 memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
1223                 memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
1224                 eth.mask.ether_type = mask->type;
1225                 /* Remove unwanted bits from values. */
1226                 for (i = 0; i < ETHER_ADDR_LEN; ++i) {
1227                         eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
1228                         eth.val.src_mac[i] &= eth.mask.src_mac[i];
1229                 }
1230                 eth.val.ether_type &= eth.mask.ether_type;
1231         }
1232         mlx5_flow_create_copy(parser, &eth, eth_size);
1233         return 0;
1234 }
1235
1236 /**
1237  * Convert VLAN item to Verbs specification.
1238  *
1239  * @param[in] item
1240  *   Item specification.
1241  * @param[in] default_mask
1242  *   Default bit-masks to use when item->mask is not provided.
1243  * @param[in, out] data
1244  *   User structure.
1245  */
1246 static int
1247 mlx5_flow_create_vlan(const struct rte_flow_item *item,
1248                       const void *default_mask,
1249                       void *data)
1250 {
1251         const struct rte_flow_item_vlan *spec = item->spec;
1252         const struct rte_flow_item_vlan *mask = item->mask;
1253         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1254         struct ibv_flow_spec_eth *eth;
1255         const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
1256
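        /*
         * Verbs has no dedicated VLAN specification: the TCI is stored
         * in the Ethernet specification emitted for the previous item,
         * which is why this item has dst_sz == 0 and the code below
         * patches the attribute at offset - eth_size.
         */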
1257         if (spec) {
1258                 unsigned int i;
1259
1260                 if (!mask)
1261                         mask = default_mask;
1262                 if (parser->drop) {
1263                         eth = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1264                                        parser->drop_q.offset - eth_size);
1265                         eth->val.vlan_tag = spec->tci;
1266                         eth->mask.vlan_tag = mask->tci;
1267                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1268                         return 0;
1269                 }
1270                 for (i = 0; i != hash_rxq_init_n; ++i) {
1271                         if (!parser->queue[i].ibv_attr)
1272                                 continue;
1273
1274                         eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
1275                                        parser->queue[i].offset - eth_size);
1276                         eth->val.vlan_tag = spec->tci;
1277                         eth->mask.vlan_tag = mask->tci;
1278                         eth->val.vlan_tag &= eth->mask.vlan_tag;
1279                 }
1280         }
1281         return 0;
1282 }
1283
1284 /**
1285  * Convert IPv4 item to Verbs specification.
1286  *
1287  * @param[in] item
1288  *   Item specification.
1289  * @param[in] default_mask
1290  *   Default bit-masks to use when item->mask is not provided.
1291  * @param[in, out] data
1292  *   User structure.
1293  */
1294 static int
1295 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1296                       const void *default_mask,
1297                       void *data)
1298 {
1299         const struct rte_flow_item_ipv4 *spec = item->spec;
1300         const struct rte_flow_item_ipv4 *mask = item->mask;
1301         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1302         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1303         struct ibv_flow_spec_ipv4_ext ipv4 = {
1304                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1305                 .size = ipv4_size,
1306         };
1307
1308         parser->layer = HASH_RXQ_IPV4;
1309         if (spec) {
1310                 if (!mask)
1311                         mask = default_mask;
1312                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1313                         .src_ip = spec->hdr.src_addr,
1314                         .dst_ip = spec->hdr.dst_addr,
1315                         .proto = spec->hdr.next_proto_id,
1316                         .tos = spec->hdr.type_of_service,
1317                 };
1318                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1319                         .src_ip = mask->hdr.src_addr,
1320                         .dst_ip = mask->hdr.dst_addr,
1321                         .proto = mask->hdr.next_proto_id,
1322                         .tos = mask->hdr.type_of_service,
1323                 };
1324                 /* Remove unwanted bits from values. */
1325                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1326                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1327                 ipv4.val.proto &= ipv4.mask.proto;
1328                 ipv4.val.tos &= ipv4.mask.tos;
1329         }
1330         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1331         return 0;
1332 }
1333
1334 /**
1335  * Convert IPv6 item to Verbs specification.
1336  *
1337  * @param[in] item
1338  *   Item specification.
1339  * @param[in] default_mask
1340  *   Default bit-masks to use when item->mask is not provided.
1341  * @param[in, out] data
1342  *   User structure.
1343  */
1344 static int
1345 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1346                       const void *default_mask,
1347                       void *data)
1348 {
1349         const struct rte_flow_item_ipv6 *spec = item->spec;
1350         const struct rte_flow_item_ipv6 *mask = item->mask;
1351         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1352         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1353         struct ibv_flow_spec_ipv6 ipv6 = {
1354                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1355                 .size = ipv6_size,
1356         };
1357
1358         parser->layer = HASH_RXQ_IPV6;
1359         if (spec) {
1360                 unsigned int i;
1361
1362                 if (!mask)
1363                         mask = default_mask;
1364                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1365                        RTE_DIM(ipv6.val.src_ip));
1366                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1367                        RTE_DIM(ipv6.val.dst_ip));
1368                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1369                        RTE_DIM(ipv6.mask.src_ip));
1370                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1371                        RTE_DIM(ipv6.mask.dst_ip));
1372                 ipv6.mask.flow_label = mask->hdr.vtc_flow;
1373                 ipv6.mask.next_hdr = mask->hdr.proto;
1374                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1375                 /* Remove unwanted bits from values. */
1376                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1377                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1378                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1379                 }
1380                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1381                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1382                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1383         }
1384         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1385         return 0;
1386 }
1387
1388 /**
1389  * Convert UDP item to Verbs specification.
1390  *
1391  * @param[in] item
1392  *   Item specification.
1393  * @param[in] default_mask
1394  *   Default bit-masks to use when item->mask is not provided.
1395  * @param[in, out] data
1396  *   User structure.
1397  */
1398 static int
1399 mlx5_flow_create_udp(const struct rte_flow_item *item,
1400                      const void *default_mask,
1401                      void *data)
1402 {
1403         const struct rte_flow_item_udp *spec = item->spec;
1404         const struct rte_flow_item_udp *mask = item->mask;
1405         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1406         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1407         struct ibv_flow_spec_tcp_udp udp = {
1408                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1409                 .size = udp_size,
1410         };
1411
1412         if (parser->layer == HASH_RXQ_IPV4)
1413                 parser->layer = HASH_RXQ_UDPV4;
1414         else
1415                 parser->layer = HASH_RXQ_UDPV6;
1416         if (spec) {
1417                 if (!mask)
1418                         mask = default_mask;
1419                 udp.val.dst_port = spec->hdr.dst_port;
1420                 udp.val.src_port = spec->hdr.src_port;
1421                 udp.mask.dst_port = mask->hdr.dst_port;
1422                 udp.mask.src_port = mask->hdr.src_port;
1423                 /* Remove unwanted bits from values. */
1424                 udp.val.src_port &= udp.mask.src_port;
1425                 udp.val.dst_port &= udp.mask.dst_port;
1426         }
1427         mlx5_flow_create_copy(parser, &udp, udp_size);
1428         return 0;
1429 }
1430
1431 /**
1432  * Convert TCP item to Verbs specification.
1433  *
1434  * @param[in] item
1435  *   Item specification.
1436  * @param[in] default_mask
1437  *   Default bit-masks to use when item->mask is not provided.
1438  * @param[in, out] data
1439  *   User structure.
1440  */
1441 static int
1442 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1443                      const void *default_mask,
1444                      void *data)
1445 {
1446         const struct rte_flow_item_tcp *spec = item->spec;
1447         const struct rte_flow_item_tcp *mask = item->mask;
1448         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1449         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1450         struct ibv_flow_spec_tcp_udp tcp = {
1451                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1452                 .size = tcp_size,
1453         };
1454
1455         if (parser->layer == HASH_RXQ_IPV4)
1456                 parser->layer = HASH_RXQ_TCPV4;
1457         else
1458                 parser->layer = HASH_RXQ_TCPV6;
1459         if (spec) {
1460                 if (!mask)
1461                         mask = default_mask;
1462                 tcp.val.dst_port = spec->hdr.dst_port;
1463                 tcp.val.src_port = spec->hdr.src_port;
1464                 tcp.mask.dst_port = mask->hdr.dst_port;
1465                 tcp.mask.src_port = mask->hdr.src_port;
1466                 /* Remove unwanted bits from values. */
1467                 tcp.val.src_port &= tcp.mask.src_port;
1468                 tcp.val.dst_port &= tcp.mask.dst_port;
1469         }
1470         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1471         return 0;
1472 }
1473
1474 /**
1475  * Convert VXLAN item to Verbs specification.
1476  *
1477  * @param[in] item
1478  *   Item specification.
1479  * @param[in] default_mask
1480  *   Default bit-masks to use when item->mask is not provided.
1481  * @param[in, out] data
1482  *   User structure.
1483  */
1484 static int
1485 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1486                        const void *default_mask,
1487                        void *data)
1488 {
1489         const struct rte_flow_item_vxlan *spec = item->spec;
1490         const struct rte_flow_item_vxlan *mask = item->mask;
1491         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1492         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1493         struct ibv_flow_spec_tunnel vxlan = {
1494                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1495                 .size = size,
1496         };
1497         union vni {
1498                 uint32_t vlan_id;
1499                 uint8_t vni[4];
1500         } id;
1501
1502         id.vni[0] = 0;
1503         parser->inner = IBV_FLOW_SPEC_INNER;
1504         if (spec) {
1505                 if (!mask)
1506                         mask = default_mask;
1507                 memcpy(&id.vni[1], spec->vni, 3);
1508                 vxlan.val.tunnel_id = id.vlan_id;
1509                 memcpy(&id.vni[1], mask->vni, 3);
1510                 vxlan.mask.tunnel_id = id.vlan_id;
1511                 /* Remove unwanted bits from values. */
1512                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1513         }
1514         mlx5_flow_create_copy(parser, &vxlan, size);
1515         return 0;
1516 }
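     /*
      * Note on the union above: the VXLAN network identifier (VNI) is a
      * 24-bit field stored in network byte order in spec->vni[]. Copying
      * it into bytes 1-3 of the union with byte 0 cleared builds the
      * 32-bit tunnel_id directly in network order, so no byte swapping is
      * needed: e.g. VNI 0x123456 ends up as the bytes 00 12 34 56.
      */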
1517
1518 /**
1519  * Convert mark/flag action to Verbs specification.
1520  *
1521  * @param parser
1522  *   Internal parser structure.
1523  * @param mark_id
1524  *   Mark identifier.
1525  */
1526 static int
1527 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1528 {
1529         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1530         struct ibv_flow_spec_action_tag tag = {
1531                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1532                 .size = size,
1533                 .tag_id = mlx5_flow_mark_set(mark_id),
1534         };
1535
1536         assert(parser->mark);
1537         mlx5_flow_create_copy(parser, &tag, size);
1538         return 0;
1539 }
1540
1541 /**
1542  * Complete flow rule creation with a drop queue.
1543  *
1544  * @param priv
1545  *   Pointer to private structure.
1546  * @param parser
1547  *   Internal parser structure.
1548  * @param flow
1549  *   Pointer to the rte_flow.
1550  * @param[out] error
1551  *   Perform verbose error reporting if not NULL.
1552  *
1553  * @return
1554  *   0 on success, errno value on failure.
1555  */
1556 static int
1557 priv_flow_create_action_queue_drop(struct priv *priv,
1558                                    struct mlx5_flow_parse *parser,
1559                                    struct rte_flow *flow,
1560                                    struct rte_flow_error *error)
1561 {
1562         struct ibv_flow_spec_action_drop *drop;
1563         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1564         int err = 0;
1565
1566         assert(priv->pd);
1567         assert(priv->ctx);
1568         flow->drop = 1;
1569         drop = (void *)((uintptr_t)parser->drop_q.ibv_attr +
1570                         parser->drop_q.offset);
1571         *drop = (struct ibv_flow_spec_action_drop){
1572                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1573                         .size = size,
1574         };
1575         ++parser->drop_q.ibv_attr->num_of_specs;
1576         parser->drop_q.offset += size;
1577         if (!priv->dev->data->dev_started)
1578                 return 0;
1579         flow->drxq.ibv_attr = parser->drop_q.ibv_attr;
1580         parser->drop_q.ibv_attr = NULL;
1581         flow->drxq.ibv_flow = ibv_create_flow(priv->flow_drop_queue->qp,
1582                                               flow->drxq.ibv_attr);
1583         if (!flow->drxq.ibv_flow) {
1584                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1585                                    NULL, "flow rule creation failure");
1586                 err = ENOMEM;
1587                 goto error;
1588         }
1589         return 0;
1590 error:
1591         assert(flow);
1592         if (flow->drxq.ibv_flow) {
1593                 claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1594                 flow->drxq.ibv_flow = NULL;
1595         }
1596         if (flow->drxq.ibv_attr) {
1597                 rte_free(flow->drxq.ibv_attr);
1598                 flow->drxq.ibv_attr = NULL;
1599         }
1600         return err;
1601 }
1602
1603 /**
1604  * Create hash Rx queues when RSS is enabled.
1605  *
1606  * @param priv
1607  *   Pointer to private structure.
1608  * @param parser
1609  *   Internal parser structure.
1610  * @param flow
1611  *   Pointer to the rte_flow.
1612  * @param[out] error
1613  *   Perform verbose error reporting if not NULL.
1614  *
1615  * @return
1616  *   0 on success, an errno value otherwise and rte_errno is set.
1617  */
1618 static int
1619 priv_flow_create_action_queue_rss(struct priv *priv,
1620                                   struct mlx5_flow_parse *parser,
1621                                   struct rte_flow *flow,
1622                                   struct rte_flow_error *error)
1623 {
1624         unsigned int i;
1625
1626         for (i = 0; i != hash_rxq_init_n; ++i) {
1627                 uint64_t hash_fields;
1628
1629                 if (!parser->queue[i].ibv_attr)
1630                         continue;
1631                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1632                 parser->queue[i].ibv_attr = NULL;
1633                 hash_fields = hash_rxq_init[i].hash_fields;
1634                 flow->frxq[i].hrxq =
1635                         mlx5_priv_hrxq_get(priv,
1636                                            parser->rss_conf.rss_key,
1637                                            parser->rss_conf.rss_key_len,
1638                                            hash_fields,
1639                                            parser->queues,
1640                                            hash_fields ? parser->queues_n : 1);
1641                 if (flow->frxq[i].hrxq)
1642                         continue;
1643                 flow->frxq[i].hrxq =
1644                         mlx5_priv_hrxq_new(priv,
1645                                            parser->rss_conf.rss_key,
1646                                            parser->rss_conf.rss_key_len,
1647                                            hash_fields,
1648                                            parser->queues,
1649                                            hash_fields ? parser->queues_n : 1);
1650                 if (!flow->frxq[i].hrxq) {
1651                         rte_flow_error_set(error, ENOMEM,
1652                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1653                                            NULL, "cannot create hash rxq");
1654                         return ENOMEM;
1655                 }
1656         }
1657         return 0;
1658 }
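     /*
      * Hash Rx queue objects are reference-counted: mlx5_priv_hrxq_get()
      * above looks up and reuses an existing queue matching the RSS key,
      * hash fields and queue list, while mlx5_priv_hrxq_new() is only
      * called as a fallback when no such queue exists yet.
      */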
1659
1660 /**
1661  * Complete flow rule creation.
1662  *
1663  * @param priv
1664  *   Pointer to private structure.
1665  * @param parser
1666  *   Internal parser structure.
1667  * @param flow
1668  *   Pointer to the rte_flow.
1669  * @param[out] error
1670  *   Perform verbose error reporting if not NULL.
1671  *
1672  * @return
1673  *   0 on success, an errno value otherwise and rte_errno is set.
1674  */
1675 static int
1676 priv_flow_create_action_queue(struct priv *priv,
1677                               struct mlx5_flow_parse *parser,
1678                               struct rte_flow *flow,
1679                               struct rte_flow_error *error)
1680 {
1681         int err = 0;
1682         unsigned int i;
1683
1684         assert(priv->pd);
1685         assert(priv->ctx);
1686         assert(!parser->drop);
1687         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1688         if (err)
1689                 goto error;
1690         if (!priv->dev->data->dev_started)
1691                 return 0;
1692         for (i = 0; i != hash_rxq_init_n; ++i) {
1693                 if (!flow->frxq[i].hrxq)
1694                         continue;
1695                 flow->frxq[i].ibv_flow =
1696                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1697                                         flow->frxq[i].ibv_attr);
1698                 if (!flow->frxq[i].ibv_flow) {
1699                         rte_flow_error_set(error, ENOMEM,
1700                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1701                                            NULL, "flow rule creation failure");
1702                         err = ENOMEM;
1703                         goto error;
1704                 }
1705                 DEBUG("%p type %d QP %p ibv_flow %p",
1706                       (void *)flow, i,
1707                       (void *)flow->frxq[i].hrxq,
1708                       (void *)flow->frxq[i].ibv_flow);
1709         }
1710         for (i = 0; i != parser->queues_n; ++i) {
1711                 struct mlx5_rxq_data *q =
1712                         (*priv->rxqs)[parser->queues[i]];
1713
1714                 q->mark |= parser->mark;
1715         }
1716         return 0;
1717 error:
1718         assert(flow);
1719         for (i = 0; i != hash_rxq_init_n; ++i) {
1720                 if (flow->frxq[i].ibv_flow) {
1721                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1722
1723                         claim_zero(ibv_destroy_flow(ibv_flow));
1724                 }
1725                 if (flow->frxq[i].hrxq)
1726                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1727                 if (flow->frxq[i].ibv_attr)
1728                         rte_free(flow->frxq[i].ibv_attr);
1729         }
1730         return err;
1731 }
1732
1733 /**
1734  * Convert a flow.
1735  *
1736  * @param priv
1737  *   Pointer to private structure.
1738  * @param list
1739  *   Pointer to a TAILQ flow list.
1740  * @param[in] attr
1741  *   Flow rule attributes.
1742  * @param[in] pattern
1743  *   Pattern specification (list terminated by the END pattern item).
1744  * @param[in] actions
1745  *   Associated actions (list terminated by the END action).
1746  * @param[out] error
1747  *   Perform verbose error reporting if not NULL.
1748  *
1749  * @return
1750  *   A flow on success, NULL otherwise.
1751  */
1752 static struct rte_flow *
1753 priv_flow_create(struct priv *priv,
1754                  struct mlx5_flows *list,
1755                  const struct rte_flow_attr *attr,
1756                  const struct rte_flow_item items[],
1757                  const struct rte_flow_action actions[],
1758                  struct rte_flow_error *error)
1759 {
1760         struct mlx5_flow_parse parser = { .create = 1, };
1761         struct rte_flow *flow = NULL;
1762         unsigned int i;
1763         int err;
1764
1765         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1766         if (err)
1767                 goto exit;
1768         flow = rte_calloc(__func__, 1,
1769                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1770                           0);
1771         if (!flow) {
1772                 rte_flow_error_set(error, ENOMEM,
1773                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1774                                    NULL,
1775                                    "cannot allocate flow memory");
1776                 return NULL;
1777         }
1778         /* Copy queues configuration. */
1779         flow->queues = (uint16_t (*)[])(flow + 1);
1780         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1781         flow->queues_n = parser.queues_n;
1782         /* Copy RSS configuration. */
1783         flow->rss_conf = parser.rss_conf;
1784         flow->rss_conf.rss_key = flow->rss_key;
1785         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1786         /* Finalize the flow. */
1787         if (parser.drop)
1788                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1789                                                          error);
1790         else
1791                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1792         if (err)
1793                 goto exit;
1794         TAILQ_INSERT_TAIL(list, flow, next);
1795         DEBUG("Flow created %p", (void *)flow);
1796         return flow;
1797 exit:
1798         if (parser.drop) {
1799                 rte_free(parser.drop_q.ibv_attr);
1800         } else {
1801                 for (i = 0; i != hash_rxq_init_n; ++i) {
1802                         if (parser.queue[i].ibv_attr)
1803                                 rte_free(parser.queue[i].ibv_attr);
1804                 }
1805         }
1806         rte_free(flow);
1807         return NULL;
1808 }
1809
1810 /**
1811  * Validate a flow supported by the NIC.
1812  *
1813  * @see rte_flow_validate()
1814  * @see rte_flow_ops
1815  */
1816 int
1817 mlx5_flow_validate(struct rte_eth_dev *dev,
1818                    const struct rte_flow_attr *attr,
1819                    const struct rte_flow_item items[],
1820                    const struct rte_flow_action actions[],
1821                    struct rte_flow_error *error)
1822 {
1823         struct priv *priv = dev->data->dev_private;
1824         int ret;
1825         struct mlx5_flow_parse parser = { .create = 0, };
1826
1827         priv_lock(priv);
1828         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1829         priv_unlock(priv);
1830         return ret;
1831 }
1832
1833 /**
1834  * Create a flow.
1835  *
1836  * @see rte_flow_create()
1837  * @see rte_flow_ops
1838  */
1839 struct rte_flow *
1840 mlx5_flow_create(struct rte_eth_dev *dev,
1841                  const struct rte_flow_attr *attr,
1842                  const struct rte_flow_item items[],
1843                  const struct rte_flow_action actions[],
1844                  struct rte_flow_error *error)
1845 {
1846         struct priv *priv = dev->data->dev_private;
1847         struct rte_flow *flow;
1848
1849         priv_lock(priv);
1850         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1851                                 error);
1852         priv_unlock(priv);
1853         return flow;
1854 }
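     /*
      * Usage sketch for the entry point above (illustrative only:
      * "port_id" and the queue index are assumptions, error handling is
      * omitted). Applications reach this function through the generic
      * rte_flow API via mlx5_flow_ops:
      *
      *         struct rte_flow_error err;
      *         struct rte_flow_attr attr = { .ingress = 1 };
      *         struct rte_flow_item pattern[] = {
      *                 { .type = RTE_FLOW_ITEM_TYPE_ETH },
      *                 { .type = RTE_FLOW_ITEM_TYPE_END },
      *         };
      *         struct rte_flow_action_queue queue = { .index = 0 };
      *         struct rte_flow_action actions[] = {
      *                 { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
      *                 { .type = RTE_FLOW_ACTION_TYPE_END },
      *         };
      *         struct rte_flow *flow;
      *
      *         flow = rte_flow_create(port_id, &attr, pattern, actions, &err);
      */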
1855
1856 /**
1857  * Destroy a flow.
1858  *
1859  * @param priv
1860  *   Pointer to private structure.
1861  * @param list
1862  *   Pointer to a TAILQ flow list.
1863  * @param[in] flow
1864  *   Flow to destroy.
1865  */
1866 static void
1867 priv_flow_destroy(struct priv *priv,
1868                   struct mlx5_flows *list,
1869                   struct rte_flow *flow)
1870 {
1871         unsigned int i;
1872
1873         if (flow->drop || !flow->mark)
1874                 goto free;
1875         for (i = 0; i != flow->queues_n; ++i) {
1876                 struct rte_flow *tmp;
1877                 int mark = 0;
1878
1879                 /*
1880                  * To remove the mark from the queue, the queue must not be
1881                  * present in any other marked flow (RSS or not).
1882                  */
1883                 TAILQ_FOREACH(tmp, list, next) {
1884                         unsigned int j;
1885                         uint16_t *tqs = NULL;
1886                         uint16_t tq_n = 0;
1887
1888                         if (!tmp->mark)
1889                                 continue;
1890                         for (j = 0; j != hash_rxq_init_n; ++j) {
1891                                 if (!tmp->frxq[j].hrxq)
1892                                         continue;
1893                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
1894                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
1895                         }
1896                         if (!tq_n)
1897                                 continue;
1898                         for (j = 0; (j != tq_n) && !mark; j++)
1899                                 if (tqs[j] == (*flow->queues)[i])
1900                                         mark = 1;
1901                 }
1902                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
1903         }
1904 free:
1905         if (flow->drop) {
1906                 if (flow->drxq.ibv_flow)
1907                         claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
1908                 rte_free(flow->drxq.ibv_attr);
1909         } else {
1910                 for (i = 0; i != hash_rxq_init_n; ++i) {
1911                         struct mlx5_flow *frxq = &flow->frxq[i];
1912
1913                         if (frxq->ibv_flow)
1914                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
1915                         if (frxq->hrxq)
1916                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
1917                         if (frxq->ibv_attr)
1918                                 rte_free(frxq->ibv_attr);
1919                 }
1920         }
1921         TAILQ_REMOVE(list, flow, next);
1922         DEBUG("Flow destroyed %p", (void *)flow);
1923         rte_free(flow);
1924 }
1925
1926 /**
1927  * Destroy all flows.
1928  *
1929  * @param priv
1930  *   Pointer to private structure.
1931  * @param list
1932  *   Pointer to a TAILQ flow list.
1933  */
1934 void
1935 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
1936 {
1937         while (!TAILQ_EMPTY(list)) {
1938                 struct rte_flow *flow;
1939
1940                 flow = TAILQ_FIRST(list);
1941                 priv_flow_destroy(priv, list, flow);
1942         }
1943 }
1944
1945 /**
1946  * Create drop queue.
1947  *
1948  * @param priv
1949  *   Pointer to private structure.
1950  *
1951  * @return
1952  *   0 on success, -1 on failure.
1953  */
1954 int
1955 priv_flow_create_drop_queue(struct priv *priv)
1956 {
1957         struct mlx5_hrxq_drop *fdq = NULL;
1958
1959         assert(priv->pd);
1960         assert(priv->ctx);
1961         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
1962         if (!fdq) {
1963                 WARN("cannot allocate memory for drop queue");
1964                 goto error;
1965         }
1966         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
1967         if (!fdq->cq) {
1968                 WARN("cannot allocate CQ for drop queue");
1969                 goto error;
1970         }
1971         fdq->wq = ibv_create_wq(priv->ctx,
1972                         &(struct ibv_wq_init_attr){
1973                         .wq_type = IBV_WQT_RQ,
1974                         .max_wr = 1,
1975                         .max_sge = 1,
1976                         .pd = priv->pd,
1977                         .cq = fdq->cq,
1978                         });
1979         if (!fdq->wq) {
1980                 WARN("cannot allocate WQ for drop queue");
1981                 goto error;
1982         }
1983         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
1984                         &(struct ibv_rwq_ind_table_init_attr){
1985                         .log_ind_tbl_size = 0,
1986                         .ind_tbl = &fdq->wq,
1987                         .comp_mask = 0,
1988                         });
1989         if (!fdq->ind_table) {
1990                 WARN("cannot allocate indirection table for drop queue");
1991                 goto error;
1992         }
1993         fdq->qp = ibv_create_qp_ex(priv->ctx,
1994                 &(struct ibv_qp_init_attr_ex){
1995                         .qp_type = IBV_QPT_RAW_PACKET,
1996                         .comp_mask =
1997                                 IBV_QP_INIT_ATTR_PD |
1998                                 IBV_QP_INIT_ATTR_IND_TABLE |
1999                                 IBV_QP_INIT_ATTR_RX_HASH,
2000                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2001                                 .rx_hash_function =
2002                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2003                                 .rx_hash_key_len = rss_hash_default_key_len,
2004                                 .rx_hash_key = rss_hash_default_key,
2005                                 .rx_hash_fields_mask = 0,
2006                                 },
2007                         .rwq_ind_tbl = fdq->ind_table,
2008                         .pd = priv->pd
2009                 });
2010         if (!fdq->qp) {
2011                 WARN("cannot allocate QP for drop queue");
2012                 goto error;
2013         }
2014         priv->flow_drop_queue = fdq;
2015         return 0;
2016 error:
2017         /* fdq is NULL only when its own allocation failed. */
2018         if (fdq) {
2019                 if (fdq->qp)
2020                         claim_zero(ibv_destroy_qp(fdq->qp));
2021                 if (fdq->ind_table)
2022                         claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2023                 if (fdq->wq)
2024                         claim_zero(ibv_destroy_wq(fdq->wq));
2025                 if (fdq->cq)
2026                         claim_zero(ibv_destroy_cq(fdq->cq));
                     rte_free(fdq);
             }
2027         priv->flow_drop_queue = NULL;
2028         return -1;
2029 }
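     /*
      * The drop queue built above is a chain of otherwise unused Verbs
      * objects (CQ -> work queue -> single-entry indirection table ->
      * RSS QP). Flow rules attached to this QP steer packets to a queue
      * nobody ever posts receive buffers to, which effectively drops
      * them.
      */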
2030
2031 /**
2032  * Delete drop queue.
2033  *
2034  * @param priv
2035  *   Pointer to private structure.
2036  */
2037 void
2038 priv_flow_delete_drop_queue(struct priv *priv)
2039 {
2040         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2041
2042         if (!fdq)
2043                 return;
2044         if (fdq->qp)
2045                 claim_zero(ibv_destroy_qp(fdq->qp));
2046         if (fdq->ind_table)
2047                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2048         if (fdq->wq)
2049                 claim_zero(ibv_destroy_wq(fdq->wq));
2050         if (fdq->cq)
2051                 claim_zero(ibv_destroy_cq(fdq->cq));
2052         rte_free(fdq);
2053         priv->flow_drop_queue = NULL;
2054 }
2055
2056 /**
2057  * Remove all flows.
2058  *
2059  * @param priv
2060  *   Pointer to private structure.
2061  * @param list
2062  *   Pointer to a TAILQ flow list.
2063  */
2064 void
2065 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2066 {
2067         struct rte_flow *flow;
2068
2069         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2070                 unsigned int i;
2071
2072                 if (flow->drop) {
2073                         if (!flow->drxq.ibv_flow)
2074                                 continue;
2075                         claim_zero(ibv_destroy_flow(flow->drxq.ibv_flow));
2076                         flow->drxq.ibv_flow = NULL;
2077                         /* Next flow. */
2078                         continue;
2079                 }
2080                 if (flow->mark) {
2081                         struct mlx5_ind_table_ibv *ind_tbl = NULL;
2082
2083                         for (i = 0; i != hash_rxq_init_n; ++i) {
2084                                 if (!flow->frxq[i].hrxq)
2085                                         continue;
2086                                 ind_tbl = flow->frxq[i].hrxq->ind_table;
2087                         }
2088                         assert(ind_tbl);
2089                         for (i = 0; i != ind_tbl->queues_n; ++i)
2090                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2091                 }
2092                 for (i = 0; i != hash_rxq_init_n; ++i) {
2093                         if (!flow->frxq[i].ibv_flow)
2094                                 continue;
2095                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2096                         flow->frxq[i].ibv_flow = NULL;
2097                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2098                         flow->frxq[i].hrxq = NULL;
2099                 }
2100                 DEBUG("Flow %p removed", (void *)flow);
2101         }
2102 }
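     /*
      * Note: priv_flow_stop() destroys the Verbs flows and releases the
      * hash Rx queues, but deliberately keeps the ibv_attr buffers
      * attached to each rte_flow; priv_flow_start() below relies on them
      * to re-apply the same rules when the port is restarted.
      */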
2103
2104 /**
2105  * Add all flows.
2106  *
2107  * @param priv
2108  *   Pointer to private structure.
2109  * @param list
2110  *   Pointer to a TAILQ flow list.
2111  *
2112  * @return
2113  *   0 on success, an errno value otherwise and rte_errno is set.
2114  */
2115 int
2116 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2117 {
2118         struct rte_flow *flow;
2119
2120         TAILQ_FOREACH(flow, list, next) {
2121                 unsigned int i;
2122
2123                 if (flow->drop) {
2124                         flow->drxq.ibv_flow =
2125                                 ibv_create_flow(priv->flow_drop_queue->qp,
2126                                                 flow->drxq.ibv_attr);
2127                         if (!flow->drxq.ibv_flow) {
2128                                 DEBUG("Flow %p cannot be applied",
2129                                       (void *)flow);
2130                                 rte_errno = EINVAL;
2131                                 return rte_errno;
2132                         }
2133                         DEBUG("Flow %p applied", (void *)flow);
2134                         /* Next flow. */
2135                         continue;
2136                 }
2137                 for (i = 0; i != hash_rxq_init_n; ++i) {
2138                         if (!flow->frxq[i].ibv_attr)
2139                                 continue;
2140                         flow->frxq[i].hrxq =
2141                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2142                                                    flow->rss_conf.rss_key_len,
2143                                                    hash_rxq_init[i].hash_fields,
2144                                                    (*flow->queues),
2145                                                    flow->queues_n);
2146                         if (flow->frxq[i].hrxq)
2147                                 goto flow_create;
2148                         flow->frxq[i].hrxq =
2149                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2150                                                    flow->rss_conf.rss_key_len,
2151                                                    hash_rxq_init[i].hash_fields,
2152                                                    (*flow->queues),
2153                                                    flow->queues_n);
2154                         if (!flow->frxq[i].hrxq) {
2155                                 DEBUG("Flow %p cannot be applied",
2156                                       (void *)flow);
2157                                 rte_errno = EINVAL;
2158                                 return rte_errno;
2159                         }
2160 flow_create:
2161                         flow->frxq[i].ibv_flow =
2162                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2163                                                 flow->frxq[i].ibv_attr);
2164                         if (!flow->frxq[i].ibv_flow) {
2165                                 DEBUG("Flow %p cannot be applied",
2166                                       (void *)flow);
2167                                 rte_errno = EINVAL;
2168                                 return rte_errno;
2169                         }
2170                         DEBUG("Flow %p applied", (void *)flow);
2171                 }
2172                 if (!flow->mark)
2173                         continue;
2174                 for (i = 0; i != flow->queues_n; ++i)
2175                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2176         }
2177         return 0;
2178 }
2179
2180 /**
2181  * Verify the flow list is empty.
2182  *
2183  * @param priv
2184  *   Pointer to private structure.
2185  *
2186  * @return The number of flows not released.
2187  */
2188 int
2189 priv_flow_verify(struct priv *priv)
2190 {
2191         struct rte_flow *flow;
2192         int ret = 0;
2193
2194         TAILQ_FOREACH(flow, &priv->flows, next) {
2195                 DEBUG("%p: flow %p still referenced", (void *)priv,
2196                       (void *)flow);
2197                 ++ret;
2198         }
2199         return ret;
2200 }
2201
2202 /**
2203  * Enable a control flow configured from the control plane.
2204  *
2205  * @param dev
2206  *   Pointer to Ethernet device.
2207  * @param eth_spec
2208  *   An Ethernet flow spec to apply.
2209  * @param eth_mask
2210  *   An Ethernet flow mask to apply.
2211  * @param vlan_spec
2212  *   A VLAN flow spec to apply.
2213  * @param vlan_mask
2214  *   A VLAN flow mask to apply.
2215  *
2216  * @return
2217  *   0 on success, an errno value on failure.
2218  */
2219 int
2220 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2221                     struct rte_flow_item_eth *eth_spec,
2222                     struct rte_flow_item_eth *eth_mask,
2223                     struct rte_flow_item_vlan *vlan_spec,
2224                     struct rte_flow_item_vlan *vlan_mask)
2225 {
2226         struct priv *priv = dev->data->dev_private;
2227         const struct rte_flow_attr attr = {
2228                 .ingress = 1,
2229                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2230         };
2231         struct rte_flow_item items[] = {
2232                 {
2233                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2234                         .spec = eth_spec,
2235                         .last = NULL,
2236                         .mask = eth_mask,
2237                 },
2238                 {
2239                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2240                                 RTE_FLOW_ITEM_TYPE_END,
2241                         .spec = vlan_spec,
2242                         .last = NULL,
2243                         .mask = vlan_mask,
2244                 },
2245                 {
2246                         .type = RTE_FLOW_ITEM_TYPE_END,
2247                 },
2248         };
2249         struct rte_flow_action actions[] = {
2250                 {
2251                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2252                 },
2253                 {
2254                         .type = RTE_FLOW_ACTION_TYPE_END,
2255                 },
2256         };
2257         struct rte_flow *flow;
2258         struct rte_flow_error error;
2259         unsigned int i;
2260         union {
2261                 struct rte_flow_action_rss rss;
2262                 struct {
2263                         const struct rte_eth_rss_conf *rss_conf;
2264                         uint16_t num;
2265                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2266                 } local;
2267         } action_rss;
2268
2269         if (!priv->reta_idx_n)
2270                 return EINVAL;
2271         for (i = 0; i != priv->reta_idx_n; ++i)
2272                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2273         action_rss.local.rss_conf = &priv->rss_conf;
2274         action_rss.local.num = priv->reta_idx_n;
2275         actions[0].conf = (const void *)&action_rss.rss;
2276         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2277                                 &error);
2278         if (!flow)
2279                 return rte_errno;
2280         return 0;
2281 }
2282
2283 /**
2284  * Enable a control flow configured from the control plane.
2285  *
2286  * @param dev
2287  *   Pointer to Ethernet device.
2288  * @param eth_spec
2289  *   An Ethernet flow spec to apply.
2290  * @param eth_mask
2291  *   An Ethernet flow mask to apply.
2292  *
2293  * @return
2294  *   0 on success, an errno value on failure.
2295  */
2296 int
2297 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2298                struct rte_flow_item_eth *eth_spec,
2299                struct rte_flow_item_eth *eth_mask)
2300 {
2301         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2302 }
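     /*
      * Usage sketch for the helper above (illustrative only; the RETA
      * must already be populated, otherwise the underlying call fails
      * with EINVAL). A rule accepting broadcast traffic can be installed
      * by passing the same structure as both spec and mask, which
      * requests an exact match on the destination MAC address:
      *
      *         struct rte_flow_item_eth bcast = {
      *                 .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
      *         };
      *
      *         ret = mlx5_ctrl_flow(dev, &bcast, &bcast);
      */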
2303
2304 /**
2305  * Destroy a flow.
2306  *
2307  * @see rte_flow_destroy()
2308  * @see rte_flow_ops
2309  */
2310 int
2311 mlx5_flow_destroy(struct rte_eth_dev *dev,
2312                   struct rte_flow *flow,
2313                   struct rte_flow_error *error)
2314 {
2315         struct priv *priv = dev->data->dev_private;
2316
2317         (void)error;
2318         priv_lock(priv);
2319         priv_flow_destroy(priv, &priv->flows, flow);
2320         priv_unlock(priv);
2321         return 0;
2322 }
2323
2324 /**
2325  * Destroy all flows.
2326  *
2327  * @see rte_flow_flush()
2328  * @see rte_flow_ops
2329  */
2330 int
2331 mlx5_flow_flush(struct rte_eth_dev *dev,
2332                 struct rte_flow_error *error)
2333 {
2334         struct priv *priv = dev->data->dev_private;
2335
2336         (void)error;
2337         priv_lock(priv);
2338         priv_flow_flush(priv, &priv->flows);
2339         priv_unlock(priv);
2340         return 0;
2341 }
2342
2343 /**
2344  * Isolated mode.
2345  *
2346  * @see rte_flow_isolate()
2347  * @see rte_flow_ops
2348  */
2349 int
2350 mlx5_flow_isolate(struct rte_eth_dev *dev,
2351                   int enable,
2352                   struct rte_flow_error *error)
2353 {
2354         struct priv *priv = dev->data->dev_private;
2355
2356         priv_lock(priv);
2357         if (dev->data->dev_started) {
2358                 rte_flow_error_set(error, EBUSY,
2359                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2360                                    NULL,
2361                                    "port must be stopped first");
2362                 priv_unlock(priv);
2363                 return -rte_errno;
2364         }
2365         priv->isolated = !!enable;
2366         if (enable)
2367                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2368         else
2369                 priv->dev->dev_ops = &mlx5_dev_ops;
2370         priv_unlock(priv);
2371         return 0;
2372 }
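     /*
      * Usage sketch (illustrative only; "port_id" is an assumption):
      * isolated mode must be requested through the generic API before the
      * port is started, otherwise the call fails with EBUSY as enforced
      * above:
      *
      *         struct rte_flow_error err;
      *
      *         if (rte_flow_isolate(port_id, 1, &err))
      *                 rte_panic("cannot enter isolated mode: %s\n",
      *                           err.message);
      *         rte_eth_dev_start(port_id);
      *
      * Switching dev_ops to mlx5_dev_ops_isolate installs the alternative
      * set of device operations used while incoming traffic is dictated
      * exclusively by flow rules.
      */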
2373
2374 /**
2375  * Convert a flow director filter to a generic flow.
2376  *
2377  * @param priv
2378  *   Private structure.
2379  * @param fdir_filter
2380  *   Flow director filter to add.
2381  * @param attributes
2382  *   Generic flow parameters structure.
2383  *
2384  * @return
2385  *  0 on success, errno value on error.
2386  */
2387 static int
2388 priv_fdir_filter_convert(struct priv *priv,
2389                          const struct rte_eth_fdir_filter *fdir_filter,
2390                          struct mlx5_fdir *attributes)
2391 {
2392         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2393
2394         /* Validate queue number. */
2395         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2396                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2397                 return EINVAL;
2398         }
2399         /* Validate the behavior. */
2400         if (fdir_filter->action.behavior != RTE_ETH_FDIR_ACCEPT) {
2401                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2402                 return ENOTSUP;
2403         }
2404         attributes->attr.ingress = 1;
2405         attributes->items[0] = (struct rte_flow_item) {
2406                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2407                 .spec = &attributes->l2,
2408         };
2409         attributes->actions[0] = (struct rte_flow_action){
2410                 .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2411                 .conf = &attributes->queue,
2412         };
2413         attributes->queue.index = fdir_filter->action.rx_queue;
2414         switch (fdir_filter->input.flow_type) {
2415         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2416                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2417                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2418                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2419                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2420                         .type_of_service = input->flow.udp4_flow.ip.tos,
2421                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2422                 };
2423                 attributes->l4.udp.hdr = (struct udp_hdr){
2424                         .src_port = input->flow.udp4_flow.src_port,
2425                         .dst_port = input->flow.udp4_flow.dst_port,
2426                 };
2427                 attributes->items[1] = (struct rte_flow_item){
2428                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2429                         .spec = &attributes->l3,
2430                 };
2431                 attributes->items[2] = (struct rte_flow_item){
2432                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2433                         .spec = &attributes->l4,
2434                 };
2435                 break;
2436         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2437                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2438                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2439                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2440                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2441                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2442                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2443                 };
2444                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2445                         .src_port = input->flow.tcp4_flow.src_port,
2446                         .dst_port = input->flow.tcp4_flow.dst_port,
2447                 };
2448                 attributes->items[1] = (struct rte_flow_item){
2449                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2450                         .spec = &attributes->l3,
2451                 };
2452                 attributes->items[2] = (struct rte_flow_item){
2453                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2454                         .spec = &attributes->l4,
2455                 };
2456                 break;
2457         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2458                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2459                         .src_addr = input->flow.ip4_flow.src_ip,
2460                         .dst_addr = input->flow.ip4_flow.dst_ip,
2461                         .time_to_live = input->flow.ip4_flow.ttl,
2462                         .type_of_service = input->flow.ip4_flow.tos,
2463                         .next_proto_id = input->flow.ip4_flow.proto,
2464                 };
2465                 attributes->items[1] = (struct rte_flow_item){
2466                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2467                         .spec = &attributes->l3,
2468                 };
2469                 break;
2470         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2471                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2472                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2473                         .proto = input->flow.udp6_flow.ip.proto,
2474                 };
2475                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2476                        input->flow.udp6_flow.ip.src_ip,
2477                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2478                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2479                        input->flow.udp6_flow.ip.dst_ip,
2480                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2481                 attributes->l4.udp.hdr = (struct udp_hdr){
2482                         .src_port = input->flow.udp6_flow.src_port,
2483                         .dst_port = input->flow.udp6_flow.dst_port,
2484                 };
2485                 attributes->items[1] = (struct rte_flow_item){
2486                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2487                         .spec = &attributes->l3,
2488                 };
2489                 attributes->items[2] = (struct rte_flow_item){
2490                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2491                         .spec = &attributes->l4,
2492                 };
2493                 break;
2494         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2495                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2496                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2497                         .proto = input->flow.tcp6_flow.ip.proto,
2498                 };
2499                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2500                        input->flow.tcp6_flow.ip.src_ip,
2501                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2502                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2503                        input->flow.tcp6_flow.ip.dst_ip,
2504                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2505                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2506                         .src_port = input->flow.tcp6_flow.src_port,
2507                         .dst_port = input->flow.tcp6_flow.dst_port,
2508                 };
2509                 attributes->items[1] = (struct rte_flow_item){
2510                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2511                         .spec = &attributes->l3,
2512                 };
2513                 attributes->items[2] = (struct rte_flow_item){
2514                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2515                         .spec = &attributes->l4,
2516                 };
2517                 break;
2518         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2519                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2520                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2521                         .proto = input->flow.ipv6_flow.proto,
2522                 };
2523                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2524                        input->flow.ipv6_flow.src_ip,
2525                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2526                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2527                        input->flow.ipv6_flow.dst_ip,
2528                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2529                 attributes->items[1] = (struct rte_flow_item){
2530                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2531                         .spec = &attributes->l3,
2532                 };
2533                 break;
2534         default:
2535                 ERROR("invalid flow type %d",
2536                       fdir_filter->input.flow_type);
2537                 return ENOTSUP;
2538         }
2539         return 0;
2540 }
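     /*
      * Example of the conversion above (illustrative values only): a
      * perfect-mode filter steering UDP packets for 192.168.0.1:53 to Rx
      * queue 3, i.e.
      *
      *         struct rte_eth_fdir_filter f = {
      *                 .input = {
      *                         .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP,
      *                         .flow.udp4_flow = {
      *                                 .ip.dst_ip = rte_cpu_to_be_32(0xc0a80001),
      *                                 .dst_port = rte_cpu_to_be_16(53),
      *                         },
      *                 },
      *                 .action = {
      *                         .behavior = RTE_ETH_FDIR_ACCEPT,
      *                         .rx_queue = 3,
      *                 },
      *         };
      *
      * becomes the generic pattern ETH / IPV4 / UDP with a QUEUE action
      * on index 3, which then follows the same priv_flow_convert() path
      * as any rte_flow rule.
      */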
2541
2542 /**
2543  * Add new flow director filter and store it in list.
2544  *
2545  * @param priv
2546  *   Private structure.
2547  * @param fdir_filter
2548  *   Flow director filter to add.
2549  *
2550  * @return
2551  *   0 on success, errno value on failure.
2552  */
2553 static int
2554 priv_fdir_filter_add(struct priv *priv,
2555                      const struct rte_eth_fdir_filter *fdir_filter)
2556 {
2557         struct mlx5_fdir attributes = {
2558                 .attr.group = 0,
2559         };
2560         struct mlx5_flow_parse parser = {
2561                 .layer = HASH_RXQ_ETH,
2562         };
2563         struct rte_flow_error error;
2564         struct rte_flow *flow;
2565         int ret;
2566
2567         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2568         if (ret)
2569                 return -ret;
2570         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2571                                 attributes.actions, &error, &parser);
2572         if (ret)
2573                 return -ret;
2574         flow = priv_flow_create(priv,
2575                                 &priv->flows,
2576                                 &attributes.attr,
2577                                 attributes.items,
2578                                 attributes.actions,
2579                                 &error);
2580         if (flow) {
2581                 /* priv_flow_create() has already linked the flow into priv->flows. */
2582                 DEBUG("FDIR created %p", (void *)flow);
2583                 return 0;
2584         }
2585         return ENOTSUP;
2586 }
2587
2588 /**
2589  * Delete specific filter.
2590  *
2591  * @param priv
2592  *   Private structure.
2593  * @param fdir_filter
2594  *   Filter to be deleted.
2595  *
2596  * @return
2597  *   0 on success, errno value on failure.
2598  */
2599 static int
2600 priv_fdir_filter_delete(struct priv *priv,
2601                         const struct rte_eth_fdir_filter *fdir_filter)
2602 {
2603         struct mlx5_fdir attributes;
2604         struct mlx5_flow_parse parser = {
2605                 .create = 1,
2606                 .layer = HASH_RXQ_ETH,
2607         };
2608         struct rte_flow_error error;
2609         struct rte_flow *flow;
2610         unsigned int i;
2611         int ret;
2612
2613         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2614         if (ret)
2615                 return -ret;
2616         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2617                                 attributes.actions, &error, &parser);
2618         if (ret)
2619                 goto exit;
2620         TAILQ_FOREACH(flow, &priv->flows, next) {
2621                 struct ibv_flow_attr *attr;
2622                 struct ibv_spec_header *attr_h;
2623                 void *spec;
2624                 struct ibv_flow_attr *flow_attr;
2625                 struct ibv_spec_header *flow_h;
2626                 void *flow_spec;
2627                 unsigned int specs_n;
2628
2629                 if (parser.drop)
2630                         attr = parser.drop_q.ibv_attr;
2631                 else
2632                         attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2633                 if (flow->drop)
2634                         flow_attr = flow->drxq.ibv_attr;
2635                 else
2636                         flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2637                 /* Compare first the attributes. */
2638                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2639                         continue;
2640                 if (attr->num_of_specs == 0)
2641                         continue;
2642                 spec = (void *)((uintptr_t)attr +
2643                                 sizeof(struct ibv_flow_attr));
2644                 flow_spec = (void *)((uintptr_t)flow_attr +
2645                                      sizeof(struct ibv_flow_attr));
2646                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
2647                 for (i = 0; i != specs_n; ++i) {
2648                         attr_h = spec;
2649                         flow_h = flow_spec;
2650                         if (memcmp(spec, flow_spec,
2651                                    RTE_MIN(attr_h->size, flow_h->size)))
2652                                 goto wrong_flow;
2653                         spec = (void *)((uintptr_t)spec + attr_h->size);
2654                         flow_spec = (void *)((uintptr_t)flow_spec +
2655                                              flow_h->size);
2656                 }
2657                 /* At this point, the flow matches. */
2658                 break;
     wrong_flow:
                     /* The flow does not match. */
                     continue;
2659         }
2660         if (flow)
2661                 priv_flow_destroy(priv, &priv->flows, flow);
2662 exit:
2663         if (parser.drop) {
2664                 rte_free(parser.drop_q.ibv_attr);
2665         } else {
2666                 for (i = 0; i != hash_rxq_init_n; ++i) {
2667                         if (parser.queue[i].ibv_attr)
2668                                 rte_free(parser.queue[i].ibv_attr);
2669                 }
2670         }
2671         return -ret;
2672 }
2673
2674 /**
2675  * Update queue for specific filter.
2676  *
2677  * @param priv
2678  *   Private structure.
2679  * @param fdir_filter
2680  *   Filter to be updated.
2681  *
2682  * @return
2683  *   0 on success, errno value on failure.
2684  */
2685 static int
2686 priv_fdir_filter_update(struct priv *priv,
2687                         const struct rte_eth_fdir_filter *fdir_filter)
2688 {
2689         int ret;
2690
2691         ret = priv_fdir_filter_delete(priv, fdir_filter);
2692         if (ret)
2693                 return ret;
2694         ret = priv_fdir_filter_add(priv, fdir_filter);
2695         return ret;
2696 }
2697
2698 /**
2699  * Flush all filters.
2700  *
2701  * @param priv
2702  *   Private structure.
2703  */
2704 static void
2705 priv_fdir_filter_flush(struct priv *priv)
2706 {
2707         priv_flow_flush(priv, &priv->flows);
2708 }
2709
2710 /**
2711  * Get flow director information.
2712  *
2713  * @param priv
2714  *   Private structure.
2715  * @param[out] fdir_info
2716  *   Resulting flow director information.
2717  */
2718 static void
2719 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2720 {
2721         struct rte_eth_fdir_masks *mask =
2722                 &priv->dev->data->dev_conf.fdir_conf.mask;
2723
2724         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2725         fdir_info->guarant_spc = 0;
2726         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2727         fdir_info->max_flexpayload = 0;
2728         fdir_info->flow_types_mask[0] = 0;
2729         fdir_info->flex_payload_unit = 0;
2730         fdir_info->max_flex_payload_segment_num = 0;
2731         fdir_info->flex_payload_limit = 0;
2732         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2733 }
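
/*
 * Illustrative query (assumed application-side snippet, not part of
 * this file): the capability fields zeroed above are exactly what the
 * caller reads back.
 *
 *        struct rte_eth_fdir_info info;
 *
 *        memset(&info, 0, sizeof(info));
 *        rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                                RTE_ETH_FILTER_INFO, &info);
 *        printf("fdir mode: %d, guaranteed entries: %u\n",
 *               info.mode, info.guarant_spc);
 */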
2734
2735 /**
2736  * Deal with flow director operations.
2737  *
2738  * @param priv
2739  *   Pointer to private structure.
2740  * @param filter_op
2741  *   Operation to perform.
2742  * @param arg
2743  *   Pointer to operation-specific structure.
2744  *
2745  * @return
2746  *   0 on success, errno value on failure.
2747  */
2748 static int
2749 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2750 {
2751         enum rte_fdir_mode fdir_mode =
2752                 priv->dev->data->dev_conf.fdir_conf.mode;
2753         int ret = 0;
2754
2755         if (filter_op == RTE_ETH_FILTER_NOP)
2756                 return 0;
2757         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2758             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2759                 ERROR("%p: flow director mode %d not supported",
2760                       (void *)priv, fdir_mode);
2761                 return EINVAL;
2762         }
2763         switch (filter_op) {
2764         case RTE_ETH_FILTER_ADD:
2765                 ret = priv_fdir_filter_add(priv, arg);
2766                 break;
2767         case RTE_ETH_FILTER_UPDATE:
2768                 ret = priv_fdir_filter_update(priv, arg);
2769                 break;
2770         case RTE_ETH_FILTER_DELETE:
2771                 ret = priv_fdir_filter_delete(priv, arg);
2772                 break;
2773         case RTE_ETH_FILTER_FLUSH:
2774                 priv_fdir_filter_flush(priv);
2775                 break;
2776         case RTE_ETH_FILTER_INFO:
2777                 priv_fdir_info_get(priv, arg);
2778                 break;
2779         default:
2780                 DEBUG("%p: unknown operation %u", (void *)priv,
2781                       filter_op);
2782                 ret = EINVAL;
2783                 break;
2784         }
2785         return ret;
2786 }
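
/*
 * Illustrative call (assumed): flushing every flow director rule on a
 * port lands in the RTE_ETH_FILTER_FLUSH branch above; arg is unused
 * for this operation.
 *
 *        rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                                RTE_ETH_FILTER_FLUSH, NULL);
 */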
2787
2788 /**
2789  * Manage filter operations.
2790  *
2791  * @param dev
2792  *   Pointer to Ethernet device structure.
2793  * @param filter_type
2794  *   Filter type.
2795  * @param filter_op
2796  *   Operation to perform.
2797  * @param arg
2798  *   Pointer to operation-specific structure.
2799  *
2800  * @return
2801  *   0 on success, negative errno value on failure.
2802  */
2803 int
2804 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
2805                      enum rte_filter_type filter_type,
2806                      enum rte_filter_op filter_op,
2807                      void *arg)
2808 {
2809         int ret = EINVAL;
2810         struct priv *priv = dev->data->dev_private;
2811
2812         switch (filter_type) {
2813         case RTE_ETH_FILTER_GENERIC:
2814                 if (filter_op != RTE_ETH_FILTER_GET)
2815                         return -EINVAL;
2816                 *(const void **)arg = &mlx5_flow_ops;
2817                 return 0;
2818         case RTE_ETH_FILTER_FDIR:
2819                 priv_lock(priv);
2820                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
2821                 priv_unlock(priv);
2822                 break;
2823         default:
2824                 ERROR("%p: filter type (%d) not supported",
2825                       (void *)dev, filter_type);
2826                 break;
2827         }
2828         return -ret;
2829 }
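
/*
 * Note: applications normally do not invoke this entry point directly
 * for RTE_ETH_FILTER_GENERIC; the rte_flow layer performs the lookup
 * below before dispatching rte_flow_*() calls. Equivalent sketch
 * (assumed):
 *
 *        const struct rte_flow_ops *ops = NULL;
 *        int ret = rte_eth_dev_filter_ctrl(port_id,
 *                                          RTE_ETH_FILTER_GENERIC,
 *                                          RTE_ETH_FILTER_GET, &ops);
 *
 * On success (ret == 0), ops is set to &mlx5_flow_ops.
 */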