net/mlx5: add device configuration structure
drivers/net/mlx5/mlx5_flow.c
/*-
 *   BSD LICENSE
 *
 *   Copyright 2016 6WIND S.A.
 *   Copyright 2016 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/queue.h>
#include <string.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include <rte_ethdev.h>
#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_malloc.h>

#include "mlx5.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

/* Define minimal priority for control plane flows. */
#define MLX5_CTRL_FLOW_PRIORITY 4

/* Internet Protocol versions. */
#define MLX5_IPV4 4
#define MLX5_IPV6 6

#ifndef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
struct ibv_counter_set_init_attr {
        int dummy;
};
struct ibv_flow_spec_counter_action {
        int dummy;
};
struct ibv_counter_set {
        int dummy;
};

static inline int
ibv_destroy_counter_set(struct ibv_counter_set *cs)
{
        (void)cs;
        return -ENOTSUP;
}
#endif

/* Dev ops structures defined in mlx5.c. */
extern const struct eth_dev_ops mlx5_dev_ops;
extern const struct eth_dev_ops mlx5_dev_ops_isolate;

static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv4(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_ipv6(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data);

static int
mlx5_flow_create_udp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_tcp(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data);

static int
mlx5_flow_create_vxlan(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);

struct mlx5_flow_parse;

static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size);

static int
mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id);

static int
mlx5_flow_create_count(struct priv *priv, struct mlx5_flow_parse *parser);

/* Hash RX queue types. */
enum hash_rxq_type {
        HASH_RXQ_TCPV4,
        HASH_RXQ_UDPV4,
        HASH_RXQ_IPV4,
        HASH_RXQ_TCPV6,
        HASH_RXQ_UDPV6,
        HASH_RXQ_IPV6,
        HASH_RXQ_ETH,
};

/* Initialization data for hash RX queue. */
struct hash_rxq_init {
        uint64_t hash_fields; /* Fields that participate in the hash. */
        uint64_t dpdk_rss_hf; /* Matching DPDK RSS hash fields. */
        unsigned int flow_priority; /* Flow priority to use. */
        unsigned int ip_version; /* Internet protocol. */
};

/* Initialization data for hash RX queues. */
const struct hash_rxq_init hash_rxq_init[] = {
        [HASH_RXQ_TCPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_UDPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV4_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_IPV4] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV4 |
                                IBV_RX_HASH_DST_IPV4),
                .dpdk_rss_hf = (ETH_RSS_IPV4 |
                                ETH_RSS_FRAG_IPV4),
                .flow_priority = 1,
                .ip_version = MLX5_IPV4,
        },
        [HASH_RXQ_TCPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_TCP |
                                IBV_RX_HASH_DST_PORT_TCP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_TCP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_UDPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6 |
                                IBV_RX_HASH_SRC_PORT_UDP |
                                IBV_RX_HASH_DST_PORT_UDP),
                .dpdk_rss_hf = ETH_RSS_NONFRAG_IPV6_UDP,
                .flow_priority = 0,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_IPV6] = {
                .hash_fields = (IBV_RX_HASH_SRC_IPV6 |
                                IBV_RX_HASH_DST_IPV6),
                .dpdk_rss_hf = (ETH_RSS_IPV6 |
                                ETH_RSS_FRAG_IPV6),
                .flow_priority = 1,
                .ip_version = MLX5_IPV6,
        },
        [HASH_RXQ_ETH] = {
                .hash_fields = 0,
                .dpdk_rss_hf = 0,
                .flow_priority = 2,
        },
};

/* Number of entries in hash_rxq_init[]. */
const unsigned int hash_rxq_init_n = RTE_DIM(hash_rxq_init);

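/*
 * Illustrative sketch (not part of the driver): a flow whose RSS
 * configuration enables ETH_RSS_NONFRAG_IPV4_UDP can be expanded over the
 * hash Rx queue types above by matching dpdk_rss_hf, e.g.:
 *
 *   uint64_t rss_hf = ETH_RSS_NONFRAG_IPV4_UDP;
 *   unsigned int i;
 *
 *   for (i = 0; i != hash_rxq_init_n; ++i)
 *           if (hash_rxq_init[i].dpdk_rss_hf & rss_hf)
 *                   use_hash_fields(hash_rxq_init[i].hash_fields);
 *
 * where use_hash_fields() is a hypothetical consumer. Only HASH_RXQ_UDPV4
 * matches here; HASH_RXQ_ETH never matches as its dpdk_rss_hf is 0.
 */
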
/** Structure for holding counter stats. */
struct mlx5_flow_counter_stats {
        uint64_t hits; /**< Number of packets matched by the rule. */
        uint64_t bytes; /**< Number of bytes matched by the rule. */
};

/** Structure for Drop queue. */
struct mlx5_hrxq_drop {
        struct ibv_rwq_ind_table *ind_table; /**< Indirection table. */
        struct ibv_qp *qp; /**< Verbs queue pair. */
        struct ibv_wq *wq; /**< Verbs work queue. */
        struct ibv_cq *cq; /**< Verbs completion queue. */
};

/* Flow structures. */
struct mlx5_flow {
        uint64_t hash_fields; /**< Fields that participate in the hash. */
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
        struct mlx5_hrxq *hrxq; /**< Hash Rx queues. */
};

/* Drop flow structure. */
struct mlx5_flow_drop {
        struct ibv_flow_attr *ibv_attr; /**< Pointer to Verbs attributes. */
        struct ibv_flow *ibv_flow; /**< Verbs flow. */
};

struct rte_flow {
        TAILQ_ENTRY(rte_flow) next; /**< Pointer to the next flow structure. */
        uint32_t mark:1; /**< Set if the flow is marked. */
        uint32_t drop:1; /**< Drop queue. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        uint16_t (*queues)[]; /**< Queue indexes to use. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        struct ibv_counter_set *cs; /**< Holds the counters for the rule. */
        struct mlx5_flow_counter_stats counter_stats; /**< The counter stats. */
        struct mlx5_flow frxq[RTE_DIM(hash_rxq_init)];
        /**< Flow with Rx queue. */
};

/** Static initializer for items. */
#define ITEMS(...) \
        (const enum rte_flow_item_type []){ \
                __VA_ARGS__, RTE_FLOW_ITEM_TYPE_END, \
        }

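/*
 * For instance (illustration only), ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
 * RTE_FLOW_ITEM_TYPE_IPV6) expands to the anonymous, END-terminated array:
 *
 *   (const enum rte_flow_item_type []){
 *           RTE_FLOW_ITEM_TYPE_IPV4,
 *           RTE_FLOW_ITEM_TYPE_IPV6,
 *           RTE_FLOW_ITEM_TYPE_END,
 *   }
 */
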
/** Structure to generate a simple graph of layers supported by the NIC. */
struct mlx5_flow_items {
        /** List of possible actions for these items. */
        const enum rte_flow_action_type *const actions;
        /** Bit-masks corresponding to the possibilities for the item. */
        const void *mask;
        /**
         * Default bit-masks to use when item->mask is not provided. When
         * \default_mask is also NULL, the full supported bit-mask (\mask) is
         * used instead.
         */
        const void *default_mask;
        /** Bit-masks size in bytes. */
        const unsigned int mask_sz;
        /**
         * Conversion function from rte_flow to NIC specific flow.
         *
         * @param item
         *   rte_flow item to convert.
         * @param default_mask
         *   Default bit-masks to use when item->mask is not provided.
         * @param data
         *   Internal structure to store the conversion.
         *
         * @return
         *   0 on success, negative value otherwise.
         */
        int (*convert)(const struct rte_flow_item *item,
                       const void *default_mask,
                       void *data);
        /** Size in bytes of the destination structure. */
        const unsigned int dst_sz;
        /** List of possible following items. */
        const enum rte_flow_item_type *const items;
};

/** Valid actions for this PMD. */
static const enum rte_flow_action_type valid_actions[] = {
        RTE_FLOW_ACTION_TYPE_DROP,
        RTE_FLOW_ACTION_TYPE_QUEUE,
        RTE_FLOW_ACTION_TYPE_MARK,
        RTE_FLOW_ACTION_TYPE_FLAG,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        RTE_FLOW_ACTION_TYPE_COUNT,
#endif
        RTE_FLOW_ACTION_TYPE_END,
};

/** Graph of supported items and associated actions. */
static const struct mlx5_flow_items mlx5_flow_items[] = {
        [RTE_FLOW_ITEM_TYPE_END] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH,
                               RTE_FLOW_ITEM_TYPE_VXLAN),
        },
        [RTE_FLOW_ITEM_TYPE_ETH] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VLAN,
                               RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_eth){
                        .dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .src.addr_bytes = "\xff\xff\xff\xff\xff\xff",
                        .type = -1,
                },
                .default_mask = &rte_flow_item_eth_mask,
                .mask_sz = sizeof(struct rte_flow_item_eth),
                .convert = mlx5_flow_create_eth,
                .dst_sz = sizeof(struct ibv_flow_spec_eth),
        },
        [RTE_FLOW_ITEM_TYPE_VLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_IPV4,
                               RTE_FLOW_ITEM_TYPE_IPV6),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vlan){
                        .tci = -1,
                },
                .default_mask = &rte_flow_item_vlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vlan),
                .convert = mlx5_flow_create_vlan,
                .dst_sz = 0,
        },
        [RTE_FLOW_ITEM_TYPE_IPV4] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv4){
                        .hdr = {
                                .src_addr = -1,
                                .dst_addr = -1,
                                .type_of_service = -1,
                                .next_proto_id = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv4_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv4),
                .convert = mlx5_flow_create_ipv4,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv4_ext),
        },
        [RTE_FLOW_ITEM_TYPE_IPV6] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_UDP,
                               RTE_FLOW_ITEM_TYPE_TCP),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_ipv6){
                        .hdr = {
                                .src_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .dst_addr = {
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                        0xff, 0xff, 0xff, 0xff,
                                },
                                .vtc_flow = -1,
                                .proto = -1,
                                .hop_limits = -1,
                        },
                },
                .default_mask = &rte_flow_item_ipv6_mask,
                .mask_sz = sizeof(struct rte_flow_item_ipv6),
                .convert = mlx5_flow_create_ipv6,
                .dst_sz = sizeof(struct ibv_flow_spec_ipv6),
        },
        [RTE_FLOW_ITEM_TYPE_UDP] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_VXLAN),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_udp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_udp_mask,
                .mask_sz = sizeof(struct rte_flow_item_udp),
                .convert = mlx5_flow_create_udp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_TCP] = {
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_tcp){
                        .hdr = {
                                .src_port = -1,
                                .dst_port = -1,
                        },
                },
                .default_mask = &rte_flow_item_tcp_mask,
                .mask_sz = sizeof(struct rte_flow_item_tcp),
                .convert = mlx5_flow_create_tcp,
                .dst_sz = sizeof(struct ibv_flow_spec_tcp_udp),
        },
        [RTE_FLOW_ITEM_TYPE_VXLAN] = {
                .items = ITEMS(RTE_FLOW_ITEM_TYPE_ETH),
                .actions = valid_actions,
                .mask = &(const struct rte_flow_item_vxlan){
                        .vni = "\xff\xff\xff",
                },
                .default_mask = &rte_flow_item_vxlan_mask,
                .mask_sz = sizeof(struct rte_flow_item_vxlan),
                .convert = mlx5_flow_create_vxlan,
                .dst_sz = sizeof(struct ibv_flow_spec_tunnel),
        },
};

/** Structure to pass to the conversion function. */
struct mlx5_flow_parse {
        uint32_t inner; /**< Set once VXLAN is encountered. */
        uint32_t allmulti:1; /**< Set once allmulti dst MAC is encountered. */
        uint32_t create:1;
        /**< Whether resources should remain after a validate. */
        uint32_t drop:1; /**< Target is a drop queue. */
        uint32_t mark:1; /**< Mark is present in the flow. */
        uint32_t count:1; /**< Count is present in the flow. */
        uint32_t mark_id; /**< Mark identifier. */
        uint16_t queues[RTE_MAX_QUEUES_PER_PORT]; /**< Queue indexes to use. */
        uint16_t queues_n; /**< Number of entries in queues[]. */
        struct rte_eth_rss_conf rss_conf; /**< RSS configuration. */
        uint8_t rss_key[40]; /**< Copy of the RSS key. */
        enum hash_rxq_type layer; /**< Last pattern layer detected. */
        struct ibv_counter_set *cs; /**< Holds the counter set for the rule. */
        struct {
                struct ibv_flow_attr *ibv_attr;
                /**< Pointer to Verbs attributes. */
                unsigned int offset;
                /**< Current position or total size of the attribute. */
        } queue[RTE_DIM(hash_rxq_init)];
};

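/*
 * Illustrative note: each parser->queue[] entry owns a Verbs attribute laid
 * out as a struct ibv_flow_attr immediately followed by its specifications;
 * offset tracks the end of what has been written so far:
 *
 *   +----------------------+--------------------+-----+
 *   | struct ibv_flow_attr | ibv_flow_spec_eth  | ... |
 *   +----------------------+--------------------+-----+
 *   ^                                                 ^
 *   ibv_attr                                          ibv_attr + offset
 */
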
static const struct rte_flow_ops mlx5_flow_ops = {
        .validate = mlx5_flow_validate,
        .create = mlx5_flow_create,
        .destroy = mlx5_flow_destroy,
        .flush = mlx5_flow_flush,
#ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
        .query = mlx5_flow_query,
#else
        .query = NULL,
#endif
        .isolate = mlx5_flow_isolate,
};

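/*
 * These callbacks are reached through the generic rte_flow API; a minimal
 * application-side sketch (illustration only, assuming a configured port
 * with at least one Rx queue):
 *
 *   struct rte_flow_attr attr = { .ingress = 1 };
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *   struct rte_flow_action_queue queue = { .index = 0 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *   struct rte_flow_error err;
 *   struct rte_flow *flow = rte_flow_create(port_id, &attr, pattern,
 *                                           actions, &err);
 *
 * rte_flow_create() reaches mlx5_flow_create() above through the PMD's
 * filter_ctrl callback (RTE_ETH_FILTER_GENERIC).
 */
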
/* Convert FDIR request to Generic flow. */
struct mlx5_fdir {
        struct rte_flow_attr attr;
        struct rte_flow_action actions[2];
        struct rte_flow_item items[4];
        struct rte_flow_item_eth l2;
        struct rte_flow_item_eth l2_mask;
        union {
                struct rte_flow_item_ipv4 ipv4;
                struct rte_flow_item_ipv6 ipv6;
        } l3;
        union {
                struct rte_flow_item_udp udp;
                struct rte_flow_item_tcp tcp;
        } l4;
        struct rte_flow_action_queue queue;
};

/* Verbs specification header. */
struct ibv_spec_header {
        enum ibv_flow_spec_type type;
        uint16_t size;
};

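/*
 * Illustration (not part of the driver): the specifications that follow a
 * struct ibv_flow_attr can be walked generically thanks to this header by
 * advancing by each specification's size:
 *
 *   struct ibv_spec_header *hdr = (void *)(attr + 1);
 *   unsigned int i;
 *
 *   for (i = 0; i != attr->num_of_specs; ++i) {
 *           visit(hdr->type, hdr->size);
 *           hdr = (void *)((uintptr_t)hdr + hdr->size);
 *   }
 *
 * where attr is a struct ibv_flow_attr * and visit() a hypothetical
 * callback.
 */
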
/**
 * Check support for a given item.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] mask
 *   Bit-masks covering supported fields to compare with spec, last and mask in
 *   \item.
 * @param size
 *   Bit-mask size in bytes.
 *
 * @return
 *   0 on success.
 */
static int
mlx5_flow_item_validate(const struct rte_flow_item *item,
                        const uint8_t *mask, unsigned int size)
{
        int ret = 0;

        if (!item->spec && (item->mask || item->last))
                return -1;
        if (item->spec && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->spec;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->last && !item->mask) {
                unsigned int i;
                const uint8_t *spec = item->last;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->mask) {
                unsigned int i;
                const uint8_t *spec = item->mask;

                for (i = 0; i < size; ++i)
                        if ((spec[i] | mask[i]) != mask[i])
                                return -1;
        }
        if (item->spec && item->last) {
                uint8_t spec[size];
                uint8_t last[size];
                const uint8_t *apply = mask;
                unsigned int i;

                if (item->mask)
                        apply = item->mask;
                for (i = 0; i < size; ++i) {
                        spec[i] = ((const uint8_t *)item->spec)[i] & apply[i];
                        last[i] = ((const uint8_t *)item->last)[i] & apply[i];
                }
                ret = memcmp(spec, last, size);
        }
        return ret;
}

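/*
 * A minimal usage sketch (illustration only): validating an Ethernet item
 * against the supported bit-mask declared in mlx5_flow_items[]:
 *
 *   const struct rte_flow_item_eth spec = {
 *           .type = RTE_BE16(0x0800),
 *   };
 *   struct rte_flow_item item = {
 *           .type = RTE_FLOW_ITEM_TYPE_ETH,
 *           .spec = &spec,
 *   };
 *   int ret = mlx5_flow_item_validate(&item,
 *           (const uint8_t *)mlx5_flow_items[RTE_FLOW_ITEM_TYPE_ETH].mask,
 *           mlx5_flow_items[RTE_FLOW_ITEM_TYPE_ETH].mask_sz);
 *
 * ret is 0 as the EtherType field is fully covered by the supported mask.
 */
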
/**
 * Copy the RSS configuration from the user to the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param parser
 *   Internal parser structure.
 * @param rss_conf
 *   User RSS configuration to save.
 *
 * @return
 *   0 on success, errno value on failure.
 */
static int
priv_flow_convert_rss_conf(struct priv *priv,
                           struct mlx5_flow_parse *parser,
                           const struct rte_eth_rss_conf *rss_conf)
{
        const struct rte_eth_rss_conf *rss;

        if (rss_conf) {
                if (rss_conf->rss_hf & MLX5_RSS_HF_MASK)
                        return EINVAL;
                rss = rss_conf;
        } else {
                rss = &priv->rss_conf;
        }
        if (rss->rss_key_len > 40)
                return EINVAL;
        parser->rss_conf.rss_key_len = rss->rss_key_len;
        parser->rss_conf.rss_hf = rss->rss_hf;
        memcpy(parser->rss_key, rss->rss_key, rss->rss_key_len);
        parser->rss_conf.rss_key = parser->rss_key;
        return 0;
}

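/*
 * Illustration: a user configuration accepted by this helper must neither
 * exceed the 40-byte key storage nor request hash fields outside the
 * supported set:
 *
 *   static uint8_t key[40]; (40-byte Toeplitz key)
 *   struct rte_eth_rss_conf conf = {
 *           .rss_key = key,
 *           .rss_key_len = 40,
 *           .rss_hf = ETH_RSS_IP | ETH_RSS_UDP,
 *   };
 *
 * A rss_key_len above 40 or a rss_hf bit in MLX5_RSS_HF_MASK makes the
 * helper return EINVAL.
 */
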
/**
 * Extract attributes into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_attributes(struct priv *priv,
                             const struct rte_flow_attr *attr,
                             struct rte_flow_error *error,
                             struct mlx5_flow_parse *parser)
{
        (void)priv;
        (void)parser;
        if (attr->group) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_GROUP,
                                   NULL,
                                   "groups are not supported");
                return -rte_errno;
        }
        if (attr->priority && attr->priority != MLX5_CTRL_FLOW_PRIORITY) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY,
                                   NULL,
                                   "priorities are not supported");
                return -rte_errno;
        }
        if (attr->egress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_EGRESS,
                                   NULL,
                                   "egress is not supported");
                return -rte_errno;
        }
        if (!attr->ingress) {
                rte_flow_error_set(error, ENOTSUP,
                                   RTE_FLOW_ERROR_TYPE_ATTR_INGRESS,
                                   NULL,
                                   "only ingress is supported");
                return -rte_errno;
        }
        return 0;
}

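/*
 * For illustration, the only attribute combination accepted here is an
 * ingress-only rule in the default group, with either the default priority
 * or MLX5_CTRL_FLOW_PRIORITY, e.g.:
 *
 *   struct rte_flow_attr attr = {
 *           .group = 0,
 *           .priority = 0,
 *           .ingress = 1,
 *           .egress = 0,
 *   };
 */
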
/**
 * Extract the requested actions into the parser.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_actions(struct priv *priv,
                          const struct rte_flow_action actions[],
                          struct rte_flow_error *error,
                          struct mlx5_flow_parse *parser)
{
        /*
         * Add the default RSS configuration necessary for Verbs to create a
         * QP even if no RSS is necessary.
         */
        priv_flow_convert_rss_conf(priv, parser,
                                   (const struct rte_eth_rss_conf *)
                                   &priv->rss_conf);
        for (; actions->type != RTE_FLOW_ACTION_TYPE_END; ++actions) {
                if (actions->type == RTE_FLOW_ACTION_TYPE_VOID) {
                        continue;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
                        parser->drop = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_QUEUE) {
                        const struct rte_flow_action_queue *queue =
                                (const struct rte_flow_action_queue *)
                                actions->conf;
                        uint16_t n;
                        uint16_t found = 0;

                        if (!queue || (queue->index > (priv->rxqs_n - 1)))
                                goto exit_action_not_supported;
                        for (n = 0; n < parser->queues_n; ++n) {
                                if (parser->queues[n] == queue->index) {
                                        found = 1;
                                        break;
                                }
                        }
                        if (parser->queues_n > 1 && !found) {
                                rte_flow_error_set(error, ENOTSUP,
                                           RTE_FLOW_ERROR_TYPE_ACTION,
                                           actions,
                                           "queue action not in RSS queues");
                                return -rte_errno;
                        }
                        if (!found) {
                                parser->queues_n = 1;
                                parser->queues[0] = queue->index;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_RSS) {
                        const struct rte_flow_action_rss *rss =
                                (const struct rte_flow_action_rss *)
                                actions->conf;
                        uint16_t n;

                        if (!rss || !rss->num) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "no valid queues");
                                return -rte_errno;
                        }
                        if (parser->queues_n == 1) {
                                uint16_t found = 0;

                                assert(parser->queues_n);
                                for (n = 0; n < rss->num; ++n) {
                                        if (parser->queues[0] ==
                                            rss->queue[n]) {
                                                found = 1;
                                                break;
                                        }
                                }
                                if (!found) {
                                        rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue action not in RSS"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n) {
                                if (rss->queue[n] >= priv->rxqs_n) {
                                        rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "queue id > number of"
                                                   " queues");
                                        return -rte_errno;
                                }
                        }
                        for (n = 0; n < rss->num; ++n)
                                parser->queues[n] = rss->queue[n];
                        parser->queues_n = rss->num;
                        if (priv_flow_convert_rss_conf(priv, parser,
                                                       rss->rss_conf)) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "wrong RSS configuration");
                                return -rte_errno;
                        }
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_MARK) {
                        const struct rte_flow_action_mark *mark =
                                (const struct rte_flow_action_mark *)
                                actions->conf;

                        if (!mark) {
                                rte_flow_error_set(error, EINVAL,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be defined");
                                return -rte_errno;
                        } else if (mark->id >= MLX5_FLOW_MARK_MAX) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ACTION,
                                                   actions,
                                                   "mark must be between 0"
                                                   " and 16777199");
                                return -rte_errno;
                        }
                        parser->mark = 1;
                        parser->mark_id = mark->id;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_FLAG) {
                        parser->mark = 1;
                } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT &&
                           priv->config.counter_set_supported) {
                        parser->count = 1;
                } else {
                        goto exit_action_not_supported;
                }
        }
        if (parser->drop && parser->mark)
                parser->mark = 0;
        if (!parser->queues_n && !parser->drop) {
                rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_HANDLE,
                                   NULL, "no valid action");
                return -rte_errno;
        }
        return 0;
exit_action_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ACTION,
                           actions, "action not supported");
        return -rte_errno;
}

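/*
 * Illustration: a mark-and-queue action list accepted by this parser
 * (assuming Rx queue 3 exists):
 *
 *   struct rte_flow_action_mark mark = { .id = 42 };
 *   struct rte_flow_action_queue queue = { .index = 3 };
 *   struct rte_flow_action actions[] = {
 *           { .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &mark },
 *           { .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &queue },
 *           { .type = RTE_FLOW_ACTION_TYPE_END },
 *   };
 *
 * The parser records mark_id = 42 and queues[0] = 3; combining DROP with
 * MARK would silently clear the mark as done at the end of this function.
 */
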
/**
 * Validate items.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert_items_validate(struct priv *priv,
                                 const struct rte_flow_item items[],
                                 struct rte_flow_error *error,
                                 struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;

        (void)priv;
        /* Initialise the offsets to start after the verbs attribute. */
        for (i = 0; i != hash_rxq_init_n; ++i)
                parser->queue[i].offset = sizeof(struct ibv_flow_attr);
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                const struct mlx5_flow_items *token = NULL;
                unsigned int n;
                int err;

                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                for (i = 0;
                     cur_item->items &&
                     cur_item->items[i] != RTE_FLOW_ITEM_TYPE_END;
                     ++i) {
                        if (cur_item->items[i] == items->type) {
                                token = &mlx5_flow_items[items->type];
                                break;
                        }
                }
                if (!token)
                        goto exit_item_not_supported;
                cur_item = token;
                err = mlx5_flow_item_validate(items,
                                              (const uint8_t *)cur_item->mask,
                                              cur_item->mask_sz);
                if (err)
                        goto exit_item_not_supported;
                if (items->type == RTE_FLOW_ITEM_TYPE_VXLAN) {
                        if (parser->inner) {
                                rte_flow_error_set(error, ENOTSUP,
                                                   RTE_FLOW_ERROR_TYPE_ITEM,
                                                   items,
                                                   "cannot recognize multiple"
                                                   " VXLAN encapsulations");
                                return -rte_errno;
                        }
                        parser->inner = IBV_FLOW_SPEC_INNER;
                }
                if (parser->drop || parser->queues_n == 1) {
                        parser->queue[HASH_RXQ_ETH].offset += cur_item->dst_sz;
                } else {
                        for (n = 0; n != hash_rxq_init_n; ++n)
                                parser->queue[n].offset += cur_item->dst_sz;
                }
        }
        if (parser->mark) {
                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset +=
                                sizeof(struct ibv_flow_spec_action_tag);
        }
        if (parser->count) {
                unsigned int size = sizeof(struct ibv_flow_spec_counter_action);

                for (i = 0; i != hash_rxq_init_n; ++i)
                        parser->queue[i].offset += size;
        }
        return 0;
exit_item_not_supported:
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_ITEM,
                           items, "item not supported");
        return -rte_errno;
}

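/*
 * Illustration: a pattern walking the item graph above
 * (END -> ETH -> IPV4 -> UDP), as this validator accepts:
 *
 *   struct rte_flow_item pattern[] = {
 *           { .type = RTE_FLOW_ITEM_TYPE_ETH },
 *           { .type = RTE_FLOW_ITEM_TYPE_IPV4 },
 *           { .type = RTE_FLOW_ITEM_TYPE_UDP },
 *           { .type = RTE_FLOW_ITEM_TYPE_END },
 *   };
 *
 * Each step increases the per-queue offsets by the item's dst_sz so the
 * exact Verbs attribute size is known before allocation.
 */
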
/**
 * Allocate memory space to store verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] priority
 *   Flow priority.
 * @param[in] size
 *   Amount of bytes to allocate.
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 *
 * @return
 *   A verbs flow attribute on success, NULL otherwise.
 */
static struct ibv_flow_attr *
priv_flow_convert_allocate(struct priv *priv,
                           unsigned int priority,
                           unsigned int size,
                           struct rte_flow_error *error)
{
        struct ibv_flow_attr *ibv_attr;

        (void)priv;
        ibv_attr = rte_calloc(__func__, 1, size, 0);
        if (!ibv_attr) {
                rte_flow_error_set(error, ENOMEM,
                                   RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                                   NULL,
                                   "cannot allocate verbs spec attributes.");
                return NULL;
        }
        ibv_attr->priority = priority;
        return ibv_attr;
}

/**
 * Finalise verbs flow attributes.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in, out] parser
 *   Internal parser structure.
 */
static void
priv_flow_convert_finalise(struct priv *priv, struct mlx5_flow_parse *parser)
{
        const unsigned int ipv4 =
                hash_rxq_init[parser->layer].ip_version == MLX5_IPV4;
        const enum hash_rxq_type hmin = ipv4 ? HASH_RXQ_TCPV4 : HASH_RXQ_TCPV6;
        const enum hash_rxq_type hmax = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        const enum hash_rxq_type ohmin = ipv4 ? HASH_RXQ_TCPV6 : HASH_RXQ_TCPV4;
        const enum hash_rxq_type ohmax = ipv4 ? HASH_RXQ_IPV6 : HASH_RXQ_IPV4;
        const enum hash_rxq_type ip = ipv4 ? HASH_RXQ_IPV4 : HASH_RXQ_IPV6;
        unsigned int i;

        (void)priv;
        if (parser->layer == HASH_RXQ_ETH) {
                goto fill;
        } else {
                /*
                 * This layer becomes useless as the pattern defines lower
                 * layers.
                 */
                rte_free(parser->queue[HASH_RXQ_ETH].ibv_attr);
                parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
        }
        /* Remove opposite kind of layer, e.g. IPv6 if the pattern is IPv4. */
        for (i = ohmin; i != (ohmax + 1); ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                rte_free(parser->queue[i].ibv_attr);
                parser->queue[i].ibv_attr = NULL;
        }
        /* Remove impossible flow according to the RSS configuration. */
        if (hash_rxq_init[parser->layer].dpdk_rss_hf &
            parser->rss_conf.rss_hf) {
                /* Remove any other flow. */
                for (i = hmin; i != (hmax + 1); ++i) {
                        if ((i == parser->layer) ||
                            (!parser->queue[i].ibv_attr))
                                continue;
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        } else if (!parser->queue[ip].ibv_attr) {
                /* No RSS possible with the current configuration. */
                parser->queues_n = 1;
                return;
        }
fill:
        /*
         * Fill missing layers in verbs specifications, or compute the correct
         * offset to allocate the memory space for the attributes and
         * specifications.
         */
        for (i = 0; i != hash_rxq_init_n - 1; ++i) {
                union {
                        struct ibv_flow_spec_ipv4_ext ipv4;
                        struct ibv_flow_spec_ipv6 ipv6;
                        struct ibv_flow_spec_tcp_udp udp_tcp;
                } specs;
                void *dst;
                uint16_t size;

                if (i == parser->layer)
                        continue;
                if (parser->layer == HASH_RXQ_ETH) {
                        if (hash_rxq_init[i].ip_version == MLX5_IPV4) {
                                size = sizeof(struct ibv_flow_spec_ipv4_ext);
                                specs.ipv4 = (struct ibv_flow_spec_ipv4_ext){
                                        .type = IBV_FLOW_SPEC_IPV4_EXT,
                                        .size = size,
                                };
                        } else {
                                size = sizeof(struct ibv_flow_spec_ipv6);
                                specs.ipv6 = (struct ibv_flow_spec_ipv6){
                                        .type = IBV_FLOW_SPEC_IPV6,
                                        .size = size,
                                };
                        }
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
                if ((i == HASH_RXQ_UDPV4) || (i == HASH_RXQ_TCPV4) ||
                    (i == HASH_RXQ_UDPV6) || (i == HASH_RXQ_TCPV6)) {
                        size = sizeof(struct ibv_flow_spec_tcp_udp);
                        specs.udp_tcp = (struct ibv_flow_spec_tcp_udp) {
                                .type = ((i == HASH_RXQ_UDPV4 ||
                                          i == HASH_RXQ_UDPV6) ?
                                         IBV_FLOW_SPEC_UDP :
                                         IBV_FLOW_SPEC_TCP),
                                .size = size,
                        };
                        if (parser->queue[i].ibv_attr) {
                                dst = (void *)((uintptr_t)
                                               parser->queue[i].ibv_attr +
                                               parser->queue[i].offset);
                                memcpy(dst, &specs, size);
                                ++parser->queue[i].ibv_attr->num_of_specs;
                        }
                        parser->queue[i].offset += size;
                }
        }
}

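/*
 * Illustration of the finalisation: with an ETH -> IPV4 pattern and RSS
 * enabling both IPv4 and IPv4/UDP, the HASH_RXQ_UDPV4 attribute lacks a UDP
 * specification; a wildcard one is appended:
 *
 *   struct ibv_flow_spec_tcp_udp udp = {
 *           .type = IBV_FLOW_SPEC_UDP,
 *           .size = sizeof(struct ibv_flow_spec_tcp_udp),
 *   };
 *
 * with zeroed val/mask fields, i.e. matching any UDP ports.
 */
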
/**
 * Validate and convert a flow supported by the NIC.
 *
 * @param priv
 *   Pointer to private structure.
 * @param[in] attr
 *   Flow rule attributes.
 * @param[in] items
 *   Pattern specification (list terminated by the END pattern item).
 * @param[in] actions
 *   Associated actions (list terminated by the END action).
 * @param[out] error
 *   Perform verbose error reporting if not NULL.
 * @param[in, out] parser
 *   Internal parser structure.
 *
 * @return
 *   0 on success, a negative errno value otherwise and rte_errno is set.
 */
static int
priv_flow_convert(struct priv *priv,
                  const struct rte_flow_attr *attr,
                  const struct rte_flow_item items[],
                  const struct rte_flow_action actions[],
                  struct rte_flow_error *error,
                  struct mlx5_flow_parse *parser)
{
        const struct mlx5_flow_items *cur_item = mlx5_flow_items;
        unsigned int i;
        int ret;

        /* First step. Validate the attributes, items and actions. */
        *parser = (struct mlx5_flow_parse){
                .create = parser->create,
                .layer = HASH_RXQ_ETH,
                .mark_id = MLX5_FLOW_MARK_DEFAULT,
        };
        ret = priv_flow_convert_attributes(priv, attr, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_actions(priv, actions, error, parser);
        if (ret)
                return ret;
        ret = priv_flow_convert_items_validate(priv, items, error, parser);
        if (ret)
                return ret;
        priv_flow_convert_finalise(priv, parser);
        /*
         * Second step.
         * Allocate the memory space to store verbs specifications.
         */
        if (parser->drop || parser->queues_n == 1) {
                unsigned int priority =
                        attr->priority +
                        hash_rxq_init[HASH_RXQ_ETH].flow_priority;
                unsigned int offset = parser->queue[HASH_RXQ_ETH].offset;

                parser->queue[HASH_RXQ_ETH].ibv_attr =
                        priv_flow_convert_allocate(priv, priority,
                                                   offset, error);
                if (!parser->queue[HASH_RXQ_ETH].ibv_attr)
                        return ENOMEM;
                parser->queue[HASH_RXQ_ETH].offset =
                        sizeof(struct ibv_flow_attr);
        } else {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        unsigned int priority =
                                attr->priority +
                                hash_rxq_init[i].flow_priority;
                        unsigned int offset;

                        if (!(parser->rss_conf.rss_hf &
                              hash_rxq_init[i].dpdk_rss_hf) &&
                            (i != HASH_RXQ_ETH))
                                continue;
                        offset = parser->queue[i].offset;
                        parser->queue[i].ibv_attr =
                                priv_flow_convert_allocate(priv, priority,
                                                           offset, error);
                        if (!parser->queue[i].ibv_attr)
                                goto exit_enomem;
                        parser->queue[i].offset = sizeof(struct ibv_flow_attr);
                }
        }
        /* Third step. Convert the items, fill the specifications. */
        parser->inner = 0;
        for (; items->type != RTE_FLOW_ITEM_TYPE_END; ++items) {
                if (items->type == RTE_FLOW_ITEM_TYPE_VOID)
                        continue;
                cur_item = &mlx5_flow_items[items->type];
                ret = cur_item->convert(items,
                                        (cur_item->default_mask ?
                                         cur_item->default_mask :
                                         cur_item->mask),
                                        parser);
                if (ret) {
                        rte_flow_error_set(error, ret,
                                           RTE_FLOW_ERROR_TYPE_ITEM,
                                           items, "item not supported");
                        goto exit_free;
                }
        }
        if (parser->mark)
                mlx5_flow_create_flag_mark(parser, parser->mark_id);
        if (parser->count && parser->create) {
                mlx5_flow_create_count(priv, parser);
                if (!parser->cs)
                        goto exit_count_error;
        }
        /*
         * Last step. Complete the missing specifications to match the RSS
         * configuration.
         */
        if (parser->queues_n > 1) {
                priv_flow_convert_finalise(priv, parser);
        } else {
                /*
                 * A queue action has its priority overridden with the
                 * Ethernet priority; it needs to be adjusted to the most
                 * specific layer's priority.
                 */
                parser->queue[HASH_RXQ_ETH].ibv_attr->priority =
                        attr->priority +
                        hash_rxq_init[parser->layer].flow_priority;
        }
exit_free:
        /* Only verification is expected; all resources should be released. */
        if (!parser->create) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (parser->queue[i].ibv_attr) {
                                rte_free(parser->queue[i].ibv_attr);
                                parser->queue[i].ibv_attr = NULL;
                        }
                }
        }
        if (parser->allmulti &&
            parser->layer == HASH_RXQ_ETH) {
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        if (parser->queue[i].ibv_attr->num_of_specs != 1)
                                break;
                        parser->queue[i].ibv_attr->type =
                                                IBV_FLOW_ATTR_MC_DEFAULT;
                }
        }
        return ret;
exit_enomem:
        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (parser->queue[i].ibv_attr) {
                        rte_free(parser->queue[i].ibv_attr);
                        parser->queue[i].ibv_attr = NULL;
                }
        }
        rte_flow_error_set(error, ENOTSUP, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot allocate verbs spec attributes.");
        return ret;
exit_count_error:
        rte_flow_error_set(error, EINVAL, RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
                           NULL, "cannot create counter.");
        return rte_errno;
}

/**
 * Copy the specification created into the flow.
 *
 * @param parser
 *   Internal parser structure.
 * @param src
 *   Created specification.
 * @param size
 *   Size in bytes of the specification to copy.
 */
static void
mlx5_flow_create_copy(struct mlx5_flow_parse *parser, void *src,
                      unsigned int size)
{
        unsigned int i;
        void *dst;

        for (i = 0; i != hash_rxq_init_n; ++i) {
                if (!parser->queue[i].ibv_attr)
                        continue;
                /* Specification must be the same L3 type or none. */
                if (parser->layer == HASH_RXQ_ETH ||
                    (hash_rxq_init[parser->layer].ip_version ==
                     hash_rxq_init[i].ip_version) ||
                    (hash_rxq_init[i].ip_version == 0)) {
                        dst = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset);
                        memcpy(dst, src, size);
                        ++parser->queue[i].ibv_attr->num_of_specs;
                        parser->queue[i].offset += size;
                }
        }
}

/**
 * Convert Ethernet item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_eth(const struct rte_flow_item *item,
                     const void *default_mask,
                     void *data)
{
        const struct rte_flow_item_eth *spec = item->spec;
        const struct rte_flow_item_eth *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);
        struct ibv_flow_spec_eth eth = {
                .type = parser->inner | IBV_FLOW_SPEC_ETH,
                .size = eth_size,
        };

        /* Don't update layer for the inner pattern. */
        if (!parser->inner)
                parser->layer = HASH_RXQ_ETH;
        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                memcpy(&eth.val.dst_mac, spec->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.val.src_mac, spec->src.addr_bytes, ETHER_ADDR_LEN);
                eth.val.ether_type = spec->type;
                memcpy(&eth.mask.dst_mac, mask->dst.addr_bytes, ETHER_ADDR_LEN);
                memcpy(&eth.mask.src_mac, mask->src.addr_bytes, ETHER_ADDR_LEN);
                eth.mask.ether_type = mask->type;
                /* Remove unwanted bits from values. */
                for (i = 0; i < ETHER_ADDR_LEN; ++i) {
                        eth.val.dst_mac[i] &= eth.mask.dst_mac[i];
                        eth.val.src_mac[i] &= eth.mask.src_mac[i];
                }
                eth.val.ether_type &= eth.mask.ether_type;
        }
        mlx5_flow_create_copy(parser, &eth, eth_size);
        parser->allmulti = eth.val.dst_mac[0] & 1;
        return 0;
}

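/*
 * Illustration: with spec->dst = 01:00:5e:00:00:01 and a mask of
 * ff:ff:ff:00:00:00, the resulting specification keeps only the masked
 * bytes:
 *
 *   eth.val.dst_mac  = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x00 }
 *   eth.mask.dst_mac = { 0xff, 0xff, 0xff, 0x00, 0x00, 0x00 }
 *
 * and parser->allmulti is set as the destination has the group bit on.
 */
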
/**
 * Convert VLAN item to Verbs specification.
 *
 * @param[in] item
 *   Item specification.
 * @param[in] default_mask
 *   Default bit-masks to use when item->mask is not provided.
 * @param[in, out] data
 *   User structure.
 */
static int
mlx5_flow_create_vlan(const struct rte_flow_item *item,
                      const void *default_mask,
                      void *data)
{
        const struct rte_flow_item_vlan *spec = item->spec;
        const struct rte_flow_item_vlan *mask = item->mask;
        struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
        struct ibv_flow_spec_eth *eth;
        const unsigned int eth_size = sizeof(struct ibv_flow_spec_eth);

        if (spec) {
                unsigned int i;

                if (!mask)
                        mask = default_mask;
                for (i = 0; i != hash_rxq_init_n; ++i) {
                        if (!parser->queue[i].ibv_attr)
                                continue;
                        eth = (void *)((uintptr_t)parser->queue[i].ibv_attr +
                                       parser->queue[i].offset - eth_size);
                        eth->val.vlan_tag = spec->tci;
                        eth->mask.vlan_tag = mask->tci;
                        eth->val.vlan_tag &= eth->mask.vlan_tag;
                }
        }
        return 0;
}

1328 /**
1329  * Convert IPv4 item to Verbs specification.
1330  *
1331  * @param[in] item
1332  *   Item specification.
1333  * @param[in] default_mask
1334  *   Default bit-masks to use when item->mask is not provided.
1335  * @param[in, out] data
1336  *   User structure.
1337  */
1338 static int
1339 mlx5_flow_create_ipv4(const struct rte_flow_item *item,
1340                       const void *default_mask,
1341                       void *data)
1342 {
1343         const struct rte_flow_item_ipv4 *spec = item->spec;
1344         const struct rte_flow_item_ipv4 *mask = item->mask;
1345         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1346         unsigned int ipv4_size = sizeof(struct ibv_flow_spec_ipv4_ext);
1347         struct ibv_flow_spec_ipv4_ext ipv4 = {
1348                 .type = parser->inner | IBV_FLOW_SPEC_IPV4_EXT,
1349                 .size = ipv4_size,
1350         };
1351
1352         /* Don't update layer for the inner pattern. */
1353         if (!parser->inner)
1354                 parser->layer = HASH_RXQ_IPV4;
1355         if (spec) {
1356                 if (!mask)
1357                         mask = default_mask;
1358                 ipv4.val = (struct ibv_flow_ipv4_ext_filter){
1359                         .src_ip = spec->hdr.src_addr,
1360                         .dst_ip = spec->hdr.dst_addr,
1361                         .proto = spec->hdr.next_proto_id,
1362                         .tos = spec->hdr.type_of_service,
1363                 };
1364                 ipv4.mask = (struct ibv_flow_ipv4_ext_filter){
1365                         .src_ip = mask->hdr.src_addr,
1366                         .dst_ip = mask->hdr.dst_addr,
1367                         .proto = mask->hdr.next_proto_id,
1368                         .tos = mask->hdr.type_of_service,
1369                 };
1370                 /* Remove unwanted bits from values. */
1371                 ipv4.val.src_ip &= ipv4.mask.src_ip;
1372                 ipv4.val.dst_ip &= ipv4.mask.dst_ip;
1373                 ipv4.val.proto &= ipv4.mask.proto;
1374                 ipv4.val.tos &= ipv4.mask.tos;
1375         }
1376         mlx5_flow_create_copy(parser, &ipv4, ipv4_size);
1377         return 0;
1378 }
1379
1380 /**
1381  * Convert IPv6 item to Verbs specification.
1382  *
1383  * @param[in] item
1384  *   Item specification.
1385  * @param[in] default_mask
1386  *   Default bit-masks to use when item->mask is not provided.
1387  * @param[in, out] data
1388  *   User structure.
1389  */
1390 static int
1391 mlx5_flow_create_ipv6(const struct rte_flow_item *item,
1392                       const void *default_mask,
1393                       void *data)
1394 {
1395         const struct rte_flow_item_ipv6 *spec = item->spec;
1396         const struct rte_flow_item_ipv6 *mask = item->mask;
1397         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1398         unsigned int ipv6_size = sizeof(struct ibv_flow_spec_ipv6);
1399         struct ibv_flow_spec_ipv6 ipv6 = {
1400                 .type = parser->inner | IBV_FLOW_SPEC_IPV6,
1401                 .size = ipv6_size,
1402         };
1403
1404         /* Don't update layer for the inner pattern. */
1405         if (!parser->inner)
1406                 parser->layer = HASH_RXQ_IPV6;
1407         if (spec) {
1408                 unsigned int i;
1409
1410                 if (!mask)
1411                         mask = default_mask;
1412                 memcpy(&ipv6.val.src_ip, spec->hdr.src_addr,
1413                        RTE_DIM(ipv6.val.src_ip));
1414                 memcpy(&ipv6.val.dst_ip, spec->hdr.dst_addr,
1415                        RTE_DIM(ipv6.val.dst_ip));
1416                 memcpy(&ipv6.mask.src_ip, mask->hdr.src_addr,
1417                        RTE_DIM(ipv6.mask.src_ip));
1418                 memcpy(&ipv6.mask.dst_ip, mask->hdr.dst_addr,
1419                        RTE_DIM(ipv6.mask.dst_ip));
1420                 ipv6.mask.flow_label = mask->hdr.vtc_flow;
1421                 ipv6.mask.next_hdr = mask->hdr.proto;
1422                 ipv6.mask.hop_limit = mask->hdr.hop_limits;
1423                 /* Remove unwanted bits from values. */
1424                 for (i = 0; i < RTE_DIM(ipv6.val.src_ip); ++i) {
1425                         ipv6.val.src_ip[i] &= ipv6.mask.src_ip[i];
1426                         ipv6.val.dst_ip[i] &= ipv6.mask.dst_ip[i];
1427                 }
1428                 ipv6.val.flow_label &= ipv6.mask.flow_label;
1429                 ipv6.val.next_hdr &= ipv6.mask.next_hdr;
1430                 ipv6.val.hop_limit &= ipv6.mask.hop_limit;
1431         }
1432         mlx5_flow_create_copy(parser, &ipv6, ipv6_size);
1433         return 0;
1434 }
1435
1436 /**
1437  * Convert UDP item to Verbs specification.
1438  *
1439  * @param[in] item
1440  *   Item specification.
1441  * @param[in] default_mask
1442  *   Default bit-masks to use when item->mask is not provided.
1443  * @param[in, out] data
1444  *   User structure.
1445  */
1446 static int
1447 mlx5_flow_create_udp(const struct rte_flow_item *item,
1448                      const void *default_mask,
1449                      void *data)
1450 {
1451         const struct rte_flow_item_udp *spec = item->spec;
1452         const struct rte_flow_item_udp *mask = item->mask;
1453         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1454         unsigned int udp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1455         struct ibv_flow_spec_tcp_udp udp = {
1456                 .type = parser->inner | IBV_FLOW_SPEC_UDP,
1457                 .size = udp_size,
1458         };
1459
1460         /* Don't update layer for the inner pattern. */
1461         if (!parser->inner) {
1462                 if (parser->layer == HASH_RXQ_IPV4)
1463                         parser->layer = HASH_RXQ_UDPV4;
1464                 else
1465                         parser->layer = HASH_RXQ_UDPV6;
1466         }
1467         if (spec) {
1468                 if (!mask)
1469                         mask = default_mask;
1470                 udp.val.dst_port = spec->hdr.dst_port;
1471                 udp.val.src_port = spec->hdr.src_port;
1472                 udp.mask.dst_port = mask->hdr.dst_port;
1473                 udp.mask.src_port = mask->hdr.src_port;
1474                 /* Remove unwanted bits from values. */
1475                 udp.val.src_port &= udp.mask.src_port;
1476                 udp.val.dst_port &= udp.mask.dst_port;
1477         }
1478         mlx5_flow_create_copy(parser, &udp, udp_size);
1479         return 0;
1480 }
1481
1482 /**
1483  * Convert TCP item to Verbs specification.
1484  *
1485  * @param[in] item
1486  *   Item specification.
1487  * @param[in] default_mask
1488  *   Default bit-masks to use when item->mask is not provided.
1489  * @param[in, out] data
1490  *   User structure.
1491  */
1492 static int
1493 mlx5_flow_create_tcp(const struct rte_flow_item *item,
1494                      const void *default_mask,
1495                      void *data)
1496 {
1497         const struct rte_flow_item_tcp *spec = item->spec;
1498         const struct rte_flow_item_tcp *mask = item->mask;
1499         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1500         unsigned int tcp_size = sizeof(struct ibv_flow_spec_tcp_udp);
1501         struct ibv_flow_spec_tcp_udp tcp = {
1502                 .type = parser->inner | IBV_FLOW_SPEC_TCP,
1503                 .size = tcp_size,
1504         };
1505
1506         /* Don't update layer for the inner pattern. */
1507         if (!parser->inner) {
1508                 if (parser->layer == HASH_RXQ_IPV4)
1509                         parser->layer = HASH_RXQ_TCPV4;
1510                 else
1511                         parser->layer = HASH_RXQ_TCPV6;
1512         }
1513         if (spec) {
1514                 if (!mask)
1515                         mask = default_mask;
1516                 tcp.val.dst_port = spec->hdr.dst_port;
1517                 tcp.val.src_port = spec->hdr.src_port;
1518                 tcp.mask.dst_port = mask->hdr.dst_port;
1519                 tcp.mask.src_port = mask->hdr.src_port;
1520                 /* Remove unwanted bits from values. */
1521                 tcp.val.src_port &= tcp.mask.src_port;
1522                 tcp.val.dst_port &= tcp.mask.dst_port;
1523         }
1524         mlx5_flow_create_copy(parser, &tcp, tcp_size);
1525         return 0;
1526 }
1527
1528 /**
1529  * Convert VXLAN item to Verbs specification.
1530  *
1531  * @param[in] item
1532  *   Item specification.
1533  * @param[in] default_mask
1534  *   Default bit-masks to use when item->mask is not provided.
1535  * @param[in, out] data
1536  *   User structure.
1537  */
1538 static int
1539 mlx5_flow_create_vxlan(const struct rte_flow_item *item,
1540                        const void *default_mask,
1541                        void *data)
1542 {
1543         const struct rte_flow_item_vxlan *spec = item->spec;
1544         const struct rte_flow_item_vxlan *mask = item->mask;
1545         struct mlx5_flow_parse *parser = (struct mlx5_flow_parse *)data;
1546         unsigned int size = sizeof(struct ibv_flow_spec_tunnel);
1547         struct ibv_flow_spec_tunnel vxlan = {
1548                 .type = parser->inner | IBV_FLOW_SPEC_VXLAN_TUNNEL,
1549                 .size = size,
1550         };
1551         union vni {
1552                 uint32_t vlan_id;
1553                 uint8_t vni[4];
1554         } id;
1555
1556         id.vni[0] = 0;
1557         parser->inner = IBV_FLOW_SPEC_INNER;
1558         if (spec) {
1559                 if (!mask)
1560                         mask = default_mask;
1561                 memcpy(&id.vni[1], spec->vni, 3);
1562                 vxlan.val.tunnel_id = id.vlan_id;
1563                 memcpy(&id.vni[1], mask->vni, 3);
1564                 vxlan.mask.tunnel_id = id.vlan_id;
1565                 /* Remove unwanted bits from values. */
1566                 vxlan.val.tunnel_id &= vxlan.mask.tunnel_id;
1567         }
1568         /*
1569          * Tunnel id 0 is equivalent to not adding a VXLAN layer: if only this
1570          * layer is defined in the Verbs specification, it is interpreted as a
1571          * wildcard and all packets will match this rule. If it follows a full
1572          * stack layer (ex: eth / ipv4 / udp), all packets matching the layers
1573          * before will also match this rule.
1574          * To avoid such a situation, VNI 0 is currently refused.
1575          */
1576         if (!vxlan.val.tunnel_id)
1577                 return EINVAL;
1578         mlx5_flow_create_copy(parser, &vxlan, size);
1579         return 0;
1580 }
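
/*
 * Illustrative sketch (not compiled in, hence the hypothetical guard and
 * example_* name): the union above stores the 24-bit VNI into bytes 1-3
 * of the 32-bit tunnel id, with byte 0 cleared; e.g. VNI bytes
 * { 0x12, 0x34, 0x56 } yield the in-memory sequence { 0x00, 0x12, 0x34, 0x56 }.
 */
#ifdef MLX5_FLOW_EXAMPLES
static uint32_t
example_vxlan_vni_pack(const uint8_t vni[3])
{
	union {
		uint32_t tunnel_id;
		uint8_t vni[4];
	} id = { .vni = { 0, vni[0], vni[1], vni[2] } };

	return id.tunnel_id;
}
#endif /* MLX5_FLOW_EXAMPLES */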
1581
1582 /**
1583  * Convert mark/flag action to Verbs specification.
1584  *
1585  * @param parser
1586  *   Internal parser structure.
1587  * @param mark_id
1588  *   Mark identifier.
1589  */
1590 static int
1591 mlx5_flow_create_flag_mark(struct mlx5_flow_parse *parser, uint32_t mark_id)
1592 {
1593         unsigned int size = sizeof(struct ibv_flow_spec_action_tag);
1594         struct ibv_flow_spec_action_tag tag = {
1595                 .type = IBV_FLOW_SPEC_ACTION_TAG,
1596                 .size = size,
1597                 .tag_id = mlx5_flow_mark_set(mark_id),
1598         };
1599
1600         assert(parser->mark);
1601         mlx5_flow_create_copy(parser, &tag, size);
1602         return 0;
1603 }
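
/*
 * Application-side sketch (not compiled in, hence the hypothetical guard
 * and example_* names): an action list that ends up in the tag
 * specification built by mlx5_flow_create_flag_mark() above. Matching
 * packets are marked with id 42 and delivered to queue 0; per the
 * rte_flow MARK semantics, the mark is reported back through the mbuf
 * (hash.fdir.hi with PKT_RX_FDIR_ID set).
 */
#ifdef MLX5_FLOW_EXAMPLES
static const struct rte_flow_action_mark example_mark = { .id = 42 };
static const struct rte_flow_action_queue example_dest = { .index = 0 };
static const struct rte_flow_action example_mark_actions[] = {
	{ .type = RTE_FLOW_ACTION_TYPE_MARK, .conf = &example_mark },
	{ .type = RTE_FLOW_ACTION_TYPE_QUEUE, .conf = &example_dest },
	{ .type = RTE_FLOW_ACTION_TYPE_END },
};
#endif /* MLX5_FLOW_EXAMPLES */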
1604
1605 /**
1606  * Convert count action to Verbs specification.
1607  *
1608  * @param priv
1609  *   Pointer to private structure.
1610  * @param parser
1611  *   Pointer to MLX5 flow parser structure.
1612  *
1613  * @return
1614  *   0 on success, errno value on failure.
1615  */
1616 static int
1617 mlx5_flow_create_count(struct priv *priv __rte_unused,
1618                        struct mlx5_flow_parse *parser __rte_unused)
1619 {
1620 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
1621         unsigned int size = sizeof(struct ibv_flow_spec_counter_action);
1622         struct ibv_counter_set_init_attr init_attr = {0};
1623         struct ibv_flow_spec_counter_action counter = {
1624                 .type = IBV_FLOW_SPEC_ACTION_COUNT,
1625                 .size = size,
1626                 .counter_set_handle = 0,
1627         };
1628
1629         init_attr.counter_set_id = 0;
1630         parser->cs = ibv_create_counter_set(priv->ctx, &init_attr);
1631         if (!parser->cs)
1632                 return EINVAL;
1633         counter.counter_set_handle = parser->cs->handle;
1634         mlx5_flow_create_copy(parser, &counter, size);
1635 #endif
1636         return 0;
1637 }
1638
1639 /**
1640  * Complete flow rule creation with a drop queue.
1641  *
1642  * @param priv
1643  *   Pointer to private structure.
1644  * @param parser
1645  *   Internal parser structure.
1646  * @param flow
1647  *   Pointer to the rte_flow.
1648  * @param[out] error
1649  *   Perform verbose error reporting if not NULL.
1650  *
1651  * @return
1652  *   0 on success, errno value on failure.
1653  */
1654 static int
1655 priv_flow_create_action_queue_drop(struct priv *priv,
1656                                    struct mlx5_flow_parse *parser,
1657                                    struct rte_flow *flow,
1658                                    struct rte_flow_error *error)
1659 {
1660         struct ibv_flow_spec_action_drop *drop;
1661         unsigned int size = sizeof(struct ibv_flow_spec_action_drop);
1662         int err = 0;
1663
1664         assert(priv->pd);
1665         assert(priv->ctx);
1666         flow->drop = 1;
1667         drop = (void *)((uintptr_t)parser->queue[HASH_RXQ_ETH].ibv_attr +
1668                         parser->queue[HASH_RXQ_ETH].offset);
1669         *drop = (struct ibv_flow_spec_action_drop){
1670                         .type = IBV_FLOW_SPEC_ACTION_DROP,
1671                         .size = size,
1672         };
1673         ++parser->queue[HASH_RXQ_ETH].ibv_attr->num_of_specs;
1674         parser->queue[HASH_RXQ_ETH].offset += size;
1675         flow->frxq[HASH_RXQ_ETH].ibv_attr =
1676                 parser->queue[HASH_RXQ_ETH].ibv_attr;
1677         if (parser->count)
1678                 flow->cs = parser->cs;
1679         if (!priv->dev->data->dev_started)
1680                 return 0;
1681         parser->queue[HASH_RXQ_ETH].ibv_attr = NULL;
1682         flow->frxq[HASH_RXQ_ETH].ibv_flow =
1683                 ibv_create_flow(priv->flow_drop_queue->qp,
1684                                 flow->frxq[HASH_RXQ_ETH].ibv_attr);
1685         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1686                 rte_flow_error_set(error, ENOMEM, RTE_FLOW_ERROR_TYPE_HANDLE,
1687                                    NULL, "flow rule creation failure");
1688                 err = ENOMEM;
1689                 goto error;
1690         }
1691         return 0;
1692 error:
1693         assert(flow);
1694         if (flow->frxq[HASH_RXQ_ETH].ibv_flow) {
1695                 claim_zero(ibv_destroy_flow(flow->frxq[HASH_RXQ_ETH].ibv_flow));
1696                 flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
1697         }
1698         if (flow->frxq[HASH_RXQ_ETH].ibv_attr) {
1699                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
1700                 flow->frxq[HASH_RXQ_ETH].ibv_attr = NULL;
1701         }
1702         if (flow->cs) {
1703                 claim_zero(ibv_destroy_counter_set(flow->cs));
1704                 flow->cs = NULL;
1705                 parser->cs = NULL;
1706         }
1707         return err;
1708 }
1709
1710 /**
1711  * Create hash Rx queues when RSS is enabled.
1712  *
1713  * @param priv
1714  *   Pointer to private structure.
1715  * @param parser
1716  *   Internal parser structure.
1717  * @param flow
1718  *   Pointer to the rte_flow.
1719  * @param[out] error
1720  *   Perform verbose error reporting if not NULL.
1721  *
1722  * @return
1723  *   0 on success, an errno value otherwise and rte_errno is set.
1724  */
1725 static int
1726 priv_flow_create_action_queue_rss(struct priv *priv,
1727                                   struct mlx5_flow_parse *parser,
1728                                   struct rte_flow *flow,
1729                                   struct rte_flow_error *error)
1730 {
1731         unsigned int i;
1732
1733         for (i = 0; i != hash_rxq_init_n; ++i) {
1734                 uint64_t hash_fields;
1735
1736                 if (!parser->queue[i].ibv_attr)
1737                         continue;
1738                 flow->frxq[i].ibv_attr = parser->queue[i].ibv_attr;
1739                 parser->queue[i].ibv_attr = NULL;
1740                 hash_fields = hash_rxq_init[i].hash_fields;
1741                 if (!priv->dev->data->dev_started)
1742                         continue;
1743                 flow->frxq[i].hrxq =
1744                         mlx5_priv_hrxq_get(priv,
1745                                            parser->rss_conf.rss_key,
1746                                            parser->rss_conf.rss_key_len,
1747                                            hash_fields,
1748                                            parser->queues,
1749                                            parser->queues_n);
1750                 if (flow->frxq[i].hrxq)
1751                         continue;
1752                 flow->frxq[i].hrxq =
1753                         mlx5_priv_hrxq_new(priv,
1754                                            parser->rss_conf.rss_key,
1755                                            parser->rss_conf.rss_key_len,
1756                                            hash_fields,
1757                                            parser->queues,
1758                                            parser->queues_n);
1759                 if (!flow->frxq[i].hrxq) {
1760                         rte_flow_error_set(error, ENOMEM,
1761                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1762                                            NULL, "cannot create hash rxq");
1763                         return ENOMEM;
1764                 }
1765         }
1766         return 0;
1767 }
1768
1769 /**
1770  * Complete flow rule creation.
1771  *
1772  * @param priv
1773  *   Pointer to private structure.
1774  * @param parser
1775  *   Internal parser structure.
1776  * @param flow
1777  *   Pointer to the rte_flow.
1778  * @param[out] error
1779  *   Perform verbose error reporting if not NULL.
1780  *
1781  * @return
1782  *   0 on success, an errno value otherwise and rte_errno is set.
1783  */
1784 static int
1785 priv_flow_create_action_queue(struct priv *priv,
1786                               struct mlx5_flow_parse *parser,
1787                               struct rte_flow *flow,
1788                               struct rte_flow_error *error)
1789 {
1790         int err = 0;
1791         unsigned int i;
1792
1793         assert(priv->pd);
1794         assert(priv->ctx);
1795         assert(!parser->drop);
1796         err = priv_flow_create_action_queue_rss(priv, parser, flow, error);
1797         if (err)
1798                 goto error;
1799         if (parser->count)
1800                 flow->cs = parser->cs;
1801         if (!priv->dev->data->dev_started)
1802                 return 0;
1803         for (i = 0; i != hash_rxq_init_n; ++i) {
1804                 if (!flow->frxq[i].hrxq)
1805                         continue;
1806                 flow->frxq[i].ibv_flow =
1807                         ibv_create_flow(flow->frxq[i].hrxq->qp,
1808                                         flow->frxq[i].ibv_attr);
1809                 if (!flow->frxq[i].ibv_flow) {
1810                         rte_flow_error_set(error, ENOMEM,
1811                                            RTE_FLOW_ERROR_TYPE_HANDLE,
1812                                            NULL, "flow rule creation failure");
1813                         err = ENOMEM;
1814                         goto error;
1815                 }
1816                 DEBUG("%p type %d QP %p ibv_flow %p",
1817                       (void *)flow, i,
1818                       (void *)flow->frxq[i].hrxq,
1819                       (void *)flow->frxq[i].ibv_flow);
1820         }
1821         for (i = 0; i != parser->queues_n; ++i) {
1822                 struct mlx5_rxq_data *q =
1823                         (*priv->rxqs)[parser->queues[i]];
1824
1825                 q->mark |= parser->mark;
1826         }
1827         return 0;
1828 error:
1829         assert(flow);
1830         for (i = 0; i != hash_rxq_init_n; ++i) {
1831                 if (flow->frxq[i].ibv_flow) {
1832                         struct ibv_flow *ibv_flow = flow->frxq[i].ibv_flow;
1833
1834                         claim_zero(ibv_destroy_flow(ibv_flow));
1835                 }
1836                 if (flow->frxq[i].hrxq)
1837                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
1838                 if (flow->frxq[i].ibv_attr)
1839                         rte_free(flow->frxq[i].ibv_attr);
1840         }
1841         if (flow->cs) {
1842                 claim_zero(ibv_destroy_counter_set(flow->cs));
1843                 flow->cs = NULL;
1844                 parser->cs = NULL;
1845         }
1846         return err;
1847 }
1848
1849 /**
1850  * Convert a flow.
1851  *
1852  * @param priv
1853  *   Pointer to private structure.
1854  * @param list
1855  *   Pointer to a TAILQ flow list.
1856  * @param[in] attr
1857  *   Flow rule attributes.
1858  * @param[in] pattern
1859  *   Pattern specification (list terminated by the END pattern item).
1860  * @param[in] actions
1861  *   Associated actions (list terminated by the END action).
1862  * @param[out] error
1863  *   Perform verbose error reporting if not NULL.
1864  *
1865  * @return
1866  *   A flow on success, NULL otherwise.
1867  */
1868 static struct rte_flow *
1869 priv_flow_create(struct priv *priv,
1870                  struct mlx5_flows *list,
1871                  const struct rte_flow_attr *attr,
1872                  const struct rte_flow_item items[],
1873                  const struct rte_flow_action actions[],
1874                  struct rte_flow_error *error)
1875 {
1876         struct mlx5_flow_parse parser = { .create = 1, };
1877         struct rte_flow *flow = NULL;
1878         unsigned int i;
1879         int err;
1880
1881         err = priv_flow_convert(priv, attr, items, actions, error, &parser);
1882         if (err)
1883                 goto exit;
1884         flow = rte_calloc(__func__, 1,
1885                           sizeof(*flow) + parser.queues_n * sizeof(uint16_t),
1886                           0);
1887         if (!flow) {
1888                 rte_flow_error_set(error, ENOMEM,
1889                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
1890                                    NULL,
1891                                    "cannot allocate flow memory");
1892                 return NULL;
1893         }
1894         /* Copy queues configuration. */
1895         flow->queues = (uint16_t (*)[])(flow + 1);
1896         memcpy(flow->queues, parser.queues, parser.queues_n * sizeof(uint16_t));
1897         flow->queues_n = parser.queues_n;
1898         flow->mark = parser.mark;
1899         /* Copy RSS configuration. */
1900         flow->rss_conf = parser.rss_conf;
1901         flow->rss_conf.rss_key = flow->rss_key;
1902         memcpy(flow->rss_key, parser.rss_key, parser.rss_conf.rss_key_len);
1903         /* Finalize the flow. */
1904         if (parser.drop)
1905                 err = priv_flow_create_action_queue_drop(priv, &parser, flow,
1906                                                          error);
1907         else
1908                 err = priv_flow_create_action_queue(priv, &parser, flow, error);
1909         if (err)
1910                 goto exit;
1911         TAILQ_INSERT_TAIL(list, flow, next);
1912         DEBUG("Flow created %p", (void *)flow);
1913         return flow;
1914 exit:
1915         for (i = 0; i != hash_rxq_init_n; ++i) {
1916                 if (parser.queue[i].ibv_attr)
1917                         rte_free(parser.queue[i].ibv_attr);
1918         }
1919         rte_free(flow);
1920         return NULL;
1921 }
1922
1923 /**
1924  * Validate a flow supported by the NIC.
1925  *
1926  * @see rte_flow_validate()
1927  * @see rte_flow_ops
1928  */
1929 int
1930 mlx5_flow_validate(struct rte_eth_dev *dev,
1931                    const struct rte_flow_attr *attr,
1932                    const struct rte_flow_item items[],
1933                    const struct rte_flow_action actions[],
1934                    struct rte_flow_error *error)
1935 {
1936         struct priv *priv = dev->data->dev_private;
1937         int ret;
1938         struct mlx5_flow_parse parser = { .create = 0, };
1939
1940         priv_lock(priv);
1941         ret = priv_flow_convert(priv, attr, items, actions, error, &parser);
1942         priv_unlock(priv);
1943         return ret;
1944 }
1945
1946 /**
1947  * Create a flow.
1948  *
1949  * @see rte_flow_create()
1950  * @see rte_flow_ops
1951  */
1952 struct rte_flow *
1953 mlx5_flow_create(struct rte_eth_dev *dev,
1954                  const struct rte_flow_attr *attr,
1955                  const struct rte_flow_item items[],
1956                  const struct rte_flow_action actions[],
1957                  struct rte_flow_error *error)
1958 {
1959         struct priv *priv = dev->data->dev_private;
1960         struct rte_flow *flow;
1961
1962         priv_lock(priv);
1963         flow = priv_flow_create(priv, &priv->flows, attr, items, actions,
1964                                 error);
1965         priv_unlock(priv);
1966         return flow;
1967 }
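
/*
 * Application-side sketch (not compiled in, hence the hypothetical guard
 * and example_* name): build a rule through the entry point above that
 * drops every ingress UDP packet with destination port 53. Error
 * handling is reduced to the NULL check documented for rte_flow_create().
 */
#ifdef MLX5_FLOW_EXAMPLES
static struct rte_flow *
example_drop_dns(struct rte_eth_dev *dev)
{
	const struct rte_flow_attr attr = { .ingress = 1 };
	const struct rte_flow_item_udp udp_spec = {
		.hdr.dst_port = rte_cpu_to_be_16(53),
	};
	const struct rte_flow_item_udp udp_mask = {
		.hdr.dst_port = rte_cpu_to_be_16(0xffff),
	};
	const struct rte_flow_item items[] = {
		{ .type = RTE_FLOW_ITEM_TYPE_ETH },
		{ .type = RTE_FLOW_ITEM_TYPE_IPV4 },
		{
			.type = RTE_FLOW_ITEM_TYPE_UDP,
			.spec = &udp_spec,
			.mask = &udp_mask,
		},
		{ .type = RTE_FLOW_ITEM_TYPE_END },
	};
	const struct rte_flow_action actions[] = {
		{ .type = RTE_FLOW_ACTION_TYPE_DROP },
		{ .type = RTE_FLOW_ACTION_TYPE_END },
	};
	struct rte_flow_error error;

	return mlx5_flow_create(dev, &attr, items, actions, &error);
}
#endif /* MLX5_FLOW_EXAMPLES */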
1968
1969 /**
1970  * Destroy a flow.
1971  *
1972  * @param priv
1973  *   Pointer to private structure.
1974  * @param list
1975  *   Pointer to a TAILQ flow list.
1976  * @param[in] flow
1977  *   Flow to destroy.
1978  */
1979 static void
1980 priv_flow_destroy(struct priv *priv,
1981                   struct mlx5_flows *list,
1982                   struct rte_flow *flow)
1983 {
1984         unsigned int i;
1985
1986         if (flow->drop || !flow->mark)
1987                 goto free;
1988         for (i = 0; i != flow->queues_n; ++i) {
1989                 struct rte_flow *tmp;
1990                 int mark = 0;
1991
1992                 /*
1993                  * To remove the mark from the queue, the queue must not be
1994                  * present in any other marked flow (RSS or not).
1995                  */
1996                 TAILQ_FOREACH(tmp, list, next) {
1997                         unsigned int j;
1998                         uint16_t *tqs = NULL;
1999                         uint16_t tq_n = 0;
2000
2001                         if (!tmp->mark)
2002                                 continue;
2003                         for (j = 0; j != hash_rxq_init_n; ++j) {
2004                                 if (!tmp->frxq[j].hrxq)
2005                                         continue;
2006                                 tqs = tmp->frxq[j].hrxq->ind_table->queues;
2007                                 tq_n = tmp->frxq[j].hrxq->ind_table->queues_n;
2008                         }
2009                         if (!tq_n)
2010                                 continue;
2011                         for (j = 0; (j != tq_n) && !mark; j++)
2012                                 if (tqs[j] == (*flow->queues)[i])
2013                                         mark = 1;
2014                 }
2015                 (*priv->rxqs)[(*flow->queues)[i]]->mark = mark;
2016         }
2017 free:
2018         if (flow->drop) {
2019                 if (flow->frxq[HASH_RXQ_ETH].ibv_flow)
2020                         claim_zero(ibv_destroy_flow
2021                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2022                 rte_free(flow->frxq[HASH_RXQ_ETH].ibv_attr);
2023         } else {
2024                 for (i = 0; i != hash_rxq_init_n; ++i) {
2025                         struct mlx5_flow *frxq = &flow->frxq[i];
2026
2027                         if (frxq->ibv_flow)
2028                                 claim_zero(ibv_destroy_flow(frxq->ibv_flow));
2029                         if (frxq->hrxq)
2030                                 mlx5_priv_hrxq_release(priv, frxq->hrxq);
2031                         if (frxq->ibv_attr)
2032                                 rte_free(frxq->ibv_attr);
2033                 }
2034         }
2035         if (flow->cs) {
2036                 claim_zero(ibv_destroy_counter_set(flow->cs));
2037                 flow->cs = NULL;
2038         }
2039         TAILQ_REMOVE(list, flow, next);
2040         DEBUG("Flow destroyed %p", (void *)flow);
2041         rte_free(flow);
2042 }
2043
2044 /**
2045  * Destroy all flows.
2046  *
2047  * @param priv
2048  *   Pointer to private structure.
2049  * @param list
2050  *   Pointer to a TAILQ flow list.
2051  */
2052 void
2053 priv_flow_flush(struct priv *priv, struct mlx5_flows *list)
2054 {
2055         while (!TAILQ_EMPTY(list)) {
2056                 struct rte_flow *flow;
2057
2058                 flow = TAILQ_FIRST(list);
2059                 priv_flow_destroy(priv, list, flow);
2060         }
2061 }
2062
2063 /**
2064  * Create drop queue.
2065  *
2066  * @param priv
2067  *   Pointer to private structure.
2068  *
2069  * @return
2070  *   0 on success, -1 on failure.
2071  */
2072 int
2073 priv_flow_create_drop_queue(struct priv *priv)
2074 {
2075         struct mlx5_hrxq_drop *fdq = NULL;
2076
2077         assert(priv->pd);
2078         assert(priv->ctx);
2079         fdq = rte_calloc(__func__, 1, sizeof(*fdq), 0);
2080         if (!fdq) {
2081                 WARN("cannot allocate memory for drop queue");
2082                 goto error;
2083         }
2084         fdq->cq = ibv_create_cq(priv->ctx, 1, NULL, NULL, 0);
2085         if (!fdq->cq) {
2086                 WARN("cannot allocate CQ for drop queue");
2087                 goto error;
2088         }
2089         fdq->wq = ibv_create_wq(priv->ctx,
2090                         &(struct ibv_wq_init_attr){
2091                         .wq_type = IBV_WQT_RQ,
2092                         .max_wr = 1,
2093                         .max_sge = 1,
2094                         .pd = priv->pd,
2095                         .cq = fdq->cq,
2096                         });
2097         if (!fdq->wq) {
2098                 WARN("cannot allocate WQ for drop queue");
2099                 goto error;
2100         }
2101         fdq->ind_table = ibv_create_rwq_ind_table(priv->ctx,
2102                         &(struct ibv_rwq_ind_table_init_attr){
2103                         .log_ind_tbl_size = 0,
2104                         .ind_tbl = &fdq->wq,
2105                         .comp_mask = 0,
2106                         });
2107         if (!fdq->ind_table) {
2108                 WARN("cannot allocate indirection table for drop queue");
2109                 goto error;
2110         }
2111         fdq->qp = ibv_create_qp_ex(priv->ctx,
2112                 &(struct ibv_qp_init_attr_ex){
2113                         .qp_type = IBV_QPT_RAW_PACKET,
2114                         .comp_mask =
2115                                 IBV_QP_INIT_ATTR_PD |
2116                                 IBV_QP_INIT_ATTR_IND_TABLE |
2117                                 IBV_QP_INIT_ATTR_RX_HASH,
2118                         .rx_hash_conf = (struct ibv_rx_hash_conf){
2119                                 .rx_hash_function =
2120                                         IBV_RX_HASH_FUNC_TOEPLITZ,
2121                                 .rx_hash_key_len = rss_hash_default_key_len,
2122                                 .rx_hash_key = rss_hash_default_key,
2123                                 .rx_hash_fields_mask = 0,
2124                                 },
2125                         .rwq_ind_tbl = fdq->ind_table,
2126                         .pd = priv->pd
2127                 });
2128         if (!fdq->qp) {
2129                 WARN("cannot allocate QP for drop queue");
2130                 goto error;
2131         }
2132         priv->flow_drop_queue = fdq;
2133         return 0;
2134 error:
2135         if (fdq && fdq->qp)
2136                 claim_zero(ibv_destroy_qp(fdq->qp));
2137         if (fdq && fdq->ind_table)
2138                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2139         if (fdq && fdq->wq)
2140                 claim_zero(ibv_destroy_wq(fdq->wq));
2141         if (fdq && fdq->cq)
2142                 claim_zero(ibv_destroy_cq(fdq->cq));
2143         if (fdq)
2144                 rte_free(fdq);
2145         priv->flow_drop_queue = NULL;
2146         return -1;
2147 }
2148
2149 /**
2150  * Delete drop queue.
2151  *
2152  * @param priv
2153  *   Pointer to private structure.
2154  */
2155 void
2156 priv_flow_delete_drop_queue(struct priv *priv)
2157 {
2158         struct mlx5_hrxq_drop *fdq = priv->flow_drop_queue;
2159
2160         if (!fdq)
2161                 return;
2162         if (fdq->qp)
2163                 claim_zero(ibv_destroy_qp(fdq->qp));
2164         if (fdq->ind_table)
2165                 claim_zero(ibv_destroy_rwq_ind_table(fdq->ind_table));
2166         if (fdq->wq)
2167                 claim_zero(ibv_destroy_wq(fdq->wq));
2168         if (fdq->cq)
2169                 claim_zero(ibv_destroy_cq(fdq->cq));
2170         rte_free(fdq);
2171         priv->flow_drop_queue = NULL;
2172 }
2173
2174 /**
2175  * Remove all flows.
2176  *
2177  * @param priv
2178  *   Pointer to private structure.
2179  * @param list
2180  *   Pointer to a TAILQ flow list.
2181  */
2182 void
2183 priv_flow_stop(struct priv *priv, struct mlx5_flows *list)
2184 {
2185         struct rte_flow *flow;
2186
2187         TAILQ_FOREACH_REVERSE(flow, list, mlx5_flows, next) {
2188                 unsigned int i;
2189
2190                 if (flow->drop) {
2191                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow)
2192                                 continue;
2193                         claim_zero(ibv_destroy_flow
2194                                    (flow->frxq[HASH_RXQ_ETH].ibv_flow));
2195                         flow->frxq[HASH_RXQ_ETH].ibv_flow = NULL;
2196                         /* Next flow. */
2197                         continue;
2198                 }
2199                 if (flow->mark) {
2200                         struct mlx5_ind_table_ibv *ind_tbl = NULL;
2201
2202                         for (i = 0; i != hash_rxq_init_n; ++i) {
2203                                 if (!flow->frxq[i].hrxq)
2204                                         continue;
2205                                 ind_tbl = flow->frxq[i].hrxq->ind_table;
2206                         }
2207                         assert(ind_tbl);
2208                         for (i = 0; i != ind_tbl->queues_n; ++i)
2209                                 (*priv->rxqs)[ind_tbl->queues[i]]->mark = 0;
2210                 }
2211                 for (i = 0; i != hash_rxq_init_n; ++i) {
2212                         if (!flow->frxq[i].ibv_flow)
2213                                 continue;
2214                         claim_zero(ibv_destroy_flow(flow->frxq[i].ibv_flow));
2215                         flow->frxq[i].ibv_flow = NULL;
2216                         mlx5_priv_hrxq_release(priv, flow->frxq[i].hrxq);
2217                         flow->frxq[i].hrxq = NULL;
2218                 }
2219                 DEBUG("Flow %p removed", (void *)flow);
2220         }
2221 }
2222
2223 /**
2224  * Add all flows.
2225  *
2226  * @param priv
2227  *   Pointer to private structure.
2228  * @param list
2229  *   Pointer to a TAILQ flow list.
2230  *
2231  * @return
2232  *   0 on success, an errno value otherwise and rte_errno is set.
2233  */
2234 int
2235 priv_flow_start(struct priv *priv, struct mlx5_flows *list)
2236 {
2237         struct rte_flow *flow;
2238
2239         TAILQ_FOREACH(flow, list, next) {
2240                 unsigned int i;
2241
2242                 if (flow->drop) {
2243                         flow->frxq[HASH_RXQ_ETH].ibv_flow =
2244                                 ibv_create_flow
2245                                 (priv->flow_drop_queue->qp,
2246                                  flow->frxq[HASH_RXQ_ETH].ibv_attr);
2247                         if (!flow->frxq[HASH_RXQ_ETH].ibv_flow) {
2248                                 DEBUG("Flow %p cannot be applied",
2249                                       (void *)flow);
2250                                 rte_errno = EINVAL;
2251                                 return rte_errno;
2252                         }
2253                         DEBUG("Flow %p applied", (void *)flow);
2254                         /* Next flow. */
2255                         continue;
2256                 }
2257                 for (i = 0; i != hash_rxq_init_n; ++i) {
2258                         if (!flow->frxq[i].ibv_attr)
2259                                 continue;
2260                         flow->frxq[i].hrxq =
2261                                 mlx5_priv_hrxq_get(priv, flow->rss_conf.rss_key,
2262                                                    flow->rss_conf.rss_key_len,
2263                                                    hash_rxq_init[i].hash_fields,
2264                                                    (*flow->queues),
2265                                                    flow->queues_n);
2266                         if (flow->frxq[i].hrxq)
2267                                 goto flow_create;
2268                         flow->frxq[i].hrxq =
2269                                 mlx5_priv_hrxq_new(priv, flow->rss_conf.rss_key,
2270                                                    flow->rss_conf.rss_key_len,
2271                                                    hash_rxq_init[i].hash_fields,
2272                                                    (*flow->queues),
2273                                                    flow->queues_n);
2274                         if (!flow->frxq[i].hrxq) {
2275                                 DEBUG("Flow %p cannot be applied",
2276                                       (void *)flow);
2277                                 rte_errno = EINVAL;
2278                                 return rte_errno;
2279                         }
2280 flow_create:
2281                         flow->frxq[i].ibv_flow =
2282                                 ibv_create_flow(flow->frxq[i].hrxq->qp,
2283                                                 flow->frxq[i].ibv_attr);
2284                         if (!flow->frxq[i].ibv_flow) {
2285                                 DEBUG("Flow %p cannot be applied",
2286                                       (void *)flow);
2287                                 rte_errno = EINVAL;
2288                                 return rte_errno;
2289                         }
2290                         DEBUG("Flow %p applied", (void *)flow);
2291                 }
2292                 if (!flow->mark)
2293                         continue;
2294                 for (i = 0; i != flow->queues_n; ++i)
2295                         (*priv->rxqs)[(*flow->queues)[i]]->mark = 1;
2296         }
2297         return 0;
2298 }
2299
2300 /**
2301  * Verify the flow list is empty.
2302  *
2303  * @param priv
2304  *   Pointer to private structure.
2305  *
2306  * @return The number of flows not released.
2307  */
2308 int
2309 priv_flow_verify(struct priv *priv)
2310 {
2311         struct rte_flow *flow;
2312         int ret = 0;
2313
2314         TAILQ_FOREACH(flow, &priv->flows, next) {
2315                 DEBUG("%p: flow %p still referenced", (void *)priv,
2316                       (void *)flow);
2317                 ++ret;
2318         }
2319         return ret;
2320 }
2321
2322 /**
2323  * Enable a control flow configured from the control plane.
2324  *
2325  * @param dev
2326  *   Pointer to Ethernet device.
2327  * @param eth_spec
2328  *   An Ethernet flow spec to apply.
2329  * @param eth_mask
2330  *   An Ethernet flow mask to apply.
2331  * @param vlan_spec
2332  *   A VLAN flow spec to apply.
2333  * @param vlan_mask
2334  *   A VLAN flow mask to apply.
2335  *
2336  * @return
2337  *   0 on success, an errno value otherwise.
2338  */
2339 int
2340 mlx5_ctrl_flow_vlan(struct rte_eth_dev *dev,
2341                     struct rte_flow_item_eth *eth_spec,
2342                     struct rte_flow_item_eth *eth_mask,
2343                     struct rte_flow_item_vlan *vlan_spec,
2344                     struct rte_flow_item_vlan *vlan_mask)
2345 {
2346         struct priv *priv = dev->data->dev_private;
2347         const struct rte_flow_attr attr = {
2348                 .ingress = 1,
2349                 .priority = MLX5_CTRL_FLOW_PRIORITY,
2350         };
2351         struct rte_flow_item items[] = {
2352                 {
2353                         .type = RTE_FLOW_ITEM_TYPE_ETH,
2354                         .spec = eth_spec,
2355                         .last = NULL,
2356                         .mask = eth_mask,
2357                 },
2358                 {
2359                         .type = (vlan_spec) ? RTE_FLOW_ITEM_TYPE_VLAN :
2360                                 RTE_FLOW_ITEM_TYPE_END,
2361                         .spec = vlan_spec,
2362                         .last = NULL,
2363                         .mask = vlan_mask,
2364                 },
2365                 {
2366                         .type = RTE_FLOW_ITEM_TYPE_END,
2367                 },
2368         };
2369         struct rte_flow_action actions[] = {
2370                 {
2371                         .type = RTE_FLOW_ACTION_TYPE_RSS,
2372                 },
2373                 {
2374                         .type = RTE_FLOW_ACTION_TYPE_END,
2375                 },
2376         };
2377         struct rte_flow *flow;
2378         struct rte_flow_error error;
2379         unsigned int i;
2380         union {
2381                 struct rte_flow_action_rss rss;
2382                 struct {
2383                         const struct rte_eth_rss_conf *rss_conf;
2384                         uint16_t num;
2385                         uint16_t queue[RTE_MAX_QUEUES_PER_PORT];
2386                 } local;
2387         } action_rss;
2388
2389         if (!priv->reta_idx_n)
2390                 return EINVAL;
2391         for (i = 0; i != priv->reta_idx_n; ++i)
2392                 action_rss.local.queue[i] = (*priv->reta_idx)[i];
2393         action_rss.local.rss_conf = &priv->rss_conf;
2394         action_rss.local.num = priv->reta_idx_n;
2395         actions[0].conf = (const void *)&action_rss.rss;
2396         flow = priv_flow_create(priv, &priv->ctrl_flows, &attr, items, actions,
2397                                 &error);
2398         if (!flow)
2399                 return rte_errno;
2400         return 0;
2401 }
2402
2403 /**
2404  * Enable a control flow configured from the control plane.
2405  *
2406  * @param dev
2407  *   Pointer to Ethernet device.
2408  * @param eth_spec
2409  *   An Ethernet flow spec to apply.
2410  * @param eth_mask
2411  *   An Ethernet flow mask to apply.
2412  *
2413  * @return
2414  *   0 on success, an errno value otherwise.
2415  */
2416 int
2417 mlx5_ctrl_flow(struct rte_eth_dev *dev,
2418                struct rte_flow_item_eth *eth_spec,
2419                struct rte_flow_item_eth *eth_mask)
2420 {
2421         return mlx5_ctrl_flow_vlan(dev, eth_spec, eth_mask, NULL, NULL);
2422 }
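
/*
 * Sketch of a typical caller (not compiled in, hence the hypothetical
 * guard and example_* name): accepting broadcast traffic boils down to a
 * control flow with an all-ones destination MAC used as both spec and
 * mask, which is how the driver start-up path uses this helper.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_enable_broadcast(struct rte_eth_dev *dev)
{
	struct rte_flow_item_eth bcast = {
		.dst.addr_bytes = "\xff\xff\xff\xff\xff\xff",
	};

	return mlx5_ctrl_flow(dev, &bcast, &bcast);
}
#endif /* MLX5_FLOW_EXAMPLES */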
2423
2424 /**
2425  * Destroy a flow.
2426  *
2427  * @see rte_flow_destroy()
2428  * @see rte_flow_ops
2429  */
2430 int
2431 mlx5_flow_destroy(struct rte_eth_dev *dev,
2432                   struct rte_flow *flow,
2433                   struct rte_flow_error *error)
2434 {
2435         struct priv *priv = dev->data->dev_private;
2436
2437         (void)error;
2438         priv_lock(priv);
2439         priv_flow_destroy(priv, &priv->flows, flow);
2440         priv_unlock(priv);
2441         return 0;
2442 }
2443
2444 /**
2445  * Destroy all flows.
2446  *
2447  * @see rte_flow_flush()
2448  * @see rte_flow_ops
2449  */
2450 int
2451 mlx5_flow_flush(struct rte_eth_dev *dev,
2452                 struct rte_flow_error *error)
2453 {
2454         struct priv *priv = dev->data->dev_private;
2455
2456         (void)error;
2457         priv_lock(priv);
2458         priv_flow_flush(priv, &priv->flows);
2459         priv_unlock(priv);
2460         return 0;
2461 }
2462
2463 #ifdef HAVE_IBV_DEVICE_COUNTERS_SET_SUPPORT
2464 /**
2465  * Query flow counter.
2466  *
2467  * @param cs
2468  *   The counter set to query.
2469  * @param counter_stats
2470  *   Accumulated statistics; the returned hits/bytes are deltas against it.
2471  *
2472  * @return
2473  *   0 on success, an errno value otherwise and rte_errno is set.
2474  */
2475 static int
2476 priv_flow_query_count(struct ibv_counter_set *cs,
2477                       struct mlx5_flow_counter_stats *counter_stats,
2478                       struct rte_flow_query_count *query_count,
2479                       struct rte_flow_error *error)
2480 {
2481         uint64_t counters[2];
2482         struct ibv_query_counter_set_attr query_cs_attr = {
2483                 .cs = cs,
2484                 .query_flags = IBV_COUNTER_SET_FORCE_UPDATE,
2485         };
2486         struct ibv_counter_set_data query_out = {
2487                 .out = counters,
2488                 .outlen = 2 * sizeof(uint64_t),
2489         };
2490         int res = ibv_query_counter_set(&query_cs_attr, &query_out);
2491
2492         if (res) {
2493                 rte_flow_error_set(error, -res,
2494                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2495                                    NULL,
2496                                    "cannot read counter");
2497                 return -res;
2498         }
2499         query_count->hits_set = 1;
2500         query_count->bytes_set = 1;
2501         query_count->hits = counters[0] - counter_stats->hits;
2502         query_count->bytes = counters[1] - counter_stats->bytes;
2503         if (query_count->reset) {
2504                 counter_stats->hits = counters[0];
2505                 counter_stats->bytes = counters[1];
2506         }
2507         return 0;
2508 }
2509
2510 /**
2511  * Query a flow.
2512  *
2513  * @see rte_flow_query()
2514  * @see rte_flow_ops
2515  */
2516 int
2517 mlx5_flow_query(struct rte_eth_dev *dev,
2518                 struct rte_flow *flow,
2519                 enum rte_flow_action_type action __rte_unused,
2520                 void *data,
2521                 struct rte_flow_error *error)
2522 {
2523         struct priv *priv = dev->data->dev_private;
2524         int res = EINVAL;
2525
2526         priv_lock(priv);
2527         if (flow->cs) {
2528                 res = priv_flow_query_count(flow->cs,
2529                                         &flow->counter_stats,
2530                                         (struct rte_flow_query_count *)data,
2531                                         error);
2532         } else {
2533                 rte_flow_error_set(error, res,
2534                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2535                                    NULL,
2536                                    "no counter found for flow");
2537         }
2538         priv_unlock(priv);
2539         return -res;
2540 }
2541 #endif
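
/*
 * Application-side sketch (not compiled in, hence the hypothetical guard
 * and example_* name): read and reset the counter of a flow created with
 * a COUNT action. On success, hits/bytes hold the delta accumulated
 * since the previous reset, as computed above.
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_read_flow_counter(struct rte_eth_dev *dev, struct rte_flow *flow,
			  uint64_t *hits, uint64_t *bytes)
{
	struct rte_flow_query_count query = { .reset = 1 };
	struct rte_flow_error error;
	int ret;

	ret = mlx5_flow_query(dev, flow, RTE_FLOW_ACTION_TYPE_COUNT,
			      &query, &error);
	if (ret)
		return ret;
	*hits = query.hits;
	*bytes = query.bytes;
	return 0;
}
#endif /* MLX5_FLOW_EXAMPLES */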
2542
2543 /**
2544  * Isolated mode.
2545  *
2546  * @see rte_flow_isolate()
2547  * @see rte_flow_ops
2548  */
2549 int
2550 mlx5_flow_isolate(struct rte_eth_dev *dev,
2551                   int enable,
2552                   struct rte_flow_error *error)
2553 {
2554         struct priv *priv = dev->data->dev_private;
2555
2556         priv_lock(priv);
2557         if (dev->data->dev_started) {
2558                 rte_flow_error_set(error, EBUSY,
2559                                    RTE_FLOW_ERROR_TYPE_UNSPECIFIED,
2560                                    NULL,
2561                                    "port must be stopped first");
2562                 priv_unlock(priv);
2563                 return -rte_errno;
2564         }
2565         priv->isolated = !!enable;
2566         if (enable)
2567                 priv->dev->dev_ops = &mlx5_dev_ops_isolate;
2568         else
2569                 priv->dev->dev_ops = &mlx5_dev_ops;
2570         priv_unlock(priv);
2571         return 0;
2572 }
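
/*
 * Usage sketch (not compiled in, hence the hypothetical guard and
 * example_* name): the EBUSY check above means isolated mode can only be
 * toggled while the port is stopped, so an application enables it
 * between configuration and rte_eth_dev_start().
 */
#ifdef MLX5_FLOW_EXAMPLES
static int
example_enter_isolated_mode(struct rte_eth_dev *dev)
{
	struct rte_flow_error error;

	/* Returns -rte_errno (e.g. -EBUSY) if the port is running. */
	return mlx5_flow_isolate(dev, 1, &error);
}
#endif /* MLX5_FLOW_EXAMPLES */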
2573
2574 /**
2575  * Convert a flow director filter to a generic flow.
2576  *
2577  * @param priv
2578  *   Private structure.
2579  * @param fdir_filter
2580  *   Flow director filter to add.
2581  * @param attributes
2582  *   Generic flow parameters structure.
2583  *
2584  * @return
2585  *  0 on success, errno value on error.
2586  */
2587 static int
2588 priv_fdir_filter_convert(struct priv *priv,
2589                          const struct rte_eth_fdir_filter *fdir_filter,
2590                          struct mlx5_fdir *attributes)
2591 {
2592         const struct rte_eth_fdir_input *input = &fdir_filter->input;
2593
2594         /* Validate queue number. */
2595         if (fdir_filter->action.rx_queue >= priv->rxqs_n) {
2596                 ERROR("invalid queue number %d", fdir_filter->action.rx_queue);
2597                 return EINVAL;
2598         }
2599         attributes->attr.ingress = 1;
2600         attributes->items[0] = (struct rte_flow_item) {
2601                 .type = RTE_FLOW_ITEM_TYPE_ETH,
2602                 .spec = &attributes->l2,
2603                 .mask = &attributes->l2_mask,
2604         };
2605         switch (fdir_filter->action.behavior) {
2606         case RTE_ETH_FDIR_ACCEPT:
2607                 attributes->actions[0] = (struct rte_flow_action){
2608                         .type = RTE_FLOW_ACTION_TYPE_QUEUE,
2609                         .conf = &attributes->queue,
2610                 };
2611                 break;
2612         case RTE_ETH_FDIR_REJECT:
2613                 attributes->actions[0] = (struct rte_flow_action){
2614                         .type = RTE_FLOW_ACTION_TYPE_DROP,
2615                 };
2616                 break;
2617         default:
2618                 ERROR("invalid behavior %d", fdir_filter->action.behavior);
2619                 return ENOTSUP;
2620         }
2621         attributes->queue.index = fdir_filter->action.rx_queue;
2622         switch (fdir_filter->input.flow_type) {
2623         case RTE_ETH_FLOW_NONFRAG_IPV4_UDP:
2624                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2625                         .src_addr = input->flow.udp4_flow.ip.src_ip,
2626                         .dst_addr = input->flow.udp4_flow.ip.dst_ip,
2627                         .time_to_live = input->flow.udp4_flow.ip.ttl,
2628                         .type_of_service = input->flow.udp4_flow.ip.tos,
2629                         .next_proto_id = input->flow.udp4_flow.ip.proto,
2630                 };
2631                 attributes->l4.udp.hdr = (struct udp_hdr){
2632                         .src_port = input->flow.udp4_flow.src_port,
2633                         .dst_port = input->flow.udp4_flow.dst_port,
2634                 };
2635                 attributes->items[1] = (struct rte_flow_item){
2636                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2637                         .spec = &attributes->l3,
2638                 };
2639                 attributes->items[2] = (struct rte_flow_item){
2640                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2641                         .spec = &attributes->l4,
2642                 };
2643                 break;
2644         case RTE_ETH_FLOW_NONFRAG_IPV4_TCP:
2645                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2646                         .src_addr = input->flow.tcp4_flow.ip.src_ip,
2647                         .dst_addr = input->flow.tcp4_flow.ip.dst_ip,
2648                         .time_to_live = input->flow.tcp4_flow.ip.ttl,
2649                         .type_of_service = input->flow.tcp4_flow.ip.tos,
2650                         .next_proto_id = input->flow.tcp4_flow.ip.proto,
2651                 };
2652                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2653                         .src_port = input->flow.tcp4_flow.src_port,
2654                         .dst_port = input->flow.tcp4_flow.dst_port,
2655                 };
2656                 attributes->items[1] = (struct rte_flow_item){
2657                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2658                         .spec = &attributes->l3,
2659                 };
2660                 attributes->items[2] = (struct rte_flow_item){
2661                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2662                         .spec = &attributes->l4,
2663                 };
2664                 break;
2665         case RTE_ETH_FLOW_NONFRAG_IPV4_OTHER:
2666                 attributes->l3.ipv4.hdr = (struct ipv4_hdr){
2667                         .src_addr = input->flow.ip4_flow.src_ip,
2668                         .dst_addr = input->flow.ip4_flow.dst_ip,
2669                         .time_to_live = input->flow.ip4_flow.ttl,
2670                         .type_of_service = input->flow.ip4_flow.tos,
2671                         .next_proto_id = input->flow.ip4_flow.proto,
2672                 };
2673                 attributes->items[1] = (struct rte_flow_item){
2674                         .type = RTE_FLOW_ITEM_TYPE_IPV4,
2675                         .spec = &attributes->l3,
2676                 };
2677                 break;
2678         case RTE_ETH_FLOW_NONFRAG_IPV6_UDP:
2679                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2680                         .hop_limits = input->flow.udp6_flow.ip.hop_limits,
2681                         .proto = input->flow.udp6_flow.ip.proto,
2682                 };
2683                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2684                        input->flow.udp6_flow.ip.src_ip,
2685                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2686                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2687                        input->flow.udp6_flow.ip.dst_ip,
2688                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2689                 attributes->l4.udp.hdr = (struct udp_hdr){
2690                         .src_port = input->flow.udp6_flow.src_port,
2691                         .dst_port = input->flow.udp6_flow.dst_port,
2692                 };
2693                 attributes->items[1] = (struct rte_flow_item){
2694                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2695                         .spec = &attributes->l3,
2696                 };
2697                 attributes->items[2] = (struct rte_flow_item){
2698                         .type = RTE_FLOW_ITEM_TYPE_UDP,
2699                         .spec = &attributes->l4,
2700                 };
2701                 break;
2702         case RTE_ETH_FLOW_NONFRAG_IPV6_TCP:
2703                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2704                         .hop_limits = input->flow.tcp6_flow.ip.hop_limits,
2705                         .proto = input->flow.tcp6_flow.ip.proto,
2706                 };
2707                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2708                        input->flow.tcp6_flow.ip.src_ip,
2709                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2710                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2711                        input->flow.tcp6_flow.ip.dst_ip,
2712                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2713                 attributes->l4.tcp.hdr = (struct tcp_hdr){
2714                         .src_port = input->flow.tcp6_flow.src_port,
2715                         .dst_port = input->flow.tcp6_flow.dst_port,
2716                 };
2717                 attributes->items[1] = (struct rte_flow_item){
2718                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2719                         .spec = &attributes->l3,
2720                 };
2721                 attributes->items[2] = (struct rte_flow_item){
2722                         .type = RTE_FLOW_ITEM_TYPE_TCP,
2723                         .spec = &attributes->l4,
2724                 };
2725                 break;
2726         case RTE_ETH_FLOW_NONFRAG_IPV6_OTHER:
2727                 attributes->l3.ipv6.hdr = (struct ipv6_hdr){
2728                         .hop_limits = input->flow.ipv6_flow.hop_limits,
2729                         .proto = input->flow.ipv6_flow.proto,
2730                 };
2731                 memcpy(attributes->l3.ipv6.hdr.src_addr,
2732                        input->flow.ipv6_flow.src_ip,
2733                        RTE_DIM(attributes->l3.ipv6.hdr.src_addr));
2734                 memcpy(attributes->l3.ipv6.hdr.dst_addr,
2735                        input->flow.ipv6_flow.dst_ip,
2736                        RTE_DIM(attributes->l3.ipv6.hdr.dst_addr));
2737                 attributes->items[1] = (struct rte_flow_item){
2738                         .type = RTE_FLOW_ITEM_TYPE_IPV6,
2739                         .spec = &attributes->l3,
2740                 };
2741                 break;
2742         default:
2743                 ERROR("invalid flow type %d",
2744                       fdir_filter->input.flow_type);
2745                 return ENOTSUP;
2746         }
2747         return 0;
2748 }
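
/*
 * For illustration (a sketch, not driver code): given an
 * RTE_ETH_FLOW_NONFRAG_IPV6_UDP filter, the conversion above should leave
 * the pattern as ETH / IPV6 / UDP / END, assuming items[0] and the
 * terminating item are initialized elsewhere in the attributes structure:
 *
 *   items[0].type == RTE_FLOW_ITEM_TYPE_ETH
 *   items[1].type == RTE_FLOW_ITEM_TYPE_IPV6 (spec = &attributes->l3)
 *   items[2].type == RTE_FLOW_ITEM_TYPE_UDP  (spec = &attributes->l4)
 *   items[3].type == RTE_FLOW_ITEM_TYPE_END
 */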
2749
2750 /**
2751  * Add a new flow director filter and store it in the flow list.
2752  *
2753  * @param priv
2754  *   Private structure.
2755  * @param fdir_filter
2756  *   Flow director filter to add.
2757  *
2758  * @return
2759  *   0 on success, errno value on failure.
2760  */
2761 static int
2762 priv_fdir_filter_add(struct priv *priv,
2763                      const struct rte_eth_fdir_filter *fdir_filter)
2764 {
2765         struct mlx5_fdir attributes = {
2766                 .attr.group = 0,
2767                 .l2_mask = {
2768                         .dst.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2769                         .src.addr_bytes = "\x00\x00\x00\x00\x00\x00",
2770                         .type = 0,
2771                 },
2772         };
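        /*
         * The all-zero l2_mask above turns the Ethernet item into a
         * wildcard, so matching relies on the L3/L4 items only.
         */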
2773         struct mlx5_flow_parse parser = {
2774                 .layer = HASH_RXQ_ETH,
2775         };
2776         struct rte_flow_error error;
2777         struct rte_flow *flow;
2778         int ret;
2779
2780         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2781         if (ret)
2782                 return ret;
2783         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2784                                 attributes.actions, &error, &parser);
2785         if (ret)
2786                 return -ret;
2787         flow = priv_flow_create(priv,
2788                                 &priv->flows,
2789                                 &attributes.attr,
2790                                 attributes.items,
2791                                 attributes.actions,
2792                                 &error);
2793         if (flow) {
2794                 DEBUG("FDIR created %p", (void *)flow);
2795                 return 0;
2796         }
2797         return ENOTSUP;
2798 }
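
/*
 * Usage sketch, application side (illustrative only; field and function
 * names are from rte_eth_ctrl.h and rte_ethdev.h): a filter handled by
 * priv_fdir_filter_add() typically originates from a call such as:
 *
 *   struct rte_eth_fdir_filter f = {
 *           .input = {
 *                   .flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_OTHER,
 *                   .flow.ip4_flow = {
 *                           .src_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 1)),
 *                           .dst_ip = rte_cpu_to_be_32(IPv4(10, 0, 0, 2)),
 *                   },
 *           },
 *           .action = {
 *                   .behavior = RTE_ETH_FDIR_ACCEPT,
 *                   .rx_queue = 1,
 *           },
 *   };
 *
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_FDIR,
 *                           RTE_ETH_FILTER_ADD, &f);
 */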
2799
2800 /**
2801  * Delete specific filter.
2802  * Delete a specific flow director filter.
2803  * @param priv
2804  *   Private structure.
2805  * @param fdir_filter
2806  *   Filter to be deleted.
2807  *
2808  * @return
2809  *   0 on success, errno value on failure.
2810  */
2811 static int
2812 priv_fdir_filter_delete(struct priv *priv,
2813                         const struct rte_eth_fdir_filter *fdir_filter)
2814 {
2815         struct mlx5_fdir attributes = {
2816                 .attr.group = 0,
2817         };
2818         struct mlx5_flow_parse parser = {
2819                 .create = 1,
2820                 .layer = HASH_RXQ_ETH,
2821         };
2822         struct rte_flow_error error;
2823         struct rte_flow *flow;
2824         unsigned int i;
2825         int ret;
2826
2827         ret = priv_fdir_filter_convert(priv, fdir_filter, &attributes);
2828         if (ret)
2829                 return ret;
2830         ret = priv_flow_convert(priv, &attributes.attr, attributes.items,
2831                                 attributes.actions, &error, &parser);
2832         if (ret)
2833                 goto exit;
2834         /*
2835          * Special case for the drop action, which is only appended to the
2836          * specifications when a flow is created; here it is still missing
2837          * and must be added manually before comparing with stored flows.
2838          */
2839         if (parser.drop) {
2840                 struct ibv_flow_spec_action_drop *drop;
2841
2842                 drop = (void *)((uintptr_t)parser.queue[HASH_RXQ_ETH].ibv_attr +
2843                                 parser.queue[HASH_RXQ_ETH].offset);
2844                 *drop = (struct ibv_flow_spec_action_drop){
2845                         .type = IBV_FLOW_SPEC_ACTION_DROP,
2846                         .size = sizeof(struct ibv_flow_spec_action_drop),
2847                 };
2848                 parser.queue[HASH_RXQ_ETH].ibv_attr->num_of_specs++;
2849         }
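        /*
         * Scan the list of registered flows for one whose Verbs attributes
         * and specification list match the converted filter byte for byte.
         */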
2850         TAILQ_FOREACH(flow, &priv->flows, next) {
2851                 struct ibv_flow_attr *attr;
2852                 struct ibv_spec_header *attr_h;
2853                 void *spec;
2854                 struct ibv_flow_attr *flow_attr;
2855                 struct ibv_spec_header *flow_h;
2856                 void *flow_spec;
2857                 unsigned int specs_n;
2858
2859                 attr = parser.queue[HASH_RXQ_ETH].ibv_attr;
2860                 flow_attr = flow->frxq[HASH_RXQ_ETH].ibv_attr;
2861                 /* Compare first the attributes. */
2862                 if (memcmp(attr, flow_attr, sizeof(struct ibv_flow_attr)))
2863                         continue;
2864                 if (attr->num_of_specs == 0)
2865                         continue;
2866                 spec = (void *)((uintptr_t)attr +
2867                                 sizeof(struct ibv_flow_attr));
2868                 flow_spec = (void *)((uintptr_t)flow_attr +
2869                                      sizeof(struct ibv_flow_attr));
2870                 specs_n = RTE_MIN(attr->num_of_specs, flow_attr->num_of_specs);
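                /*
                 * Walk the two specification lists in lockstep; every spec
                 * begins with an ibv_spec_header carrying its type and size,
                 * which is how the pointers advance to the next entry.
                 */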
2871                 for (i = 0; i != specs_n; ++i) {
2872                         attr_h = spec;
2873                         flow_h = flow_spec;
2874                         if (memcmp(spec, flow_spec,
2875                                    RTE_MIN(attr_h->size, flow_h->size)))
2876                                 goto wrong_flow;
2877                         spec = (void *)((uintptr_t)spec + attr_h->size);
2878                         flow_spec = (void *)((uintptr_t)flow_spec +
2879                                              flow_h->size);
2880                 }
2881                 /* At this point, the flow matches. */
2882                 break;
2883 wrong_flow:
2884                 /* The flow does not match. */
2885                 continue;
2886         }
2887         if (flow)
2888                 priv_flow_destroy(priv, &priv->flows, flow);
2889 exit:
2890         for (i = 0; i != hash_rxq_init_n; ++i) {
2891                 if (parser.queue[i].ibv_attr)
2892                         rte_free(parser.queue[i].ibv_attr);
2893         }
2894         return -ret;
2895 }
2896
2897 /**
2898  * Update queue for specific filter.
2899  * Update a specific flow director filter.
2900  * @param priv
2901  *   Private structure.
2902  * @param fdir_filter
2903  *   Filter to be updated.
2904  *
2905  * @return
2906  *   0 on success, errno value on failure.
2907  */
2908 static int
2909 priv_fdir_filter_update(struct priv *priv,
2910                         const struct rte_eth_fdir_filter *fdir_filter)
2911 {
2912         int ret;
2913
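        /*
         * The update is performed as a delete followed by an add; note that
         * if the add step fails, the original filter is already gone.
         */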
2914         ret = priv_fdir_filter_delete(priv, fdir_filter);
2915         if (ret)
2916                 return ret;
2917         ret = priv_fdir_filter_add(priv, fdir_filter);
2918         return ret;
2919 }
2920
2921 /**
2922  * Flush all filters.
2923  *
2924  * @param priv
2925  *   Private structure.
2926  */
2927 static void
2928 priv_fdir_filter_flush(struct priv *priv)
2929 {
2930         priv_flow_flush(priv, &priv->flows);
2931 }
2932
2933 /**
2934  * Get flow director information.
2935  *
2936  * @param priv
2937  *   Private structure.
2938  * @param[out] fdir_info
2939  *   Resulting flow director information.
2940  */
2941 static void
2942 priv_fdir_info_get(struct priv *priv, struct rte_eth_fdir_info *fdir_info)
2943 {
2944         struct rte_eth_fdir_masks *mask =
2945                 &priv->dev->data->dev_conf.fdir_conf.mask;
2946
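        /* Flexible payload is not supported, hence the zeroed fields below. */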
2947         fdir_info->mode = priv->dev->data->dev_conf.fdir_conf.mode;
2948         fdir_info->guarant_spc = 0;
2949         rte_memcpy(&fdir_info->mask, mask, sizeof(fdir_info->mask));
2950         fdir_info->max_flexpayload = 0;
2951         fdir_info->flow_types_mask[0] = 0;
2952         fdir_info->flex_payload_unit = 0;
2953         fdir_info->max_flex_payload_segment_num = 0;
2954         fdir_info->flex_payload_limit = 0;
2955         memset(&fdir_info->flex_conf, 0, sizeof(fdir_info->flex_conf));
2956 }
2957
2958 /**
2959  * Deal with flow director operations.
2960  *
2961  * @param priv
2962  *   Pointer to private structure.
2963  * @param filter_op
2964  *   Operation to perform.
2965  * @param arg
2966  *   Pointer to operation-specific structure.
2967  *
2968  * @return
2969  *   0 on success, errno value on failure.
2970  */
2971 static int
2972 priv_fdir_ctrl_func(struct priv *priv, enum rte_filter_op filter_op, void *arg)
2973 {
2974         enum rte_fdir_mode fdir_mode =
2975                 priv->dev->data->dev_conf.fdir_conf.mode;
2976         int ret = 0;
2977
2978         if (filter_op == RTE_ETH_FILTER_NOP)
2979                 return 0;
2980         if (fdir_mode != RTE_FDIR_MODE_PERFECT &&
2981             fdir_mode != RTE_FDIR_MODE_PERFECT_MAC_VLAN) {
2982                 ERROR("%p: flow director mode %d not supported",
2983                       (void *)priv, fdir_mode);
2984                 return EINVAL;
2985         }
2986         switch (filter_op) {
2987         case RTE_ETH_FILTER_ADD:
2988                 ret = priv_fdir_filter_add(priv, arg);
2989                 break;
2990         case RTE_ETH_FILTER_UPDATE:
2991                 ret = priv_fdir_filter_update(priv, arg);
2992                 break;
2993         case RTE_ETH_FILTER_DELETE:
2994                 ret = priv_fdir_filter_delete(priv, arg);
2995                 break;
2996         case RTE_ETH_FILTER_FLUSH:
2997                 priv_fdir_filter_flush(priv);
2998                 break;
2999         case RTE_ETH_FILTER_INFO:
3000                 priv_fdir_info_get(priv, arg);
3001                 break;
3002         default:
3003                 DEBUG("%p: unknown operation %u", (void *)priv,
3004                       filter_op);
3005                 ret = EINVAL;
3006                 break;
3007         }
3008         return ret;
3009 }
3010
3011 /**
3012  * Manage filter operations.
3013  *
3014  * @param dev
3015  *   Pointer to Ethernet device structure.
3016  * @param filter_type
3017  *   Filter type.
3018  * @param filter_op
3019  *   Operation to perform.
3020  * @param arg
3021  *   Pointer to operation-specific structure.
3022  *
3023  * @return
3024  *   0 on success, negative errno value on failure.
3025  */
3026 int
3027 mlx5_dev_filter_ctrl(struct rte_eth_dev *dev,
3028                      enum rte_filter_type filter_type,
3029                      enum rte_filter_op filter_op,
3030                      void *arg)
3031 {
3032         int ret = EINVAL;
3033         struct priv *priv = dev->data->dev_private;
3034
3035         switch (filter_type) {
3036         case RTE_ETH_FILTER_GENERIC:
3037                 if (filter_op != RTE_ETH_FILTER_GET)
3038                         return -EINVAL;
3039                 *(const void **)arg = &mlx5_flow_ops;
3040                 return 0;
3041         case RTE_ETH_FILTER_FDIR:
3042                 priv_lock(priv);
3043                 ret = priv_fdir_ctrl_func(priv, filter_op, arg);
3044                 priv_unlock(priv);
3045                 break;
3046         default:
3047                 ERROR("%p: filter type (%d) not supported",
3048                       (void *)dev, filter_type);
3049                 break;
3050         }
3051         return -ret;
3052 }
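
/*
 * Usage sketch (illustrative only): RTE_ETH_FILTER_GENERIC is the path
 * librte_ether's rte_flow layer uses to retrieve the driver callbacks,
 * equivalent to:
 *
 *   const struct rte_flow_ops *ops;
 *
 *   rte_eth_dev_filter_ctrl(port_id, RTE_ETH_FILTER_GENERIC,
 *                           RTE_ETH_FILTER_GET, &ops);
 *
 * after which ops points to mlx5_flow_ops.
 */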